Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:53

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cassert>
0021 #include <vector>
0022 
0023 #include "arrow/acero/options.h"
0024 #include "arrow/acero/schema_util.h"
0025 #include "arrow/result.h"
0026 #include "arrow/status.h"
0027 
0028 namespace arrow {
0029 
0030 using compute::ExecContext;  // lets BindFilter below name ExecContext unqualified
0031 
0032 namespace acero {
0033 
0034 class ARROW_ACERO_EXPORT HashJoinSchema {
     // Schema-side state for a hash join. The only data member is proj_maps, one
     // SchemaProjectionMaps<HashJoinProjection> per join input: slot 0 is the
     // left input and slot 1 is the right input (see LeftPayloadIsEmpty /
     // RightPayloadIsEmpty).
     //
     // Apart from the small inline helpers, members are only declared here; their
     // definitions are not part of this header. Comments on those members describe
     // the signature and mark anything inferred from names as a review note.
0035  public:
       // Sets this object up from the join type, both input schemas, the key
       // fields of each side and the join filter. This overload takes no explicit
       // output field lists. Returns a Status.
       // NOTE(review): presumably all input fields are treated as outputs when no
       // list is given -- confirm against the definition.
0036   Status Init(JoinType join_type, const Schema& left_schema,
0037               const std::vector<FieldRef>& left_keys, const Schema& right_schema,
0038               const std::vector<FieldRef>& right_keys, const Expression& filter,
0039               const std::string& left_field_name_prefix,
0040               const std::string& right_field_name_prefix);
0041 
       // Same as the overload above, but the caller additionally passes the fields
       // wanted from each side in left_output / right_output. Returns a Status.
       // NOTE(review): the last two parameters are named *_prefix here and in
       // ValidateSchemas, but *_suffix in MakeOutputSchema; confirm which is meant.
0042   Status Init(JoinType join_type, const Schema& left_schema,
0043               const std::vector<FieldRef>& left_keys,
0044               const std::vector<FieldRef>& left_output, const Schema& right_schema,
0045               const std::vector<FieldRef>& right_keys,
0046               const std::vector<FieldRef>& right_output, const Expression& filter,
0047               const std::string& left_field_name_prefix,
0048               const std::string& right_field_name_prefix);
0049 
       // Static check that needs no HashJoinSchema instance. Takes the same inputs
       // as the second Init overload except for the filter, and returns a Status.
       // NOTE(review): the exact conditions checked are in the definition, which
       // is not visible here.
0050   static Status ValidateSchemas(JoinType join_type, const Schema& left_schema,
0051                                 const std::vector<FieldRef>& left_keys,
0052                                 const std::vector<FieldRef>& left_output,
0053                                 const Schema& right_schema,
0054                                 const std::vector<FieldRef>& right_keys,
0055                                 const std::vector<FieldRef>& right_output,
0056                                 const std::string& left_field_name_prefix,
0057                                 const std::string& right_field_name_prefix);
0058 
       // Const query returning a bool.
       // NOTE(review): presumably true when some column involved in the join has a
       // dictionary type -- confirm which columns are inspected.
0059   bool HasDictionaries() const;
0060 
       // Const query returning a bool.
       // NOTE(review): presumably true when some column involved in the join has a
       // large-binary type -- confirm which columns are inspected.
0061   bool HasLargeBinary() const;
0062 
       // Takes a filter expression (by value) together with both input schemas and
       // an ExecContext, and returns either an Expression or an error via Result.
       // NOTE(review): presumably the returned expression is the filter bound for
       // evaluation; whether exec_context may be null is not visible here.
0063   Result<Expression> BindFilter(Expression filter, const Schema& left_schema,
0064                                 const Schema& right_schema, ExecContext* exec_context);
       // Returns a shared Schema built with the two given field-name suffixes.
       // Not declared const.
       // NOTE(review): presumably the suffixes are applied to left/right output
       // field names; the naming differs from the *_prefix parameters of Init.
0065   std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
0066                                            const std::string& right_field_name_suffix);
0067 
       // True when the left input (slot 0) has no PAYLOAD columns.
0068   bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); }
0069 
       // True when the right input (slot 1) has no PAYLOAD columns.
0070   bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); }
0071 
       // Forwards SchemaProjectionMaps<HashJoinProjection>::kMissingField so callers
       // can obtain it through HashJoinSchema.
0072   static int kMissingField() {
0073     return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
0074   }
0075 
       // Per-input projection maps, publicly accessible: [0] is the left input and
       // [1] is the right input.
0076   SchemaProjectionMaps<HashJoinProjection> proj_maps[2];
0077 
0078  private:
       // Static predicate on a DataType.
       // NOTE(review): the set of accepted types, and which columns it is applied
       // to, is decided in the definition -- not visible here.
0079   static bool IsTypeSupported(const DataType& type);
0080 
       // Takes the filter and both schemas; left_filter and right_filter are
       // non-const references. Returns a Status.
       // NOTE(review): presumably the two vectors are outputs filled with the
       // fields the filter references on each side -- confirm.
0081   Status CollectFilterColumns(std::vector<FieldRef>& left_filter,
0082                               std::vector<FieldRef>& right_filter,
0083                               const Expression& filter, const Schema& left_schema,
0084                               const Schema& right_schema);
0085 
       // Returns a new Expression derived from `filter`, given one projection map
       // per side and an integer offset for the right side.
       // NOTE(review): presumably field references are remapped into a combined
       // filter schema in which right-side columns start at right_filter_offset.
0086   Expression RewriteFilterToUseFilterSchema(int right_filter_offset,
0087                                             const SchemaProjectionMap& left_to_filter,
0088                                             const SchemaProjectionMap& right_to_filter,
0089                                             const Expression& filter);
0090 
       // Shared implementation of Left/RightPayloadIsEmpty: true when the chosen
       // side's projection maps report zero PAYLOAD columns.
       // `side` must be 0 (left) or 1 (right), the two valid proj_maps indices.
0091   bool PayloadIsEmpty(int side) const {
         // Debug-only guard; the assert is compiled out when NDEBUG is defined.
0092     assert(side == 0 || side == 1);
0093     return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
0094   }
0095 
       // Static helper returning a list of FieldRefs (or an error) computed from a
       // schema and its output, filter and key field lists.
       // NOTE(review): presumably the result holds the non-key fields needed for
       // output or filtering; confirm the exact rule in the definition.
0096   static Result<std::vector<FieldRef>> ComputePayload(const Schema& schema,
0097                                                       const std::vector<FieldRef>& output,
0098                                                       const std::vector<FieldRef>& filter,
0099                                                       const std::vector<FieldRef>& key);
0100 };
0101 
0102 }  // namespace acero
0103 }  // namespace arrow