File indexing completed on 2025-08-28 08:26:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cassert>
0021 #include <vector>
0022
0023 #include "arrow/acero/options.h"
0024 #include "arrow/acero/schema_util.h"
0025 #include "arrow/result.h"
0026 #include "arrow/status.h"
0027
0028 namespace arrow {
0029
0030 using compute::ExecContext;
0031
0032 namespace acero {
0033
0034 class ARROW_ACERO_EXPORT HashJoinSchema {
0035 public:
0036 Status Init(JoinType join_type, const Schema& left_schema,
0037 const std::vector<FieldRef>& left_keys, const Schema& right_schema,
0038 const std::vector<FieldRef>& right_keys, const Expression& filter,
0039 const std::string& left_field_name_prefix,
0040 const std::string& right_field_name_prefix);
0041
0042 Status Init(JoinType join_type, const Schema& left_schema,
0043 const std::vector<FieldRef>& left_keys,
0044 const std::vector<FieldRef>& left_output, const Schema& right_schema,
0045 const std::vector<FieldRef>& right_keys,
0046 const std::vector<FieldRef>& right_output, const Expression& filter,
0047 const std::string& left_field_name_prefix,
0048 const std::string& right_field_name_prefix);
0049
0050 static Status ValidateSchemas(JoinType join_type, const Schema& left_schema,
0051 const std::vector<FieldRef>& left_keys,
0052 const std::vector<FieldRef>& left_output,
0053 const Schema& right_schema,
0054 const std::vector<FieldRef>& right_keys,
0055 const std::vector<FieldRef>& right_output,
0056 const std::string& left_field_name_prefix,
0057 const std::string& right_field_name_prefix);
0058
0059 bool HasDictionaries() const;
0060
0061 bool HasLargeBinary() const;
0062
0063 Result<Expression> BindFilter(Expression filter, const Schema& left_schema,
0064 const Schema& right_schema, ExecContext* exec_context);
0065 std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
0066 const std::string& right_field_name_suffix);
0067
0068 bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); }
0069
0070 bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); }
0071
0072 static int kMissingField() {
0073 return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
0074 }
0075
0076 SchemaProjectionMaps<HashJoinProjection> proj_maps[2];
0077
0078 private:
0079 static bool IsTypeSupported(const DataType& type);
0080
0081 Status CollectFilterColumns(std::vector<FieldRef>& left_filter,
0082 std::vector<FieldRef>& right_filter,
0083 const Expression& filter, const Schema& left_schema,
0084 const Schema& right_schema);
0085
0086 Expression RewriteFilterToUseFilterSchema(int right_filter_offset,
0087 const SchemaProjectionMap& left_to_filter,
0088 const SchemaProjectionMap& right_to_filter,
0089 const Expression& filter);
0090
0091 bool PayloadIsEmpty(int side) const {
0092 assert(side == 0 || side == 1);
0093 return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
0094 }
0095
0096 static Result<std::vector<FieldRef>> ComputePayload(const Schema& schema,
0097 const std::vector<FieldRef>& output,
0098 const std::vector<FieldRef>& filter,
0099 const std::vector<FieldRef>& key);
0100 };
0101
0102 }
0103 }