Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:53

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <functional>
0021 #include <memory>
0022 #include <vector>
0023 
0024 #include "arrow/acero/accumulation_queue.h"
0025 #include "arrow/acero/bloom_filter.h"
0026 #include "arrow/acero/options.h"
0027 #include "arrow/acero/query_context.h"
0028 #include "arrow/acero/schema_util.h"
0029 #include "arrow/acero/task_util.h"
0030 #include "arrow/result.h"
0031 #include "arrow/status.h"
0032 #include "arrow/type.h"
0033 #include "arrow/util/tracing.h"
0034 
0035 namespace arrow {
0036 namespace acero {
0037 
0038 using util::AccumulationQueue;  // lets arrow::acero code name util::AccumulationQueue unqualified
0039 
0040 class HashJoinImpl {
     // Abstract interface for a hash-join implementation. Every operation declared
     // here is pure virtual; instances are obtained through the static MakeBasic() /
     // MakeSwiss() factories, which return an owning pointer typed as this base class.
0041  public:
       // Callback types exchanged with the owner of the join. Only the signatures
       // are fixed by this header; what each integer argument means is decided by
       // code outside it, so the NOTE(review) remarks below are assumptions.

       // Receives an int64_t and an ExecBatch (by value).
       // NOTE(review): the int64_t is presumably a thread index -- confirm.
0042   using OutputBatchCallback = std::function<Status(int64_t, ExecBatch)>;
       // Type of the `on_finished` argument of BuildHashTable().
       // NOTE(review): the size_t is presumably a thread index -- confirm.
0043   using BuildFinishedCallback = std::function<Status(size_t)>;
       // Type of the `finished_callback` argument of Init().
       // NOTE(review): the int64_t presumably reports an output total -- confirm.
0044   using FinishedCallback = std::function<Status(int64_t)>;
       // Takes two functions -- Status(size_t, int64_t) and Status(size_t) -- and
       // returns an int.
       // NOTE(review): presumably a per-task body plus a group-completion
       // continuation, with the int identifying the registered group -- confirm.
0045   using RegisterTaskGroupCallback = std::function<int(
0046       std::function<Status(size_t, int64_t)>, std::function<Status(size_t)>)>;
       // NOTE(review): arguments are presumably (task group id, task count) --
       // confirm against the scheduler that supplies this callback.
0047   using StartTaskGroupCallback = std::function<Status(int, int64_t)>;
       // NOTE(review): no declaration in this class refers to this alias; Abort()
       // below takes TaskScheduler::AbortContinuationImpl instead. Confirm whether
       // the two are intended to be the same type.
0048   using AbortContinuationImpl = std::function<void()>;
0049 
       // Virtual so an implementation can be destroyed through a HashJoinImpl
       // pointer, which is how the factories below hand instances out.
0050   virtual ~HashJoinImpl() = default;
       // Supplies the join's configuration and the callbacks it will use.
       //
       //   ctx                           query context, passed as a raw pointer
       //                                 (NOTE(review): presumably non-owning and
       //                                 required to outlive this object -- confirm)
       //   join_type                     which kind of join to perform
       //   num_threads                   NOTE(review): presumably the upper bound for
       //                                 the `thread_index` values used below -- confirm
       //   proj_map_left, proj_map_right projection maps for the two inputs, passed
       //                                 as pointers-to-const (lifetime: see `ctx`)
       //   key_cmp                       key comparison settings, taken by value
       //   filter                        expression, taken by value
       //                                 (NOTE(review): presumably a residual filter
       //                                 applied to matches -- confirm)
       //   register_task_group_callback, start_task_group_callback,
       //   output_batch_callback, finished_callback
       //                                 callbacks of the alias types declared above
       //
       // Returns a Status describing success or failure.
0051   virtual Status Init(QueryContext* ctx, JoinType join_type, size_t num_threads,
0052                       const HashJoinProjectionMaps* proj_map_left,
0053                       const HashJoinProjectionMaps* proj_map_right,
0054                       std::vector<JoinKeyCmp> key_cmp, Expression filter,
0055                       RegisterTaskGroupCallback register_task_group_callback,
0056                       StartTaskGroupCallback start_task_group_callback,
0057                       OutputBatchCallback output_batch_callback,
0058                       FinishedCallback finished_callback) = 0;
0059 
       // Takes ownership of `batches` (passed by value) together with a completion
       // callback.
       // NOTE(review): presumably builds the hash table from the build-side input
       // and invokes `on_finished` once done; whether that happens before this
       // call returns is not visible in this header -- confirm.
0060   virtual Status BuildHashTable(size_t thread_index, AccumulationQueue batches,
0061                                 BuildFinishedCallback on_finished) = 0;
       // Hands one batch (by value) to the join on behalf of `thread_index`.
       // NOTE(review): presumably a probe-side batch whose matches are emitted
       // through the output callback given to Init() -- confirm.
0062   virtual Status ProbeSingleBatch(size_t thread_index, ExecBatch batch) = 0;
       // NOTE(review): presumably signals that no further ProbeSingleBatch() calls
       // will follow -- confirm.
0063   virtual Status ProbingFinished(size_t thread_index) = 0;
       // Requests an abort; returns nothing, so failures cannot be reported here.
       // NOTE(review): `pos_abort_callback` is presumably run once the abort has
       // taken effect -- confirm. Its type comes from TaskScheduler, not from the
       // AbortContinuationImpl alias declared in this class.
0064   virtual void Abort(TaskScheduler::AbortContinuationImpl pos_abort_callback) = 0;
       // Returns a textual description of the implementation; does not modify it.
       // NOTE(review): std::string is used here although <string> is not among
       // this header's direct includes -- presumably it arrives transitively.
0065   virtual std::string ToString() const = 0;
0066 
       // Factories. Each returns either an owning pointer to an implementation or
       // an error, wrapped in Result.
       // NOTE(review): "Basic" and "Swiss" presumably name two different hash table
       // strategies; the difference is not visible in this header -- confirm.
0067   static Result<std::unique_ptr<HashJoinImpl>> MakeBasic();
0068   static Result<std::unique_ptr<HashJoinImpl>> MakeSwiss();
0069 
0070  protected:
       // Tracing span, protected so that derived implementations can access it.
0071   arrow::util::tracing::Span span_;
0072 };
0073 
0074 }  // namespace acero
0075 }  // namespace arrow