arrow/compute/kernel.h

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017
0018 // NOTE: API is EXPERIMENTAL and will change without going through a
0019 // deprecation cycle
0020
0021 #pragma once
0022
0023 #include <cstddef>
0024 #include <cstdint>
0025 #include <functional>
0026 #include <memory>
0027 #include <string>
0028 #include <utility>
0029 #include <vector>
0030
0031 #include "arrow/buffer.h"
0032 #include "arrow/compute/exec.h"
0033 #include "arrow/datum.h"
0034 #include "arrow/device_allocation_type_set.h"
0035 #include "arrow/memory_pool.h"
0036 #include "arrow/result.h"
0037 #include "arrow/status.h"
0038 #include "arrow/type.h"
0039 #include "arrow/util/macros.h"
0040 #include "arrow/util/visibility.h"
0041
0042 // macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h.
0043 // No other BSD seems to do so. The name is used as an identifier in MemAllocation enum.
0044 #if defined(__APPLE__) && defined(PREALLOCATE)
0045 #  undef PREALLOCATE
0046 #endif
0047
0048 namespace arrow {
0049 namespace compute {
0050
0051 class FunctionOptions;
0052
0053 /// \brief Base class for opaque kernel-specific state. For example, if there
0054 /// is some kind of initialization required.
0055 struct ARROW_EXPORT KernelState {
0056   virtual ~KernelState() = default;
0057 };
0058
0059 /// \brief Context/state for the execution of a particular kernel.
0060 class ARROW_EXPORT KernelContext {
0061  public:
0062   // Can pass optional backreference; not used consistently for the
0063   // moment but will be made so in the future
0064   explicit KernelContext(ExecContext* exec_ctx, const Kernel* kernel = NULLPTR)
0065       : exec_ctx_(exec_ctx), kernel_(kernel) {}
0066
0067   /// \brief Allocate buffer from the context's memory pool. The contents are
0068   /// not initialized.
0069   Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);
0070
0071   /// \brief Allocate buffer for bitmap from the context's memory pool. Like
0072   /// Allocate, the contents of the buffer are not initialized but the last
0073   /// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
0074   Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);
0075
0076   /// \brief Assign the active KernelState to be utilized for each stage of
0077   /// kernel execution. Ownership and memory lifetime of the KernelState must
0078   /// be minded separately.
0079   void SetState(KernelState* state) { state_ = state; }
0080
0081   // Set kernel that is being invoked since some kernel
0082   // implementations will examine the kernel state.
0083   void SetKernel(const Kernel* kernel) { kernel_ = kernel; }
0084
0085   KernelState* state() { return state_; }
0086
0087   /// \brief Configuration related to function execution that is to be shared
0088   /// across multiple kernels.
0089   ExecContext* exec_context() { return exec_ctx_; }
0090
0091   /// \brief The memory pool to use for allocations. For now, it uses the
0092   /// MemoryPool contained in the ExecContext used to create the KernelContext.
0093   MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }
0094
0095   const Kernel* kernel() const { return kernel_; }
0096
0097  private:
0098   ExecContext* exec_ctx_;
0099   KernelState* state_ = NULLPTR;
0100   const Kernel* kernel_ = NULLPTR;
0101 };
0102
0103 /// \brief An type-checking interface to permit customizable validation rules
0104 /// for use with InputType and KernelSignature. This is for scenarios where the
0105 /// acceptance is not an exact type instance, such as a TIMESTAMP type for a
0106 /// specific TimeUnit, but permitting any time zone.
0107 struct ARROW_EXPORT TypeMatcher {
0108   virtual ~TypeMatcher() = default;
0109
0110   /// \brief Return true if this matcher accepts the data type.
0111   virtual bool Matches(const DataType& type) const = 0;
0112
0113   /// \brief A human-interpretable string representation of what the type
0114   /// matcher checks for, usable when printing KernelSignature or formatting
0115   /// error messages.
0116   virtual std::string ToString() const = 0;
0117
0118   /// \brief Return true if this TypeMatcher contains the same matching rule as
0119   /// the other. Currently depends on RTTI.
0120   virtual bool Equals(const TypeMatcher& other) const = 0;
0121 };
0122
0123 namespace match {
0124
0125 /// \brief Match any DataType instance having the same DataType::id.
0126 ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);
0127
0128 /// \brief Match any TimestampType instance having the same unit, but the time
0129 /// zones can be different.
0130 ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
0131 ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
0132 ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
0133 ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);
0134
0135 // \brief Match any integer type
0136 ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();
0137
0138 // Match types using 32-bit varbinary representation
0139 ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();
0140
0141 // Match types using 64-bit varbinary representation
0142 ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();
0143
0144 // Match any fixed binary type
0145 ARROW_EXPORT std::shared_ptr<TypeMatcher> FixedSizeBinaryLike();
0146
0147 // \brief Match any primitive type (boolean or any type representable as a C
0148 // Type)
0149 ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();
0150
0151 // \brief Match any integer type that can be used as run-end in run-end encoded
0152 // arrays
0153 ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndInteger();
0154
0155 /// \brief Match run-end encoded types that use any valid run-end type and
0156 /// encode specific value types
0157 ///
0158 /// @param[in] value_type_matcher a matcher that is applied to the values field
0159 ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
0160     std::shared_ptr<TypeMatcher> value_type_matcher);
0161
0162 /// \brief Match run-end encoded types that use any valid run-end type and
0163 /// encode specific value types
0164 ///
0165 /// @param[in] value_type_id a type id that the type of the values field should match
0166 ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(Type::type value_type_id);
0167
0168 /// \brief Match run-end encoded types that encode specific run-end and value types
0169 ///
0170 /// @param[in] run_end_type_matcher a matcher that is applied to the run_ends field
0171 /// @param[in] value_type_matcher a matcher that is applied to the values field
0172 ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
0173     std::shared_ptr<TypeMatcher> run_end_type_matcher,
0174     std::shared_ptr<TypeMatcher> value_type_matcher);
0175
0176 }  // namespace match
0177
0178 /// \brief An object used for type-checking arguments to be passed to a kernel
0179 /// and stored in a KernelSignature. The type-checking rule can be supplied
0180 /// either with an exact DataType instance or a custom TypeMatcher.
0181 class ARROW_EXPORT InputType {
0182  public:
0183   /// \brief The kind of type-checking rule that the InputType contains.
0184   enum Kind {
0185     /// \brief Accept any value type.
0186     ANY_TYPE,
0187
0188     /// \brief A fixed arrow::DataType and will only exact match having this
0189     /// exact type (e.g. same TimestampType unit, same decimal scale and
0190     /// precision, or same nested child types).
0191     EXACT_TYPE,
0192
0193     /// \brief Uses a TypeMatcher implementation to check the type.
0194     USE_TYPE_MATCHER
0195   };
0196
0197   /// \brief Accept any value type
0198   InputType() : kind_(ANY_TYPE) {}
0199
0200   /// \brief Accept an exact value type.
0201   InputType(std::shared_ptr<DataType> type)  // NOLINT implicit construction
0202       : kind_(EXACT_TYPE), type_(std::move(type)) {}
0203
0204   /// \brief Use the passed TypeMatcher to type check.
0205   InputType(std::shared_ptr<TypeMatcher> type_matcher)  // NOLINT implicit construction
0206       : kind_(USE_TYPE_MATCHER), type_matcher_(std::move(type_matcher)) {}
0207
0208   /// \brief Match any type with the given Type::type. Uses a TypeMatcher for
0209   /// its implementation.
0210   InputType(Type::type type_id)  // NOLINT implicit construction
0211       : InputType(match::SameTypeId(type_id)) {}
0212
0213   InputType(const InputType& other) { CopyInto(other); }
0214
0215   void operator=(const InputType& other) { CopyInto(other); }
0216
0217   InputType(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
0218
0219   void operator=(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
0220
0221   // \brief Match any input (array, scalar of any type)
0222   static InputType Any() { return InputType(); }
0223
0224   /// \brief Return true if this input type matches the same type cases as the
0225   /// other.
0226   bool Equals(const InputType& other) const;
0227
0228   bool operator==(const InputType& other) const { return this->Equals(other); }
0229
0230   bool operator!=(const InputType& other) const { return !(*this == other); }
0231
0232   /// \brief Return hash code.
0233   size_t Hash() const;
0234
0235   /// \brief Render a human-readable string representation.
0236   std::string ToString() const;
0237
0238   /// \brief Return true if the Datum matches this argument kind in
0239   /// type (and only allows scalar or array-like Datums).
0240   bool Matches(const Datum& value) const;
0241
0242   /// \brief Return true if the type matches this InputType
0243   bool Matches(const DataType& type) const;
0244
0245   /// \brief The type matching rule that this InputType uses.
0246   Kind kind() const { return kind_; }
0247
0248   /// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
0249   /// must match. Otherwise this function should not be used and will assert in
0250   /// debug builds.
0251   const std::shared_ptr<DataType>& type() const;
0252
0253   /// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
0254   /// checking the type of a value. Otherwise this function should not be used
0255   /// and will assert in debug builds.
0256   const TypeMatcher& type_matcher() const;
0257
0258  private:
0259   void CopyInto(const InputType& other) {
0260     this->kind_ = other.kind_;
0261     this->type_ = other.type_;
0262     this->type_matcher_ = other.type_matcher_;
0263   }
0264
0265   void MoveInto(InputType&& other) {
0266     this->kind_ = other.kind_;
0267     this->type_ = std::move(other.type_);
0268     this->type_matcher_ = std::move(other.type_matcher_);
0269   }
0270
0271   Kind kind_;
0272
0273   // For EXACT_TYPE Kind
0274   std::shared_ptr<DataType> type_;
0275
0276   // For USE_TYPE_MATCHER Kind
0277   std::shared_ptr<TypeMatcher> type_matcher_;
0278 };
0279
0280 /// \brief Container to capture both exact and input-dependent output types.
0281 class ARROW_EXPORT OutputType {
0282  public:
0283   /// \brief An enum indicating whether the value type is an invariant fixed
0284   /// value or one that's computed by a kernel-defined resolver function.
0285   enum ResolveKind { FIXED, COMPUTED };
0286
0287   /// Type resolution function. Given input types, return output type.  This
0288   /// function MAY may use the kernel state to decide the output type based on
0289   /// the FunctionOptions.
0290   ///
0291   /// This function SHOULD _not_ be used to check for arity, that is to be
0292   /// performed one or more layers above.
0293   using Resolver =
0294       std::function<Result<TypeHolder>(KernelContext*, const std::vector<TypeHolder>&)>;
0295
0296   /// \brief Output an exact type
0297   OutputType(std::shared_ptr<DataType> type)  // NOLINT implicit construction
0298       : kind_(FIXED), type_(std::move(type)) {}
0299
0300   /// \brief Output a computed type depending on actual input types
0301   template <typename Fn>
0302   OutputType(Fn resolver)  // NOLINT implicit construction
0303       : kind_(COMPUTED), resolver_(std::move(resolver)) {}
0304
0305   OutputType(const OutputType& other) {
0306     this->kind_ = other.kind_;
0307     this->type_ = other.type_;
0308     this->resolver_ = other.resolver_;
0309   }
0310
0311   OutputType(OutputType&& other) {
0312     this->kind_ = other.kind_;
0313     this->type_ = std::move(other.type_);
0314     this->resolver_ = other.resolver_;
0315   }
0316
0317   OutputType& operator=(const OutputType&) = default;
0318   OutputType& operator=(OutputType&&) = default;
0319
0320   /// \brief Return the type of the expected output value of the kernel given
0321   /// the input argument types. The resolver may make use of state information
0322   /// kept in the KernelContext.
0323   Result<TypeHolder> Resolve(KernelContext* ctx,
0324                              const std::vector<TypeHolder>& args) const;
0325
0326   /// \brief The exact output value type for the FIXED kind.
0327   const std::shared_ptr<DataType>& type() const;
0328
0329   /// \brief For use with COMPUTED resolution strategy. It may be more
0330   /// convenient to invoke this with OutputType::Resolve returned from this
0331   /// method.
0332   const Resolver& resolver() const;
0333
0334   /// \brief Render a human-readable string representation.
0335   std::string ToString() const;
0336
0337   /// \brief Return the kind of type resolution of this output type, whether
0338   /// fixed/invariant or computed by a resolver.
0339   ResolveKind kind() const { return kind_; }
0340
0341  private:
0342   ResolveKind kind_;
0343
0344   // For FIXED resolution
0345   std::shared_ptr<DataType> type_;
0346
0347   // For COMPUTED resolution
0348   Resolver resolver_ = NULLPTR;
0349 };
0350
0351 /// \brief Holds the input types and output type of the kernel.
0352 ///
0353 /// VarArgs functions with minimum N arguments should pass up to N input types to be
0354 /// used to validate the input types of a function invocation. The first N-1 types
0355 /// will be matched against the first N-1 arguments, and the last type will be
0356 /// matched against the remaining arguments.
0357 class ARROW_EXPORT KernelSignature {
0358  public:
0359   KernelSignature(std::vector<InputType> in_types, OutputType out_type,
0360                   bool is_varargs = false);
0361
0362   /// \brief Convenience ctor since make_shared can be awkward
0363   static std::shared_ptr<KernelSignature> Make(std::vector<InputType> in_types,
0364                                                OutputType out_type,
0365                                                bool is_varargs = false);
0366
0367   /// \brief Return true if the signature if compatible with the list of input
0368   /// value descriptors.
0369   bool MatchesInputs(const std::vector<TypeHolder>& types) const;
0370
0371   /// \brief Returns true if the input types of each signature are
0372   /// equal. Well-formed functions should have a deterministic output type
0373   /// given input types, but currently it is the responsibility of the
0374   /// developer to ensure this.
0375   bool Equals(const KernelSignature& other) const;
0376
0377   bool operator==(const KernelSignature& other) const { return this->Equals(other); }
0378
0379   bool operator!=(const KernelSignature& other) const { return !(*this == other); }
0380
0381   /// \brief Compute a hash code for the signature
0382   size_t Hash() const;
0383
0384   /// \brief The input types for the kernel. For VarArgs functions, this should
0385   /// generally contain a single validator to use for validating all of the
0386   /// function arguments.
0387   const std::vector<InputType>& in_types() const { return in_types_; }
0388
0389   /// \brief The output type for the kernel. Use Resolve to return the
0390   /// exact output given input argument types, since many kernels'
0391   /// output types depend on their input types (or their type
0392   /// metadata).
0393   const OutputType& out_type() const { return out_type_; }
0394
0395   /// \brief Render a human-readable string representation
0396   std::string ToString() const;
0397
0398   bool is_varargs() const { return is_varargs_; }
0399
0400  private:
0401   std::vector<InputType> in_types_;
0402   OutputType out_type_;
0403   bool is_varargs_;
0404
0405   // For caching the hash code after it's computed the first time
0406   mutable uint64_t hash_code_;
0407 };
0408
/// \brief SIMD instruction-set levels a kernel variant may target.
///
/// A function may contain multiple variants of a kernel for a given type
/// combination for different SIMD levels. Based on the active system's CPU
/// info or the user's preferences, we can elect to use one over the other.
struct SimdLevel {
  enum type {
    NONE = 0,
    SSE4_2,
    AVX,
    AVX2,
    AVX512,
    NEON,
    // Last enumerator; its value equals the number of levels listed above.
    MAX
  };
};
0415
/// \brief The strategy to use for propagating or otherwise populating the
/// validity bitmap of a kernel output.
struct NullHandling {
  enum type {
    /// Compute the output validity bitmap by intersecting the validity bitmaps
    /// of the arguments using bitwise-and operations: an output value is
    /// valid/non-null only if the corresponding values in all input arguments
    /// were valid/non-null. The kernel generally need not touch the bitmap
    /// thereafter, but its exec function is permitted to alter the bitmap
    /// after the null intersection is computed if it needs to.
    INTERSECTION,

    /// Kernel expects a pre-allocated buffer to write the result bitmap
    /// into. The preallocated memory is not zeroed (except for the last byte),
    /// so the kernel should ensure to completely populate the bitmap.
    COMPUTED_PREALLOCATE,

    /// Kernel allocates and sets the validity bitmap of the output.
    COMPUTED_NO_PREALLOCATE,

    /// Kernel output is never null and a validity bitmap does not need to be
    /// allocated.
    OUTPUT_NOT_NULL
  };
};
0442
/// \brief The preference for memory preallocation of fixed-width type outputs
/// in kernel execution.
struct MemAllocation {
  enum type {
    /// For data types that support pre-allocation (i.e. fixed-width), the
    /// kernel expects to be provided a pre-allocated data buffer to write
    /// into. Non-fixed-width types must always allocate their own data
    /// buffers. The allocation is made for the same length as the execution
    /// batch, so vector kernels yielding differently sized output should not
    /// use this.
    ///
    /// It is valid for the data to not be preallocated but the validity bitmap
    /// is (or is computed using the intersection/bitwise-and method).
    ///
    /// For variable-size output types like BinaryType or StringType, or for
    /// nested types, this option has no effect.
    PREALLOCATE,

    /// The kernel is responsible for allocating its own data buffer for
    /// fixed-width type outputs.
    NO_PREALLOCATE
  };
};
0465
0466 struct Kernel;
0467
0468 /// \brief Arguments to pass to an KernelInit function. A struct is used to help
0469 /// avoid API breakage should the arguments passed need to be expanded.
0470 struct KernelInitArgs {
0471   /// \brief A pointer to the kernel being initialized. The init function may
0472   /// depend on the kernel's KernelSignature or other data contained there.
0473   const Kernel* kernel;
0474
0475   /// \brief The types of the input arguments that the kernel is
0476   /// about to be executed against.
0477   const std::vector<TypeHolder>& inputs;
0478
0479   /// \brief Opaque options specific to this kernel. May be nullptr for functions
0480   /// that do not require options.
0481   const FunctionOptions* options;
0482 };
0483
0484 /// \brief Common initializer function for all kernel types.
0485 using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
0486     KernelContext*, const KernelInitArgs&)>;
0487
0488 /// \brief Base type for kernels. Contains the function signature and
0489 /// optionally the state initialization function, along with some common
0490 /// attributes
0491 struct ARROW_EXPORT Kernel {
0492   Kernel() = default;
0493
0494   Kernel(std::shared_ptr<KernelSignature> sig, KernelInit init)
0495       : signature(std::move(sig)), init(std::move(init)) {}
0496
0497   Kernel(std::vector<InputType> in_types, OutputType out_type, KernelInit init)
0498       : Kernel(KernelSignature::Make(std::move(in_types), std::move(out_type)),
0499                std::move(init)) {}
0500
0501   /// \brief The "signature" of the kernel containing the InputType input
0502   /// argument validators and OutputType output type resolver.
0503   std::shared_ptr<KernelSignature> signature;
0504
0505   /// \brief Create a new KernelState for invocations of this kernel, e.g. to
0506   /// set up any options or state relevant for execution.
0507   KernelInit init;
0508
0509   /// \brief Create a vector of new KernelState for invocations of this kernel.
0510   static Status InitAll(KernelContext*, const KernelInitArgs&,
0511                         std::vector<std::unique_ptr<KernelState>>*);
0512
0513   /// \brief Indicates whether execution can benefit from parallelization
0514   /// (splitting large chunks into smaller chunks and using multiple
0515   /// threads). Some kernels may not support parallel execution at
0516   /// all. Synchronization and concurrency-related issues are currently the
0517   /// responsibility of the Kernel's implementation.
0518   bool parallelizable = true;
0519
0520   /// \brief Indicates the level of SIMD instruction support in the host CPU is
0521   /// required to use the function. The intention is for functions to be able to
0522   /// contain multiple kernels with the same signature but different levels of SIMD,
0523   /// so that the most optimized kernel supported on a host's processor can be chosen.
0524   SimdLevel::type simd_level = SimdLevel::NONE;
0525
0526   // Additional kernel-specific data
0527   std::shared_ptr<KernelState> data;
0528 };
0529
0530 /// \brief The scalar kernel execution API that must be implemented for SCALAR
0531 /// kernel types. This includes both stateless and stateful kernels. Kernels
0532 /// depending on some execution state access that state via subclasses of
0533 /// KernelState set on the KernelContext object. Implementations should
0534 /// endeavor to write into pre-allocated memory if they are able, though for
0535 /// some kernels (e.g. in cases when a builder like StringBuilder) must be
0536 /// employed this may not be possible.
0537 using ArrayKernelExec = Status (*)(KernelContext*, const ExecSpan&, ExecResult*);
0538
0539 /// \brief Kernel data structure for implementations of ScalarFunction. In
0540 /// addition to the members found in Kernel, contains the null handling
0541 /// and memory pre-allocation preferences.
0542 struct ARROW_EXPORT ScalarKernel : public Kernel {
0543   ScalarKernel() = default;
0544
0545   ScalarKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
0546                KernelInit init = NULLPTR)
0547       : Kernel(std::move(sig), init), exec(exec) {}
0548
0549   ScalarKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
0550                KernelInit init = NULLPTR)
0551       : Kernel(std::move(in_types), std::move(out_type), std::move(init)), exec(exec) {}
0552
0553   /// \brief Perform a single invocation of this kernel. Depending on the
0554   /// implementation, it may only write into preallocated memory, while in some
0555   /// cases it will allocate its own memory. Any required state is managed
0556   /// through the KernelContext.
0557   ArrayKernelExec exec;
0558
0559   /// \brief Writing execution results into larger contiguous allocations
0560   /// requires that the kernel be able to write into sliced output ArrayData*,
0561   /// including sliced output validity bitmaps. Some kernel implementations may
0562   /// not be able to do this, so setting this to false disables this
0563   /// functionality.
0564   bool can_write_into_slices = true;
0565
0566   // For scalar functions preallocated data and intersecting arg validity
0567   // bitmaps is a reasonable default
0568   NullHandling::type null_handling = NullHandling::INTERSECTION;
0569   MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE;
0570 };
0571
0572 // ----------------------------------------------------------------------
0573 // VectorKernel (for VectorFunction)
0574
0575 /// \brief Kernel data structure for implementations of VectorFunction. In
0576 /// contains an optional finalizer function, the null handling and memory
0577 /// pre-allocation preferences (which have different defaults from
0578 /// ScalarKernel), and some other execution-related options.
0579 struct ARROW_EXPORT VectorKernel : public Kernel {
0580   /// \brief See VectorKernel::finalize member for usage
0581   using FinalizeFunc = std::function<Status(KernelContext*, std::vector<Datum>*)>;
0582
0583   /// \brief Function for executing a stateful VectorKernel against a
0584   /// ChunkedArray input. Does not need to be defined for all VectorKernels
0585   using ChunkedExec = Status (*)(KernelContext*, const ExecBatch&, Datum* out);
0586
0587   VectorKernel() = default;
0588
0589   VectorKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
0590                KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR)
0591       : Kernel(std::move(in_types), std::move(out_type), std::move(init)),
0592         exec(exec),
0593         finalize(std::move(finalize)) {}
0594
0595   VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
0596                KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR)
0597       : Kernel(std::move(sig), std::move(init)),
0598         exec(exec),
0599         finalize(std::move(finalize)) {}
0600
0601   /// \brief Perform a single invocation of this kernel. Any required state is
0602   /// managed through the KernelContext.
0603   ArrayKernelExec exec;
0604
0605   /// \brief Execute the kernel on a ChunkedArray. Does not need to be defined
0606   ChunkedExec exec_chunked = NULLPTR;
0607
0608   /// \brief For VectorKernel, convert intermediate results into finalized
0609   /// results. Mutates input argument. Some kernels may accumulate state
0610   /// (example: hashing-related functions) through processing chunked inputs, and
0611   /// then need to attach some accumulated state to each of the outputs of
0612   /// processing each chunk of data.
0613   FinalizeFunc finalize;
0614
0615   /// Since vector kernels generally are implemented rather differently from
0616   /// scalar/elementwise kernels (and they may not even yield arrays of the same
0617   /// size), so we make the developer opt-in to any memory preallocation rather
0618   /// than having to turn it off.
0619   NullHandling::type null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
0620   MemAllocation::type mem_allocation = MemAllocation::NO_PREALLOCATE;
0621
0622   /// \brief Writing execution results into larger contiguous allocations
0623   /// requires that the kernel be able to write into sliced output ArrayData*,
0624   /// including sliced output validity bitmaps. Some kernel implementations may
0625   /// not be able to do this, so setting this to false disables this
0626   /// functionality.
0627   bool can_write_into_slices = true;
0628
0629   /// Some vector kernels can do chunkwise execution using ExecSpanIterator,
0630   /// in some cases accumulating some state. Other kernels (like Take) need to
0631   /// be passed whole arrays and don't work on ChunkedArray inputs
0632   bool can_execute_chunkwise = true;
0633
0634   /// Some kernels (like unique and value_counts) yield non-chunked output from
0635   /// chunked-array inputs. This option controls how the results are boxed when
0636   /// returned from ExecVectorFunction
0637   ///
0638   /// true -> ChunkedArray
0639   /// false -> Array
0640   bool output_chunked = true;
0641 };
0642
0643 // ----------------------------------------------------------------------
0644 // ScalarAggregateKernel (for ScalarAggregateFunction)
0645
0646 using ScalarAggregateConsume = Status (*)(KernelContext*, const ExecSpan&);
0647 using ScalarAggregateMerge = Status (*)(KernelContext*, KernelState&&, KernelState*);
0648 // Finalize returns Datum to permit multiple return values
0649 using ScalarAggregateFinalize = Status (*)(KernelContext*, Datum*);
0650
0651 /// \brief Kernel data structure for implementations of
0652 /// ScalarAggregateFunction. The four necessary components of an aggregation
0653 /// kernel are the init, consume, merge, and finalize functions.
0654 ///
0655 /// * init: creates a new KernelState for a kernel.
0656 /// * consume: processes an ExecSpan and updates the KernelState found in the
0657 ///   KernelContext.
0658 /// * merge: combines one KernelState with another.
0659 /// * finalize: produces the end result of the aggregation using the
0660 ///   KernelState in the KernelContext.
0661 struct ARROW_EXPORT ScalarAggregateKernel : public Kernel {
0662   ScalarAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
0663                         ScalarAggregateConsume consume, ScalarAggregateMerge merge,
0664                         ScalarAggregateFinalize finalize, const bool ordered)
0665       : Kernel(std::move(sig), std::move(init)),
0666         consume(consume),
0667         merge(merge),
0668         finalize(finalize),
0669         ordered(ordered) {}
0670
0671   ScalarAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
0672                         KernelInit init, ScalarAggregateConsume consume,
0673                         ScalarAggregateMerge merge, ScalarAggregateFinalize finalize,
0674                         const bool ordered)
0675       : ScalarAggregateKernel(
0676             KernelSignature::Make(std::move(in_types), std::move(out_type)),
0677             std::move(init), consume, merge, finalize, ordered) {}
0678
0679   /// \brief Merge a vector of KernelStates into a single KernelState.
0680   /// The merged state will be returned and will be set on the KernelContext.
0681   static Result<std::unique_ptr<KernelState>> MergeAll(
0682       const ScalarAggregateKernel* kernel, KernelContext* ctx,
0683       std::vector<std::unique_ptr<KernelState>> states);
0684
0685   ScalarAggregateConsume consume;
0686   ScalarAggregateMerge merge;
0687   ScalarAggregateFinalize finalize;
0688   /// \brief Whether this kernel requires ordering
0689   /// Some aggregations, such as, "first", requires some kind of input order. The
0690   /// order can be implicit, e.g., the order of the input data, or explicit, e.g.
0691   /// the ordering specified with a window aggregation.
0692   /// The caller of the aggregate kernel is responsible for passing data in some
0693   /// defined order to the kernel. The flag here is a way for the kernel to tell
0694   /// the caller that data passed to the kernel must be defined in some order.
0695   bool ordered = false;
0696 };
0697
0698 // ----------------------------------------------------------------------
0699 // HashAggregateKernel (for HashAggregateFunction)
0700
0701 using HashAggregateResize = Status (*)(KernelContext*, int64_t);
0702 using HashAggregateConsume = Status (*)(KernelContext*, const ExecSpan&);
0703 using HashAggregateMerge = Status (*)(KernelContext*, KernelState&&, const ArrayData&);
0704
0705 // Finalize returns Datum to permit multiple return values
0706 using HashAggregateFinalize = Status (*)(KernelContext*, Datum*);
0707
0708 /// \brief Kernel data structure for implementations of
0709 /// HashAggregateFunction. The four necessary components of an aggregation
0710 /// kernel are the init, consume, merge, and finalize functions.
0711 ///
0712 /// * init: creates a new KernelState for a kernel.
0713 /// * resize: ensure that the KernelState can accommodate the specified number of groups.
0714 /// * consume: processes an ExecSpan (which includes the argument as well
0715 ///   as an array of group identifiers) and updates the KernelState found in the
0716 ///   KernelContext.
0717 /// * merge: combines one KernelState with another.
0718 /// * finalize: produces the end result of the aggregation using the
0719 ///   KernelState in the KernelContext.
0720 struct ARROW_EXPORT HashAggregateKernel : public Kernel {
0721   HashAggregateKernel() = default;
0722
0723   HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
0724                       HashAggregateResize resize, HashAggregateConsume consume,
0725                       HashAggregateMerge merge, HashAggregateFinalize finalize,
0726                       const bool ordered)
0727       : Kernel(std::move(sig), std::move(init)),
0728         resize(resize),
0729         consume(consume),
0730         merge(merge),
0731         finalize(finalize),
0732         ordered(ordered) {}
0733
0734   HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
0735                       KernelInit init, HashAggregateConsume consume,
0736                       HashAggregateResize resize, HashAggregateMerge merge,
0737                       HashAggregateFinalize finalize, const bool ordered)
0738       : HashAggregateKernel(
0739             KernelSignature::Make(std::move(in_types), std::move(out_type)),
0740             std::move(init), resize, consume, merge, finalize, ordered) {}
0741
0742   HashAggregateResize resize;
0743   HashAggregateConsume consume;
0744   HashAggregateMerge merge;
0745   HashAggregateFinalize finalize;
0746   /// @brief whether the summarizer requires ordering
0747   /// This is similar to ScalarAggregateKernel. See ScalarAggregateKernel
0748   /// for detailed doc of this variable.
0749   bool ordered = false;
0750 };
0751
0752 }  // namespace compute
0753 }  // namespace arrow