Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:56

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 // NOTE: API is EXPERIMENTAL and will change without going through a
0019 // deprecation cycle.
0020 
0021 #pragma once
0022 
0023 #include <string>
0024 #include <utility>
0025 #include <vector>
0026 
0027 #include "arrow/compute/kernel.h"
0028 #include "arrow/compute/type_fwd.h"
0029 #include "arrow/datum.h"
0030 #include "arrow/result.h"
0031 #include "arrow/status.h"
0032 #include "arrow/util/compare.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/visibility.h"
0035 
0036 namespace arrow {
0037 namespace compute {
0038 
0039 /// \addtogroup compute-functions
0040 /// @{
0041 
/// \brief Describes how many arguments a function requires.
///
/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
struct ARROW_EXPORT Arity {
  /// \brief Arity of a function taking no arguments
  static Arity Nullary() { return Arity{0, false}; }

  /// \brief Arity of a function taking 1 argument
  static Arity Unary() { return Arity{1, false}; }

  /// \brief Arity of a function taking 2 arguments
  static Arity Binary() { return Arity{2, false}; }

  /// \brief Arity of a function taking 3 arguments
  static Arity Ternary() { return Arity{3, false}; }

  /// \brief Arity of a function taking a variable number of arguments
  ///
  /// \param[in] min_args the fewest arguments that must be supplied when
  /// invoking the function
  static Arity VarArgs(int min_args = 0) { return Arity{min_args, true}; }

  // NOTE: Cython requires a default constructor, which is why both parameters
  // carry default values (the 0-argument form).
  explicit Arity(int num_args = 0, bool is_varargs = false)
      : num_args{num_args}, is_varargs{is_varargs} {}

  /// Count of required arguments; for varargs functions this is the minimum
  /// count.
  int num_args;

  /// When true, num_args is the minimum number of required arguments.
  bool is_varargs = false;
};
0075 
/// \brief Human-readable documentation attached to a compute function.
struct ARROW_EXPORT FunctionDoc {
  /// \brief A one-line summary of the function, using a verb.
  ///
  /// For example, "Add two numeric arrays or scalars".
  std::string summary;

  /// \brief A detailed description of the function, meant to follow the summary.
  std::string description;

  /// \brief Symbolic names (identifiers) for the function arguments.
  ///
  /// Some bindings may use this to generate nicer function signatures.
  std::vector<std::string> arg_names;

  // TODO add argument descriptions?

  /// \brief Name of the options class, if any.
  std::string options_class;

  /// \brief Whether options are required for function execution
  ///
  /// If false, then either the function does not have an options class
  /// or there is a usable default options value.
  ///
  /// Initialized in-class so that a default-constructed FunctionDoc
  /// (`FunctionDoc doc;`) never carries an indeterminate value; the default
  /// matches the one used by the full constructor below.
  bool options_required = false;

  /// \brief Construct an empty documentation record (no options required).
  FunctionDoc() = default;

  /// \brief Construct a documentation record from its parts.
  ///
  /// \param[in] summary one-line summary of the function
  /// \param[in] description detailed description following the summary
  /// \param[in] arg_names symbolic names of the function arguments
  /// \param[in] options_class name of the options class, empty if none
  /// \param[in] options_required whether options must be supplied to execute
  FunctionDoc(std::string summary, std::string description,
              std::vector<std::string> arg_names, std::string options_class = "",
              bool options_required = false)
      : summary(std::move(summary)),
        description(std::move(description)),
        arg_names(std::move(arg_names)),
        options_class(std::move(options_class)),
        options_required(options_required) {}

  /// \brief Return a reference to a shared FunctionDoc instance (defined out
  /// of line; presumably one with all fields empty).
  static const FunctionDoc& Empty();
};
0114 
0115 /// \brief An executor of a function with a preconfigured kernel
0116 class ARROW_EXPORT FunctionExecutor {
0117  public:
0118   virtual ~FunctionExecutor() = default;
0119   /// \brief Initialize or re-initialize the preconfigured kernel
0120   ///
0121   /// This method may be called zero or more times. Depending on how
0122   /// the FunctionExecutor was obtained, it may already have been initialized.
0123   virtual Status Init(const FunctionOptions* options = NULLPTR,
0124                       ExecContext* exec_ctx = NULLPTR) = 0;
0125   /// \brief Execute the preconfigured kernel with arguments that must fit it
0126   ///
0127   /// The method requires the arguments be castable to the preconfigured types.
0128   ///
0129   /// \param[in] args Arguments to execute the function on
0130   /// \param[in] length Length of arguments batch or -1 to default it. If the
0131   /// function has no parameters, this determines the batch length, defaulting
0132   /// to 0. Otherwise, if the function is scalar, this must equal the argument
0133   /// batch's inferred length or be -1 to default to it. This is ignored for
0134   /// vector functions.
0135   virtual Result<Datum> Execute(const std::vector<Datum>& args, int64_t length = -1) = 0;
0136 };
0137 
0138 /// \brief Base class for compute functions. Function implementations contain a
0139 /// collection of "kernels" which are implementations of the function for
0140 /// specific argument types. Selecting a viable kernel for executing a function
0141 /// is referred to as "dispatching".
0142 class ARROW_EXPORT Function {
0143  public:
0144   /// \brief The kind of function, which indicates in what contexts it is
0145   /// valid for use.
0146   enum Kind {
0147     /// A function that performs scalar data operations on whole arrays of
0148     /// data. Can generally process Array or Scalar values. The size of the
0149     /// output will be the same as the size (or broadcasted size, in the case
0150     /// of mixing Array and Scalar inputs) of the input.
0151     SCALAR,
0152 
0153     /// A function with array input and output whose behavior depends on the
0154     /// values of the entire arrays passed, rather than the value of each scalar
0155     /// value.
0156     VECTOR,
0157 
0158     /// A function that computes scalar summary statistics from array input.
0159     SCALAR_AGGREGATE,
0160 
0161     /// A function that computes grouped summary statistics from array input
0162     /// and an array of group identifiers.
0163     HASH_AGGREGATE,
0164 
0165     /// A function that dispatches to other functions and does not contain its
0166     /// own kernels.
0167     META
0168   };
0169 
0170   virtual ~Function() = default;
0171 
0172   /// \brief The name of the kernel. The registry enforces uniqueness of names.
0173   const std::string& name() const { return name_; }
0174 
0175   /// \brief The kind of kernel, which indicates in what contexts it is valid
0176   /// for use.
0177   Function::Kind kind() const { return kind_; }
0178 
0179   /// \brief Contains the number of arguments the function requires, or if the
0180   /// function accepts variable numbers of arguments.
0181   const Arity& arity() const { return arity_; }
0182 
0183   /// \brief Return the function documentation
0184   const FunctionDoc& doc() const { return doc_; }
0185 
0186   /// \brief Returns the number of registered kernels for this function.
0187   virtual int num_kernels() const = 0;
0188 
0189   /// \brief Return a kernel that can execute the function given the exact
0190   /// argument types (without implicit type casts).
0191   ///
0192   /// NB: This function is overridden in CastFunction.
0193   virtual Result<const Kernel*> DispatchExact(const std::vector<TypeHolder>& types) const;
0194 
0195   /// \brief Return a best-match kernel that can execute the function given the argument
0196   /// types, after implicit casts are applied.
0197   ///
0198   /// \param[in,out] values Argument types. An element may be modified to
0199   /// indicate that the returned kernel only approximately matches the input
0200   /// value descriptors; callers are responsible for casting inputs to the type
0201   /// required by the kernel.
0202   virtual Result<const Kernel*> DispatchBest(std::vector<TypeHolder>* values) const;
0203 
0204   /// \brief Get a function executor with a best-matching kernel
0205   ///
0206   /// The returned executor will by default work with the default FunctionOptions
0207   /// and KernelContext. If you want to change that, call `FunctionExecutor::Init`.
0208   virtual Result<std::shared_ptr<FunctionExecutor>> GetBestExecutor(
0209       std::vector<TypeHolder> inputs) const;
0210 
0211   /// \brief Execute the function eagerly with the passed input arguments with
0212   /// kernel dispatch, batch iteration, and memory allocation details taken
0213   /// care of.
0214   ///
0215   /// If the `options` pointer is null, then `default_options()` will be used.
0216   ///
0217   /// This function can be overridden in subclasses.
0218   virtual Result<Datum> Execute(const std::vector<Datum>& args,
0219                                 const FunctionOptions* options, ExecContext* ctx) const;
0220 
0221   virtual Result<Datum> Execute(const ExecBatch& batch, const FunctionOptions* options,
0222                                 ExecContext* ctx) const;
0223 
0224   /// \brief Returns the default options for this function.
0225   ///
0226   /// Whatever option semantics a Function has, implementations must guarantee
0227   /// that default_options() is valid to pass to Execute as options.
0228   const FunctionOptions* default_options() const { return default_options_; }
0229 
0230   virtual Status Validate() const;
0231 
0232   /// \brief Returns the pure property for this function.
0233   ///
0234   /// Impure functions are those that may return different results for the same
0235   /// input arguments. For example, a function that returns a random number is
0236   /// not pure. An expression containing only pure functions can be simplified by
0237   /// pre-evaluating any sub-expressions that have constant arguments.
0238   virtual bool is_pure() const { return true; }
0239 
0240  protected:
0241   Function(std::string name, Function::Kind kind, const Arity& arity, FunctionDoc doc,
0242            const FunctionOptions* default_options)
0243       : name_(std::move(name)),
0244         kind_(kind),
0245         arity_(arity),
0246         doc_(std::move(doc)),
0247         default_options_(default_options) {}
0248 
0249   Status CheckArity(size_t num_args) const;
0250 
0251   std::string name_;
0252   Function::Kind kind_;
0253   Arity arity_;
0254   const FunctionDoc doc_;
0255   const FunctionOptions* default_options_ = NULLPTR;
0256 };
0257 
0258 namespace detail {
0259 
0260 template <typename KernelType>
0261 class FunctionImpl : public Function {
0262  public:
0263   /// \brief Return pointers to current-available kernels for inspection
0264   std::vector<const KernelType*> kernels() const {
0265     std::vector<const KernelType*> result;
0266     for (const auto& kernel : kernels_) {
0267       result.push_back(&kernel);
0268     }
0269     return result;
0270   }
0271 
0272   int num_kernels() const override { return static_cast<int>(kernels_.size()); }
0273 
0274  protected:
0275   FunctionImpl(std::string name, Function::Kind kind, const Arity& arity, FunctionDoc doc,
0276                const FunctionOptions* default_options)
0277       : Function(std::move(name), kind, arity, std::move(doc), default_options) {}
0278 
0279   std::vector<KernelType> kernels_;
0280 };
0281 
0282 /// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
0283 ARROW_EXPORT
0284 const Kernel* DispatchExactImpl(const Function* func, const std::vector<TypeHolder>&);
0285 
0286 /// \brief Return an error message if no Kernel is found.
0287 ARROW_EXPORT
0288 Status NoMatchingKernel(const Function* func, const std::vector<TypeHolder>&);
0289 
0290 }  // namespace detail
0291 
0292 /// \brief A function that executes elementwise operations on arrays or
0293 /// scalars, and therefore whose results generally do not depend on the order
0294 /// of the values in the arguments. Accepts and returns arrays that are all of
0295 /// the same size. These functions roughly correspond to the functions used in
0296 /// SQL expressions.
0297 class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
0298  public:
0299   using KernelType = ScalarKernel;
0300 
0301   ScalarFunction(std::string name, const Arity& arity, FunctionDoc doc,
0302                  const FunctionOptions* default_options = NULLPTR, bool is_pure = true)
0303       : detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity,
0304                                            std::move(doc), default_options),
0305         is_pure_(is_pure) {}
0306 
0307   /// \brief Add a kernel with given input/output types, no required state
0308   /// initialization, preallocation for fixed-width types, and default null
0309   /// handling (intersect validity bitmaps of inputs).
0310   Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
0311                    ArrayKernelExec exec, KernelInit init = NULLPTR);
0312 
0313   /// \brief Add a kernel (function implementation). Returns error if the
0314   /// kernel's signature does not match the function's arity.
0315   Status AddKernel(ScalarKernel kernel);
0316 
0317   /// \brief Returns the pure property for this function.
0318   bool is_pure() const override { return is_pure_; }
0319 
0320  private:
0321   const bool is_pure_;
0322 };
0323 
0324 /// \brief A function that executes general array operations that may yield
0325 /// outputs of different sizes or have results that depend on the whole array
0326 /// contents. These functions roughly correspond to the functions found in
0327 /// non-SQL array languages like APL and its derivatives.
0328 class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
0329  public:
0330   using KernelType = VectorKernel;
0331 
0332   VectorFunction(std::string name, const Arity& arity, FunctionDoc doc,
0333                  const FunctionOptions* default_options = NULLPTR)
0334       : detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity,
0335                                            std::move(doc), default_options) {}
0336 
0337   /// \brief Add a simple kernel with given input/output types, no required
0338   /// state initialization, no data preallocation, and no preallocation of the
0339   /// validity bitmap.
0340   Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
0341                    ArrayKernelExec exec, KernelInit init = NULLPTR);
0342 
0343   /// \brief Add a kernel (function implementation). Returns error if the
0344   /// kernel's signature does not match the function's arity.
0345   Status AddKernel(VectorKernel kernel);
0346 };
0347 
0348 class ARROW_EXPORT ScalarAggregateFunction
0349     : public detail::FunctionImpl<ScalarAggregateKernel> {
0350  public:
0351   using KernelType = ScalarAggregateKernel;
0352 
0353   ScalarAggregateFunction(std::string name, const Arity& arity, FunctionDoc doc,
0354                           const FunctionOptions* default_options = NULLPTR)
0355       : detail::FunctionImpl<ScalarAggregateKernel>(std::move(name),
0356                                                     Function::SCALAR_AGGREGATE, arity,
0357                                                     std::move(doc), default_options) {}
0358 
0359   /// \brief Add a kernel (function implementation). Returns error if the
0360   /// kernel's signature does not match the function's arity.
0361   Status AddKernel(ScalarAggregateKernel kernel);
0362 };
0363 
0364 class ARROW_EXPORT HashAggregateFunction
0365     : public detail::FunctionImpl<HashAggregateKernel> {
0366  public:
0367   using KernelType = HashAggregateKernel;
0368 
0369   HashAggregateFunction(std::string name, const Arity& arity, FunctionDoc doc,
0370                         const FunctionOptions* default_options = NULLPTR)
0371       : detail::FunctionImpl<HashAggregateKernel>(std::move(name),
0372                                                   Function::HASH_AGGREGATE, arity,
0373                                                   std::move(doc), default_options) {}
0374 
0375   /// \brief Add a kernel (function implementation). Returns error if the
0376   /// kernel's signature does not match the function's arity.
0377   Status AddKernel(HashAggregateKernel kernel);
0378 };
0379 
0380 /// \brief A function that dispatches to other functions. Must implement
0381 /// MetaFunction::ExecuteImpl.
0382 ///
0383 /// For Array, ChunkedArray, and Scalar Datum kinds, may rely on the execution
0384 /// of concrete Function types, but must handle other Datum kinds on its own.
0385 class ARROW_EXPORT MetaFunction : public Function {
0386  public:
0387   int num_kernels() const override { return 0; }
0388 
0389   Result<Datum> Execute(const std::vector<Datum>& args, const FunctionOptions* options,
0390                         ExecContext* ctx) const override;
0391 
0392   Result<Datum> Execute(const ExecBatch& batch, const FunctionOptions* options,
0393                         ExecContext* ctx) const override;
0394 
0395  protected:
0396   virtual Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
0397                                     const FunctionOptions* options,
0398                                     ExecContext* ctx) const = 0;
0399 
0400   MetaFunction(std::string name, const Arity& arity, FunctionDoc doc,
0401                const FunctionOptions* default_options = NULLPTR)
0402       : Function(std::move(name), Function::META, arity, std::move(doc),
0403                  default_options) {}
0404 };
0405 
0406 /// @}
0407 
0408 }  // namespace compute
0409 }  // namespace arrow