Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:56

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 // This API is EXPERIMENTAL.
0019 
0020 #pragma once
0021 
0022 #include <memory>
0023 #include <string>
0024 #include <utility>
0025 #include <variant>
0026 #include <vector>
0027 
0028 #include "arrow/compute/type_fwd.h"
0029 #include "arrow/datum.h"
0030 #include "arrow/type_fwd.h"
0031 #include "arrow/util/small_vector.h"
0032 
0033 namespace arrow {
0034 namespace compute {
0035 
0036 /// \defgroup expression-core Expressions to describe data transformations
0037 ///
0038 /// @{
0039 
0040 /// An unbound expression which maps a single Datum to another Datum.
0041 /// An expression is one of
0042 /// - A literal Datum.
0043 /// - A reference to a single (potentially nested) field of the input Datum.
0044 /// - A call to a compute function, with arguments specified by other Expressions.
///
/// The three alternatives are stored in a std::variant<Datum, Parameter, Call> which
/// is held through a std::shared_ptr (see Impl / impl_ in the private section). A
/// default-constructed Expression holds no such payload, so is_valid() returns false.
0045 class ARROW_EXPORT Expression {
0046  public:
  /// Payload of an Expression which is a call to a compute function.
0047   struct Call {
    /// Name of the function being called.
0048     std::string function_name;
    /// Argument expressions; each one is itself an arbitrary Expression.
0049     std::vector<Expression> arguments;
    /// Options for the call, if any.
0050     std::shared_ptr<FunctionOptions> options;
0051     // Cached hash value
    // NOTE(review): there is no default member initializer here; presumably the value
    // is filled in by ComputeHash() before it is ever read -- confirm in the
    // implementation.
0052     size_t hash;
0053 
0054     // post-Bind properties:
    // (presumably left null/empty until Bind() has resolved them -- TODO confirm)
0055     std::shared_ptr<Function> function;
0056     const Kernel* kernel = NULLPTR;
0057     std::shared_ptr<KernelState> kernel_state;
0058     TypeHolder type;
0059 
    /// Compute the hash for this call (defined out of line); presumably the result is
    /// stored in `hash` -- TODO confirm.
0060     void ComputeHash();
0061   };
0062 
  /// Return a string rendering of this expression (format defined out of line).
0063   std::string ToString() const;
  /// Return true if this expression is considered equal to `other`. The free
  /// operator== and operator!= for Expression are expressed in terms of this.
0064   bool Equals(const Expression& other) const;
  /// Return a hash value for this expression.
0065   size_t hash() const;
  /// Function object which forwards to Expression::hash().
0066   struct Hash {
0067     size_t operator()(const Expression& expr) const { return expr.hash(); }
0068   };
0069 
0070   /// Bind this expression to the given input type, looking up Kernels and field types.
0071   /// Some expression simplification may be performed and implicit casts will be inserted.
0072   /// Any state necessary for execution will be initialized and returned.
  ///
  /// The ExecContext argument is optional and defaults to NULLPTR. Failure is reported
  /// through the returned Result.
0073   Result<Expression> Bind(const TypeHolder& in, ExecContext* = NULLPTR) const;
0074   Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
0075 
0076   // XXX someday
0077   // Clone all KernelState in this bound expression. If any function referenced by this
0078   // expression has mutable KernelState, it is not safe to execute or apply simplification
0079   // passes to it (or copies of it!) from multiple threads. Cloning state produces new
0080   // KernelStates where necessary to ensure that Expressions may be manipulated safely
0081   // on multiple threads.
0082   // Result<ExpressionState> CloneState() const;
0083   // Status SetState(ExpressionState);
0084 
0085   /// Return true if all an expression's field references have explicit types
0086   /// and all of its functions' kernels are looked up.
0087   bool IsBound() const;
0088 
0089   /// Return true if this expression is composed only of Scalar literals, field
0090   /// references, and calls to ScalarFunctions.
0091   bool IsScalarExpression() const;
0092 
0093   /// Return true if this expression is literal and entirely null.
0094   bool IsNullLiteral() const;
0095 
0096   /// Return true if this expression could evaluate to true. Will return true for any
0097   /// unbound or non-boolean Expressions. IsSatisfiable does not (currently) do any
0098   /// canonicalization or simplification of the expression, so even Expressions
0099   /// which are unsatisfiable may spuriously return `true` here. This function is
0100   /// intended for use in predicate pushdown where a filter expression is simplified
0101   /// by a guarantee, so it assumes that trying to simplify again would be redundant.
0102   bool IsSatisfiable() const;
0103 
0104   // XXX someday
0105   // Result<PipelineGraph> GetPipelines();
0106 
  /// Return true if this expression holds a payload (impl_ is non-null). A
  /// default-constructed Expression is not valid.
0107   bool is_valid() const { return impl_ != NULLPTR; }
0108 
0109   /// Access a Call or return nullptr if this expression is not a call
0110   const Call* call() const;
0111   /// Access a Datum or return nullptr if this expression is not a literal
0112   const Datum* literal() const;
0113   /// Access a FieldRef or return nullptr if this expression is not a field_ref
0114   const FieldRef* field_ref() const;
0115 
0116   /// The type to which this expression will evaluate
0117   const DataType* type() const;
0118   // XXX someday
0119   // NullGeneralization::type nullable() const;
0120 
  /// Payload of an Expression which refers to a field of the input.
0121   struct Parameter {
    /// The field reference itself.
0122     FieldRef ref;
0123 
0124     // post-bind properties
    // NOTE(review): `indices` presumably holds the resolved position(s) of the
    // referenced field within the input -- TODO confirm.
0125     TypeHolder type;
0126     ::arrow::internal::SmallVector<int, 2> indices;
0127   };
  /// Access the Parameter payload. NOTE(review): by analogy with call() and literal()
  /// this presumably returns nullptr when the expression is not a field reference --
  /// confirm.
0128   const Parameter* parameter() const;
0129 
  /// Construct an expression with no payload; is_valid() is false for it.
0130   Expression() = default;
  /// Construct an expression from one of the three payload kinds. These constructors
  /// are explicit, so a Call, Datum or Parameter never converts implicitly.
0131   explicit Expression(Call call);
0132   explicit Expression(Datum literal);
0133   explicit Expression(Parameter parameter);
0134 
0135  private:
  // Holds exactly one of the three expression kinds.
0136   using Impl = std::variant<Datum, Parameter, Call>;
  // Shared handle to the payload. The class declares no copy operations of its own,
  // so copying an Expression copies this pointer and both copies refer to one Impl.
0137   std::shared_ptr<Impl> impl_;
0138 
  // Identical() is granted access to impl_. NOTE(review): presumably it tests whether
  // two expressions share the same Impl object -- confirm.
0139   ARROW_FRIEND_EXPORT friend bool Identical(const Expression& l, const Expression& r);
0140 };
0141 
0142 inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
0143 inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); }
0144 
/// Presumably writes a representation of the expression to the given stream.
/// NOTE(review): the PrintTo(value, std::ostream*) shape matches the GoogleTest
/// value-printer hook, which is presumably why this exists -- confirm.
0145 ARROW_EXPORT void PrintTo(const Expression&, std::ostream*);
0146 
0147 // Factories
0148 
/// Create an Expression which wraps the given Datum as a literal.
0149 ARROW_EXPORT
0150 Expression literal(Datum lit);
0151 
0152 template <typename Arg>
0153 Expression literal(Arg&& arg) {
0154   return literal(Datum(std::forward<Arg>(arg)));
0155 }
0156 
/// Create an Expression which refers to the field designated by `ref`.
0157 ARROW_EXPORT
0158 Expression field_ref(FieldRef ref);
0159 
/// Create an Expression which calls the compute function named `function` with the
/// given argument expressions. `options` may be omitted, in which case it is NULLPTR.
0160 ARROW_EXPORT
0161 Expression call(std::string function, std::vector<Expression> arguments,
0162                 std::shared_ptr<FunctionOptions> options = NULLPTR);
0163 
0164 template <typename Options, typename = typename std::enable_if<
0165                                 std::is_base_of<FunctionOptions, Options>::value>::type>
0166 Expression call(std::string function, std::vector<Expression> arguments,
0167                 Options options) {
0168   return call(std::move(function), std::move(arguments),
0169               std::make_shared<Options>(std::move(options)));
0170 }
0171 
0172 /// Assemble a list of all fields referenced by an Expression at any depth.
/// The references are returned by value as a vector. NOTE(review): ordering and
/// whether duplicates are removed are not specified here -- confirm if it matters.
0173 ARROW_EXPORT
0174 std::vector<FieldRef> FieldsInExpression(const Expression&);
0175 
0176 /// Check if the expression references any fields.
/// \return true if at least one field reference is present, false otherwise
0177 ARROW_EXPORT
0178 bool ExpressionHasFieldRefs(const Expression&);
0179 
/// Only forward-declared in this header; the definition is not visible here.
0180 struct ARROW_EXPORT KnownFieldValues;
0181 
0182 /// Assemble a mapping from field references to known values. This derives known values
0183 /// from "equal" and "is_null" Expressions referencing a field and a literal.
///
/// \param guaranteed_true_predicate an Expression assumed to evaluate to true
/// \return the extracted KnownFieldValues, or an error reported through Result
0184 ARROW_EXPORT
0185 Result<KnownFieldValues> ExtractKnownFieldValues(
0186     const Expression& guaranteed_true_predicate);
0187 
0188 /// @}
0189 
0190 /// \defgroup expression-passes Functions for modification of Expressions
0191 ///
0192 /// @{
0193 ///
0194 /// These transform bound expressions. Some transforms utilize a guarantee, which is
0195 /// provided as an Expression which is guaranteed to evaluate to true. The
0196 /// guaranteed_true_predicate need not be bound, but canonicalization is currently
0197 /// deferred to producers of guarantees. For example in order to be recognized as a
0198 /// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS
0199 /// and literal RHS. Flipping the arguments, using "is_in" with a single-element
0200 /// value_set, or any other semantically identical Expression will not be recognized.
0201 
0202 /// Weak canonicalization which establishes guarantees for subsequent passes. Even
0203 /// equivalent Expressions may result in different canonicalized expressions.
0204 /// TODO this could be a strong canonicalization
///
/// The Expression is taken by value; the ExecContext argument is optional and
/// defaults to NULLPTR. Failure is reported through the returned Result.
0205 ARROW_EXPORT
0206 Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
0207 
0208 /// Simplify Expressions based on literal arguments (for example, add(null, x) will always
0209 /// be null so replace the call with a null literal). Includes early evaluation of all
0210 /// calls whose arguments are entirely literal.
///
/// \return the simplified Expression, or an error reported through Result
0211 ARROW_EXPORT
0212 Result<Expression> FoldConstants(Expression);
0213 
0214 /// Simplify an Expression by replacing the fields it references with their known values.
///
/// \param known_values the field values to substitute
/// \return the rewritten Expression, or an error reported through Result
0215 ARROW_EXPORT
0216 Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
0217                                                 Expression);
0218 
0219 /// Simplify an expression by replacing subexpressions based on a guarantee:
0220 /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
0221 /// used to remove redundant function calls from a filter expression or to replace a
0222 /// reference to a constant-value field with a literal.
///
/// \param guaranteed_true_predicate the guarantee used to drive the simplification
/// \return the simplified Expression, or an error reported through Result
0223 ARROW_EXPORT
0224 Result<Expression> SimplifyWithGuarantee(Expression,
0225                                          const Expression& guaranteed_true_predicate);
0226 
0227 /// Replace all named field refs (e.g. "x" or "x.y") with field paths (e.g. [0] or [1,3])
0228 ///
0229 /// This isn't usually needed and does not offer any simplification by itself.  However,
0230 /// it can be useful to normalize an expression to paths to make it simpler to work with.
///
/// \param expression the Expression to rewrite (taken by value)
/// \return the rewritten Expression, or an error reported through Result
0231 ARROW_EXPORT Result<Expression> RemoveNamedRefs(Expression expression);
0232 
0233 /// @}
0234 
0235 // Execution
0236 
0237 /// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
0238 /// RecordBatch which may have missing or incorrectly ordered columns.
0239 /// Missing fields will be replaced with null scalars.
///
/// \param full_schema the schema the resulting batch should conform to
/// \param partial the input, which may lack some of full_schema's fields
/// \param guarantee defaults to literal(true). NOTE(review): its role is not described
///        here; presumably fields whose value is fixed by the guarantee are filled
///        from it instead of with nulls -- confirm against the implementation.
0240 ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
0241                                              const Datum& partial,
0242                                              Expression guarantee = literal(true));
0243 
0244 /// Execute a scalar expression against the provided state and input ExecBatch. This
0245 /// expression must be bound.
///
/// The ExecContext argument is optional and defaults to NULLPTR. The evaluated Datum,
/// or an error, is returned through Result.
0246 ARROW_EXPORT
0247 Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
0248                                       ExecContext* = NULLPTR);
0249 
0250 /// Convenience function for invoking against a RecordBatch
///
/// Takes the full schema together with a possibly partial input Datum.
/// NOTE(review): presumably equivalent to MakeExecBatch() followed by the ExecBatch
/// overload of ExecuteScalarExpression() -- confirm.
0251 ARROW_EXPORT
0252 Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
0253                                       const Datum& partial_input, ExecContext* = NULLPTR);
0254 
0255 // Serialization
0256 
/// Serialize an Expression into a Buffer. The encoding is defined by the
/// implementation and is not visible in this header.
0257 ARROW_EXPORT
0258 Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
0259 
/// Reconstruct an Expression from a Buffer -- presumably one produced by Serialize();
/// confirm. Failure is reported through the returned Result.
0260 ARROW_EXPORT
0261 Result<Expression> Deserialize(std::shared_ptr<Buffer>);
0262 
0263 /// \defgroup expression-convenience Helpers for convenient expression creation
0264 ///
0265 /// @{
0266 
/// Build a projection Expression from `values` and their `names`.
/// NOTE(review): presumably the result evaluates to a struct whose fields are the
/// given values labelled with the given names, so both vectors would need the same
/// length -- confirm against the implementation.
0267 ARROW_EXPORT Expression project(std::vector<Expression> values,
0268                                 std::vector<std::string> names);
0269 
/// Comparison helpers. Each takes a left-hand and a right-hand operand and returns a
/// new Expression. NOTE(review): presumably each builds a call to the compute function
/// of the same name (this header's pass documentation refers to calls to "equal") --
/// confirm.
0270 ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
0271 
0272 ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
0273 
0274 ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
0275 
0276 ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
0277 
0278 ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
0279 
0280 ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
0281 
/// Build an is-null test on `lhs`. `nan_is_null` defaults to false; presumably it
/// controls whether a floating-point NaN is treated as null -- confirm.
0282 ARROW_EXPORT Expression is_null(Expression lhs, bool nan_is_null = false);
0283 
/// Build a validity test on `lhs` -- presumably true where the operand is not null;
/// confirm. Not to be confused with the member Expression::is_valid().
0284 ARROW_EXPORT Expression is_valid(Expression lhs);
0285 
/// Logical combinators (`and`, `or` and `not` are reserved alternative tokens in C++,
/// hence the trailing underscores). NOTE(review): the vector overloads presumably
/// combine all operands; the result for an empty vector is not specified here --
/// confirm against the implementation.
0286 ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
0287 ARROW_EXPORT Expression and_(const std::vector<Expression>&);
0288 ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
0289 ARROW_EXPORT Expression or_(const std::vector<Expression>&);
0290 ARROW_EXPORT Expression not_(Expression operand);
0291 
0292 /// @}
0293 
0294 }  // namespace compute
0295 }  // namespace arrow