![]() |
|
|||
File indexing completed on 2025-08-28 08:26:56
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <memory> 0021 #include <utility> 0022 0023 #include "arrow/compute/function_options.h" 0024 #include "arrow/compute/ordering.h" 0025 #include "arrow/result.h" 0026 #include "arrow/type_fwd.h" 0027 0028 namespace arrow { 0029 namespace compute { 0030 0031 class ExecContext; 0032 0033 /// \addtogroup compute-concrete-options 0034 /// @{ 0035 0036 class ARROW_EXPORT FilterOptions : public FunctionOptions { 0037 public: 0038 /// Configure the action taken when a slot of the selection mask is null 0039 enum NullSelectionBehavior { 0040 /// The corresponding filtered value will be removed in the output. 0041 DROP, 0042 /// The corresponding filtered value will be null in the output. 0043 EMIT_NULL, 0044 }; 0045 0046 explicit FilterOptions(NullSelectionBehavior null_selection = DROP); 0047 static constexpr char const kTypeName[] = "FilterOptions"; 0048 static FilterOptions Defaults() { return FilterOptions(); } 0049 0050 NullSelectionBehavior null_selection_behavior = DROP; 0051 }; 0052 0053 class ARROW_EXPORT TakeOptions : public FunctionOptions { 0054 public: 0055 explicit TakeOptions(bool boundscheck = true); 0056 static constexpr char const kTypeName[] = "TakeOptions"; 0057 static TakeOptions BoundsCheck() { return TakeOptions(true); } 0058 static TakeOptions NoBoundsCheck() { return TakeOptions(false); } 0059 static TakeOptions Defaults() { return BoundsCheck(); } 0060 0061 bool boundscheck = true; 0062 }; 0063 0064 /// \brief Options for the dictionary encode function 0065 class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions { 0066 public: 0067 /// Configure how null values will be encoded 0068 enum NullEncodingBehavior { 0069 /// The null value will be added to the dictionary with a proper index. 0070 ENCODE, 0071 /// The null value will be masked in the indices array. 0072 MASK 0073 }; 0074 0075 explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK); 0076 static constexpr char const kTypeName[] = "DictionaryEncodeOptions"; 0077 static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); } 0078 0079 NullEncodingBehavior null_encoding_behavior = MASK; 0080 }; 0081 0082 /// \brief Options for the run-end encode function 0083 class ARROW_EXPORT RunEndEncodeOptions : public FunctionOptions { 0084 public: 0085 explicit RunEndEncodeOptions(std::shared_ptr<DataType> run_end_type = int32()); 0086 static constexpr char const kTypeName[] = "RunEndEncodeOptions"; 0087 static RunEndEncodeOptions Defaults() { return RunEndEncodeOptions(); } 0088 0089 std::shared_ptr<DataType> run_end_type; 0090 }; 0091 0092 class ARROW_EXPORT ArraySortOptions : public FunctionOptions { 0093 public: 0094 explicit ArraySortOptions(SortOrder order = SortOrder::Ascending, 0095 NullPlacement null_placement = NullPlacement::AtEnd); 0096 static constexpr char const kTypeName[] = "ArraySortOptions"; 0097 static ArraySortOptions Defaults() { return ArraySortOptions(); } 0098 0099 /// Sorting order 0100 SortOrder order; 0101 /// Whether nulls and NaNs are placed at the start or at the end 0102 NullPlacement null_placement; 0103 }; 0104 0105 class ARROW_EXPORT SortOptions : public FunctionOptions { 0106 public: 0107 explicit SortOptions(std::vector<SortKey> sort_keys = {}, 0108 NullPlacement null_placement = NullPlacement::AtEnd); 0109 explicit SortOptions(const Ordering& ordering); 0110 static constexpr char const kTypeName[] = "SortOptions"; 0111 static SortOptions Defaults() { return SortOptions(); } 0112 /// Convenience constructor to create an ordering from SortOptions 0113 /// 0114 /// Note: Both classes contain the exact same information. However, 0115 /// sort_options should only be used in a "function options" context while Ordering 0116 /// is used more generally. 0117 Ordering AsOrdering() && { return Ordering(std::move(sort_keys), null_placement); } 0118 Ordering AsOrdering() const& { return Ordering(sort_keys, null_placement); } 0119 0120 /// Column key(s) to order by and how to order by these sort keys. 0121 std::vector<SortKey> sort_keys; 0122 /// Whether nulls and NaNs are placed at the start or at the end 0123 NullPlacement null_placement; 0124 }; 0125 0126 /// \brief SelectK options 0127 class ARROW_EXPORT SelectKOptions : public FunctionOptions { 0128 public: 0129 explicit SelectKOptions(int64_t k = -1, std::vector<SortKey> sort_keys = {}); 0130 static constexpr char const kTypeName[] = "SelectKOptions"; 0131 static SelectKOptions Defaults() { return SelectKOptions(); } 0132 0133 static SelectKOptions TopKDefault(int64_t k, std::vector<std::string> key_names = {}) { 0134 std::vector<SortKey> keys; 0135 for (const auto& name : key_names) { 0136 keys.emplace_back(SortKey(name, SortOrder::Descending)); 0137 } 0138 if (key_names.empty()) { 0139 keys.emplace_back(SortKey("not-used", SortOrder::Descending)); 0140 } 0141 return SelectKOptions{k, keys}; 0142 } 0143 static SelectKOptions BottomKDefault(int64_t k, 0144 std::vector<std::string> key_names = {}) { 0145 std::vector<SortKey> keys; 0146 for (const auto& name : key_names) { 0147 keys.emplace_back(SortKey(name, SortOrder::Ascending)); 0148 } 0149 if (key_names.empty()) { 0150 keys.emplace_back(SortKey("not-used", SortOrder::Ascending)); 0151 } 0152 return SelectKOptions{k, keys}; 0153 } 0154 0155 /// The number of `k` elements to keep. 0156 int64_t k; 0157 /// Column key(s) to order by and how to order by these sort keys. 0158 std::vector<SortKey> sort_keys; 0159 }; 0160 0161 /// \brief Rank options 0162 class ARROW_EXPORT RankOptions : public FunctionOptions { 0163 public: 0164 /// Configure how ties between equal values are handled 0165 enum Tiebreaker { 0166 /// Ties get the smallest possible rank in sorted order. 0167 Min, 0168 /// Ties get the largest possible rank in sorted order. 0169 Max, 0170 /// Ranks are assigned in order of when ties appear in the input. 0171 /// This ensures the ranks are a stable permutation of the input. 0172 First, 0173 /// The ranks span a dense [1, M] interval where M is the number 0174 /// of distinct values in the input. 0175 Dense 0176 }; 0177 0178 explicit RankOptions(std::vector<SortKey> sort_keys = {}, 0179 NullPlacement null_placement = NullPlacement::AtEnd, 0180 Tiebreaker tiebreaker = RankOptions::First); 0181 /// Convenience constructor for array inputs 0182 explicit RankOptions(SortOrder order, 0183 NullPlacement null_placement = NullPlacement::AtEnd, 0184 Tiebreaker tiebreaker = RankOptions::First) 0185 : RankOptions({SortKey("", order)}, null_placement, tiebreaker) {} 0186 0187 static constexpr char const kTypeName[] = "RankOptions"; 0188 static RankOptions Defaults() { return RankOptions(); } 0189 0190 /// Column key(s) to order by and how to order by these sort keys. 0191 std::vector<SortKey> sort_keys; 0192 /// Whether nulls and NaNs are placed at the start or at the end 0193 NullPlacement null_placement; 0194 /// Tiebreaker for dealing with equal values in ranks 0195 Tiebreaker tiebreaker; 0196 }; 0197 0198 /// \brief Partitioning options for NthToIndices 0199 class ARROW_EXPORT PartitionNthOptions : public FunctionOptions { 0200 public: 0201 explicit PartitionNthOptions(int64_t pivot, 0202 NullPlacement null_placement = NullPlacement::AtEnd); 0203 PartitionNthOptions() : PartitionNthOptions(0) {} 0204 static constexpr char const kTypeName[] = "PartitionNthOptions"; 0205 0206 /// The index into the equivalent sorted array of the partition pivot element. 0207 int64_t pivot; 0208 /// Whether nulls and NaNs are partitioned at the start or at the end 0209 NullPlacement null_placement; 0210 }; 0211 0212 /// \brief Options for cumulative functions 0213 /// \note Also aliased as CumulativeSumOptions for backward compatibility 0214 class ARROW_EXPORT CumulativeOptions : public FunctionOptions { 0215 public: 0216 explicit CumulativeOptions(bool skip_nulls = false); 0217 explicit CumulativeOptions(double start, bool skip_nulls = false); 0218 explicit CumulativeOptions(std::shared_ptr<Scalar> start, bool skip_nulls = false); 0219 static constexpr char const kTypeName[] = "CumulativeOptions"; 0220 static CumulativeOptions Defaults() { return CumulativeOptions(); } 0221 0222 /// Optional starting value for cumulative operation computation, default depends on the 0223 /// operation and input type. 0224 /// - sum: 0 0225 /// - prod: 1 0226 /// - min: maximum of the input type 0227 /// - max: minimum of the input type 0228 /// - mean: start is ignored because it has no meaning for mean 0229 std::optional<std::shared_ptr<Scalar>> start; 0230 0231 /// If true, nulls in the input are ignored and produce a corresponding null output. 0232 /// When false, the first null encountered is propagated through the remaining output. 0233 bool skip_nulls = false; 0234 }; 0235 using CumulativeSumOptions = CumulativeOptions; // For backward compatibility 0236 0237 /// \brief Options for pairwise functions 0238 class ARROW_EXPORT PairwiseOptions : public FunctionOptions { 0239 public: 0240 explicit PairwiseOptions(int64_t periods = 1); 0241 static constexpr char const kTypeName[] = "PairwiseOptions"; 0242 static PairwiseOptions Defaults() { return PairwiseOptions(); } 0243 0244 /// Periods to shift for applying the binary operation, accepts negative values. 0245 int64_t periods = 1; 0246 }; 0247 0248 /// \brief Options for list_flatten function 0249 class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { 0250 public: 0251 explicit ListFlattenOptions(bool recursive = false); 0252 static constexpr char const kTypeName[] = "ListFlattenOptions"; 0253 static ListFlattenOptions Defaults() { return ListFlattenOptions(); } 0254 0255 /// \brief If true, the list is flattened recursively until a non-list 0256 /// array is formed. 0257 bool recursive = false; 0258 }; 0259 0260 /// @} 0261 0262 /// \brief Filter with a boolean selection filter 0263 /// 0264 /// The output will be populated with values from the input at positions 0265 /// where the selection filter is not 0. Nulls in the filter will be handled 0266 /// based on options.null_selection_behavior. 0267 /// 0268 /// For example given values = ["a", "b", "c", null, "e", "f"] and 0269 /// filter = [0, 1, 1, 0, null, 1], the output will be 0270 /// (null_selection_behavior == DROP) = ["b", "c", "f"] 0271 /// (null_selection_behavior == EMIT_NULL) = ["b", "c", null, "f"] 0272 /// 0273 /// \param[in] values array to filter 0274 /// \param[in] filter indicates which values should be filtered out 0275 /// \param[in] options configures null_selection_behavior 0276 /// \param[in] ctx the function execution context, optional 0277 /// \return the resulting datum 0278 ARROW_EXPORT 0279 Result<Datum> Filter(const Datum& values, const Datum& filter, 0280 const FilterOptions& options = FilterOptions::Defaults(), 0281 ExecContext* ctx = NULLPTR); 0282 0283 namespace internal { 0284 0285 // These internal functions are implemented in kernels/vector_selection.cc 0286 0287 /// \brief Return the number of selected indices in the boolean filter 0288 /// 0289 /// \param filter a plain or run-end encoded boolean array with or without nulls 0290 /// \param null_selection how to handle nulls in the filter 0291 ARROW_EXPORT 0292 int64_t GetFilterOutputSize(const ArraySpan& filter, 0293 FilterOptions::NullSelectionBehavior null_selection); 0294 0295 /// \brief Compute uint64 selection indices for use with Take given a boolean 0296 /// filter 0297 /// 0298 /// \param filter a plain or run-end encoded boolean array with or without nulls 0299 /// \param null_selection how to handle nulls in the filter 0300 ARROW_EXPORT 0301 Result<std::shared_ptr<ArrayData>> GetTakeIndices( 0302 const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection, 0303 MemoryPool* memory_pool = default_memory_pool()); 0304 0305 } // namespace internal 0306 0307 /// \brief ReplaceWithMask replaces each value in the array corresponding 0308 /// to a true value in the mask with the next element from `replacements`. 0309 /// 0310 /// \param[in] values Array input to replace 0311 /// \param[in] mask Array or Scalar of Boolean mask values 0312 /// \param[in] replacements The replacement values to draw from. There must 0313 /// be as many replacement values as true values in the mask. 0314 /// \param[in] ctx the function execution context, optional 0315 /// 0316 /// \return the resulting datum 0317 /// 0318 /// \since 5.0.0 0319 /// \note API not yet finalized 0320 ARROW_EXPORT 0321 Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask, 0322 const Datum& replacements, ExecContext* ctx = NULLPTR); 0323 0324 /// \brief FillNullForward fill null values in forward direction 0325 /// 0326 /// The output array will be of the same type as the input values 0327 /// array, with replaced null values in forward direction. 0328 /// 0329 /// For example given values = ["a", "b", "c", null, null, "f"], 0330 /// the output will be = ["a", "b", "c", "c", "c", "f"] 0331 /// 0332 /// \param[in] values datum from which to take 0333 /// \param[in] ctx the function execution context, optional 0334 /// \return the resulting datum 0335 ARROW_EXPORT 0336 Result<Datum> FillNullForward(const Datum& values, ExecContext* ctx = NULLPTR); 0337 0338 /// \brief FillNullBackward fill null values in backward direction 0339 /// 0340 /// The output array will be of the same type as the input values 0341 /// array, with replaced null values in backward direction. 0342 /// 0343 /// For example given values = ["a", "b", "c", null, null, "f"], 0344 /// the output will be = ["a", "b", "c", "f", "f", "f"] 0345 /// 0346 /// \param[in] values datum from which to take 0347 /// \param[in] ctx the function execution context, optional 0348 /// \return the resulting datum 0349 ARROW_EXPORT 0350 Result<Datum> FillNullBackward(const Datum& values, ExecContext* ctx = NULLPTR); 0351 0352 /// \brief Take from an array of values at indices in another array 0353 /// 0354 /// The output array will be of the same type as the input values 0355 /// array, with elements taken from the values array at the given 0356 /// indices. If an index is null then the taken element will be null. 0357 /// 0358 /// For example given values = ["a", "b", "c", null, "e", "f"] and 0359 /// indices = [2, 1, null, 3], the output will be 0360 /// = [values[2], values[1], null, values[3]] 0361 /// = ["c", "b", null, null] 0362 /// 0363 /// \param[in] values datum from which to take 0364 /// \param[in] indices which values to take 0365 /// \param[in] options options 0366 /// \param[in] ctx the function execution context, optional 0367 /// \return the resulting datum 0368 ARROW_EXPORT 0369 Result<Datum> Take(const Datum& values, const Datum& indices, 0370 const TakeOptions& options = TakeOptions::Defaults(), 0371 ExecContext* ctx = NULLPTR); 0372 0373 /// \brief Take with Array inputs and output 0374 ARROW_EXPORT 0375 Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices, 0376 const TakeOptions& options = TakeOptions::Defaults(), 0377 ExecContext* ctx = NULLPTR); 0378 0379 /// \brief Drop Null from an array of values 0380 /// 0381 /// The output array will be of the same type as the input values 0382 /// array, with elements taken from the values array without nulls. 0383 /// 0384 /// For example given values = ["a", "b", "c", null, "e", "f"], 0385 /// the output will be = ["a", "b", "c", "e", "f"] 0386 /// 0387 /// \param[in] values datum from which to take 0388 /// \param[in] ctx the function execution context, optional 0389 /// \return the resulting datum 0390 ARROW_EXPORT 0391 Result<Datum> DropNull(const Datum& values, ExecContext* ctx = NULLPTR); 0392 0393 /// \brief DropNull with Array inputs and output 0394 ARROW_EXPORT 0395 Result<std::shared_ptr<Array>> DropNull(const Array& values, ExecContext* ctx = NULLPTR); 0396 0397 /// \brief Return indices that partition an array around n-th sorted element. 0398 /// 0399 /// Find index of n-th(0 based) smallest value and perform indirect 0400 /// partition of an array around that element. Output indices[0 ~ n-1] 0401 /// holds values no greater than n-th element, and indices[n+1 ~ end] 0402 /// holds values no less than n-th element. Elements in each partition 0403 /// is not sorted. Nulls will be partitioned to the end of the output. 0404 /// Output is not guaranteed to be stable. 0405 /// 0406 /// \param[in] values array to be partitioned 0407 /// \param[in] n pivot array around sorted n-th element 0408 /// \param[in] ctx the function execution context, optional 0409 /// \return offsets indices that would partition an array 0410 ARROW_EXPORT 0411 Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n, 0412 ExecContext* ctx = NULLPTR); 0413 0414 /// \brief Return indices that partition an array around n-th sorted element. 0415 /// 0416 /// This overload takes a PartitionNthOptions specifying the pivot index 0417 /// and the null handling. 0418 /// 0419 /// \param[in] values array to be partitioned 0420 /// \param[in] options options including pivot index and null handling 0421 /// \param[in] ctx the function execution context, optional 0422 /// \return offsets indices that would partition an array 0423 ARROW_EXPORT 0424 Result<std::shared_ptr<Array>> NthToIndices(const Array& values, 0425 const PartitionNthOptions& options, 0426 ExecContext* ctx = NULLPTR); 0427 0428 /// \brief Return indices that would select the first `k` elements. 0429 /// 0430 /// Perform an indirect sort of the datum, keeping only the first `k` elements. The output 0431 /// array will contain indices such that the item indicated by the k-th index will be in 0432 /// the position it would be if the datum were sorted by `options.sort_keys`. However, 0433 /// indices of null values will not be part of the output. The sort is not guaranteed to 0434 /// be stable. 0435 /// 0436 /// \param[in] datum datum to be partitioned 0437 /// \param[in] options options 0438 /// \param[in] ctx the function execution context, optional 0439 /// \return a datum with the same schema as the input 0440 ARROW_EXPORT 0441 Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum, 0442 const SelectKOptions& options, 0443 ExecContext* ctx = NULLPTR); 0444 0445 /// \brief Return the indices that would sort an array. 0446 /// 0447 /// Perform an indirect sort of array. The output array will contain 0448 /// indices that would sort an array, which would be the same length 0449 /// as input. Nulls will be stably partitioned to the end of the output 0450 /// regardless of order. 0451 /// 0452 /// For example given array = [null, 1, 3.3, null, 2, 5.3] and order 0453 /// = SortOrder::DESCENDING, the output will be [5, 2, 4, 1, 0, 0454 /// 3]. 0455 /// 0456 /// \param[in] array array to sort 0457 /// \param[in] order ascending or descending 0458 /// \param[in] ctx the function execution context, optional 0459 /// \return offsets indices that would sort an array 0460 ARROW_EXPORT 0461 Result<std::shared_ptr<Array>> SortIndices(const Array& array, 0462 SortOrder order = SortOrder::Ascending, 0463 ExecContext* ctx = NULLPTR); 0464 0465 /// \brief Return the indices that would sort an array. 0466 /// 0467 /// This overload takes a ArraySortOptions specifying the sort order 0468 /// and the null handling. 0469 /// 0470 /// \param[in] array array to sort 0471 /// \param[in] options options including sort order and null handling 0472 /// \param[in] ctx the function execution context, optional 0473 /// \return offsets indices that would sort an array 0474 ARROW_EXPORT 0475 Result<std::shared_ptr<Array>> SortIndices(const Array& array, 0476 const ArraySortOptions& options, 0477 ExecContext* ctx = NULLPTR); 0478 0479 /// \brief Return the indices that would sort a chunked array. 0480 /// 0481 /// Perform an indirect sort of chunked array. The output array will 0482 /// contain indices that would sort a chunked array, which would be 0483 /// the same length as input. Nulls will be stably partitioned to the 0484 /// end of the output regardless of order. 0485 /// 0486 /// For example given chunked_array = [[null, 1], [3.3], [null, 2, 0487 /// 5.3]] and order = SortOrder::DESCENDING, the output will be [5, 2, 0488 /// 4, 1, 0, 3]. 0489 /// 0490 /// \param[in] chunked_array chunked array to sort 0491 /// \param[in] order ascending or descending 0492 /// \param[in] ctx the function execution context, optional 0493 /// \return offsets indices that would sort an array 0494 ARROW_EXPORT 0495 Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array, 0496 SortOrder order = SortOrder::Ascending, 0497 ExecContext* ctx = NULLPTR); 0498 0499 /// \brief Return the indices that would sort a chunked array. 0500 /// 0501 /// This overload takes a ArraySortOptions specifying the sort order 0502 /// and the null handling. 0503 /// 0504 /// \param[in] chunked_array chunked array to sort 0505 /// \param[in] options options including sort order and null handling 0506 /// \param[in] ctx the function execution context, optional 0507 /// \return offsets indices that would sort an array 0508 ARROW_EXPORT 0509 Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array, 0510 const ArraySortOptions& options, 0511 ExecContext* ctx = NULLPTR); 0512 0513 /// \brief Return the indices that would sort an input in the 0514 /// specified order. Input is one of array, chunked array record batch 0515 /// or table. 0516 /// 0517 /// Perform an indirect sort of input. The output array will contain 0518 /// indices that would sort an input, which would be the same length 0519 /// as input. Nulls will be stably partitioned to the start or to the end 0520 /// of the output depending on SortOrder::null_placement. 0521 /// 0522 /// For example given input (table) = { 0523 /// "column1": [[null, 1], [ 3, null, 2, 1]], 0524 /// "column2": [[ 5], [3, null, null, 5, 5]], 0525 /// } and options = { 0526 /// {"column1", SortOrder::Ascending}, 0527 /// {"column2", SortOrder::Descending}, 0528 /// }, the output will be [5, 1, 4, 2, 0, 3]. 0529 /// 0530 /// \param[in] datum array, chunked array, record batch or table to sort 0531 /// \param[in] options options 0532 /// \param[in] ctx the function execution context, optional 0533 /// \return offsets indices that would sort a table 0534 ARROW_EXPORT 0535 Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options, 0536 ExecContext* ctx = NULLPTR); 0537 0538 /// \brief Compute unique elements from an array-like object 0539 /// 0540 /// Note if a null occurs in the input it will NOT be included in the output. 0541 /// 0542 /// \param[in] datum array-like input 0543 /// \param[in] ctx the function execution context, optional 0544 /// \return result as Array 0545 /// 0546 /// \since 1.0.0 0547 /// \note API not yet finalized 0548 ARROW_EXPORT 0549 Result<std::shared_ptr<Array>> Unique(const Datum& datum, ExecContext* ctx = NULLPTR); 0550 0551 // Constants for accessing the output of ValueCounts 0552 ARROW_EXPORT extern const char kValuesFieldName[]; 0553 ARROW_EXPORT extern const char kCountsFieldName[]; 0554 ARROW_EXPORT extern const int32_t kValuesFieldIndex; 0555 ARROW_EXPORT extern const int32_t kCountsFieldIndex; 0556 0557 /// \brief Return counts of unique elements from an array-like object. 0558 /// 0559 /// Note that the counts do not include counts for nulls in the array. These can be 0560 /// obtained separately from metadata. 0561 /// 0562 /// For floating point arrays there is no attempt to normalize -0.0, 0.0 and NaN values 0563 /// which can lead to unexpected results if the input Array has these values. 0564 /// 0565 /// \param[in] value array-like input 0566 /// \param[in] ctx the function execution context, optional 0567 /// \return counts An array of <input type "Values", int64_t "Counts"> structs. 0568 /// 0569 /// \since 1.0.0 0570 /// \note API not yet finalized 0571 ARROW_EXPORT 0572 Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value, 0573 ExecContext* ctx = NULLPTR); 0574 0575 /// \brief Dictionary-encode values in an array-like object 0576 /// 0577 /// Any nulls encountered in the dictionary will be handled according to the 0578 /// specified null encoding behavior. 0579 /// 0580 /// For example, given values ["a", "b", null, "a", null] the output will be 0581 /// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null] 0582 /// (null_encoding == MASK) Indices: [0, 1, null, 0, null] / Dict: ["a", "b"] 0583 /// 0584 /// If the input is already dictionary encoded this function is a no-op unless 0585 /// it needs to modify the null_encoding (TODO) 0586 /// 0587 /// \param[in] data array-like input 0588 /// \param[in] ctx the function execution context, optional 0589 /// \param[in] options configures null encoding behavior 0590 /// \return result with same shape and type as input 0591 /// 0592 /// \since 1.0.0 0593 /// \note API not yet finalized 0594 ARROW_EXPORT 0595 Result<Datum> DictionaryEncode( 0596 const Datum& data, 0597 const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(), 0598 ExecContext* ctx = NULLPTR); 0599 0600 /// \brief Run-end-encode values in an array-like object 0601 /// 0602 /// The returned run-end encoded type uses the same value type of the input and 0603 /// run-end type defined in the options. 0604 /// 0605 /// \param[in] value array-like input 0606 /// \param[in] options configures encoding behavior 0607 /// \param[in] ctx the function execution context, optional 0608 /// \return result with same shape but run-end encoded 0609 /// 0610 /// \since 12.0.0 0611 /// \note API not yet finalized 0612 ARROW_EXPORT 0613 Result<Datum> RunEndEncode( 0614 const Datum& value, 0615 const RunEndEncodeOptions& options = RunEndEncodeOptions::Defaults(), 0616 ExecContext* ctx = NULLPTR); 0617 0618 /// \brief Decode a Run-End Encoded array to a plain array 0619 /// 0620 /// The output data type is the same as the values array type of run-end encoded 0621 /// input. 0622 /// 0623 /// \param[in] value run-end-encoded input 0624 /// \param[in] ctx the function execution context, optional 0625 /// \return plain array resulting from decoding the run-end encoded input 0626 /// 0627 /// \since 12.0.0 0628 /// \note API not yet finalized 0629 ARROW_EXPORT 0630 Result<Datum> RunEndDecode(const Datum& value, ExecContext* ctx = NULLPTR); 0631 0632 /// \brief Compute the cumulative sum of an array-like object 0633 /// 0634 /// \param[in] values array-like input 0635 /// \param[in] options configures cumulative sum behavior 0636 /// \param[in] check_overflow whether to check for overflow, if true, return Invalid 0637 /// status on overflow, otherwise wrap around on overflow 0638 /// \param[in] ctx the function execution context, optional 0639 ARROW_EXPORT 0640 Result<Datum> CumulativeSum( 0641 const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(), 0642 bool check_overflow = false, ExecContext* ctx = NULLPTR); 0643 0644 /// \brief Compute the cumulative product of an array-like object 0645 /// 0646 /// \param[in] values array-like input 0647 /// \param[in] options configures cumulative prod behavior 0648 /// \param[in] check_overflow whether to check for overflow, if true, return Invalid 0649 /// status on overflow, otherwise wrap around on overflow 0650 /// \param[in] ctx the function execution context, optional 0651 ARROW_EXPORT 0652 Result<Datum> CumulativeProd( 0653 const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(), 0654 bool check_overflow = false, ExecContext* ctx = NULLPTR); 0655 0656 /// \brief Compute the cumulative max of an array-like object 0657 /// 0658 /// \param[in] values array-like input 0659 /// \param[in] options configures cumulative max behavior 0660 /// \param[in] ctx the function execution context, optional 0661 ARROW_EXPORT 0662 Result<Datum> CumulativeMax( 0663 const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(), 0664 ExecContext* ctx = NULLPTR); 0665 0666 /// \brief Compute the cumulative min of an array-like object 0667 /// 0668 /// \param[in] values array-like input 0669 /// \param[in] options configures cumulative min behavior 0670 /// \param[in] ctx the function execution context, optional 0671 ARROW_EXPORT 0672 Result<Datum> CumulativeMin( 0673 const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(), 0674 ExecContext* ctx = NULLPTR); 0675 0676 /// \brief Compute the cumulative mean of an array-like object 0677 /// 0678 /// \param[in] values array-like input 0679 /// \param[in] options configures cumulative mean behavior, `start` is ignored 0680 /// \param[in] ctx the function execution context, optional 0681 ARROW_EXPORT 0682 Result<Datum> CumulativeMean( 0683 const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(), 0684 ExecContext* ctx = NULLPTR); 0685 0686 /// \brief Return the first order difference of an array. 0687 /// 0688 /// Computes the first order difference of an array, i.e. 0689 /// output[i] = input[i] - input[i - p] if i >= p 0690 /// output[i] = null otherwise 0691 /// where p is the period. For example, with p = 1, 0692 /// Diff([1, 4, 9, 10, 15]) = [null, 3, 5, 1, 5]. 0693 /// With p = 2, 0694 /// Diff([1, 4, 9, 10, 15]) = [null, null, 8, 6, 6] 0695 /// p can also be negative, in which case the diff is computed in 0696 /// the opposite direction. 0697 /// \param[in] array array input 0698 /// \param[in] options options, specifying overflow behavior and period 0699 /// \param[in] check_overflow whether to return error on overflow 0700 /// \param[in] ctx the function execution context, optional 0701 /// \return result as array 0702 ARROW_EXPORT 0703 Result<std::shared_ptr<Array>> PairwiseDiff(const Array& array, 0704 const PairwiseOptions& options, 0705 bool check_overflow = false, 0706 ExecContext* ctx = NULLPTR); 0707 0708 } // namespace compute 0709 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |