Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:56

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <memory>
0021 #include <utility>
0022 
0023 #include "arrow/compute/function_options.h"
0024 #include "arrow/compute/ordering.h"
0025 #include "arrow/result.h"
0026 #include "arrow/type_fwd.h"
0027 
0028 namespace arrow {
0029 namespace compute {
0030 
0031 class ExecContext;
0032 
0033 /// \addtogroup compute-concrete-options
0034 /// @{
0035 
     /// \brief Options for the Filter function
0036 class ARROW_EXPORT FilterOptions : public FunctionOptions {
0037  public:
0038   /// Configure the action taken when a slot of the selection mask is null
0039   enum NullSelectionBehavior {
0040     /// The corresponding filtered value will be removed in the output.
0041     DROP,
0042     /// The corresponding filtered value will be null in the output.
0043     EMIT_NULL,
0044   };
0045 
       /// \param[in] null_selection action taken for null slots of the selection
       /// mask; DROP when omitted
0046   explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
       /// Name identifying this options type
0047   static constexpr char const kTypeName[] = "FilterOptions";
       /// Return default-constructed options, i.e. FilterOptions()
0048   static FilterOptions Defaults() { return FilterOptions(); }
0049 
       /// Action taken when a slot of the selection mask is null,
       /// see NullSelectionBehavior
0050   NullSelectionBehavior null_selection_behavior = DROP;
0051 };
0052 
     /// \brief Options for the Take function
0053 class ARROW_EXPORT TakeOptions : public FunctionOptions {
0054  public:
       /// \param[in] boundscheck requested bounds-checking setting; true when omitted
0055   explicit TakeOptions(bool boundscheck = true);
       /// Name identifying this options type
0056   static constexpr char const kTypeName[] = "TakeOptions";
       /// Return options constructed with boundscheck = true
0057   static TakeOptions BoundsCheck() { return TakeOptions(true); }
       /// Return options constructed with boundscheck = false
0058   static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
       /// Return the default options, which are the same as BoundsCheck()
0059   static TakeOptions Defaults() { return BoundsCheck(); }
0060 
       /// NOTE(review): presumably controls whether indices are validated against
       /// the length of the values; the check is not visible in this header.
0061   bool boundscheck = true;
0062 };
0063 
0064 /// \brief Options for the dictionary encode function
0065 class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
0066  public:
0067   /// Configure how null values will be encoded
0068   enum NullEncodingBehavior {
0069     /// The null value will be added to the dictionary with a proper index.
0070     ENCODE,
0071     /// The null value will be masked in the indices array.
0072     MASK
0073   };
0074 
       /// \param[in] null_encoding how null values are encoded; MASK when omitted
0075   explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
       /// Name identifying this options type
0076   static constexpr char const kTypeName[] = "DictionaryEncodeOptions";
       /// Return default-constructed options, i.e. DictionaryEncodeOptions()
0077   static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
0078 
       /// How null values are encoded, see NullEncodingBehavior
0079   NullEncodingBehavior null_encoding_behavior = MASK;
0080 };
0081 
0082 /// \brief Options for the run-end encode function
0083 class ARROW_EXPORT RunEndEncodeOptions : public FunctionOptions {
0084  public:
       /// \param[in] run_end_type data type to use for the run ends; int32() when omitted
0085   explicit RunEndEncodeOptions(std::shared_ptr<DataType> run_end_type = int32());
       /// Name identifying this options type
0086   static constexpr char const kTypeName[] = "RunEndEncodeOptions";
       /// Return default-constructed options, i.e. RunEndEncodeOptions()
0087   static RunEndEncodeOptions Defaults() { return RunEndEncodeOptions(); }
0088 
       /// Data type of the run ends in the encoded output
0089   std::shared_ptr<DataType> run_end_type;
0090 };
0091 
     /// \brief Options for sorting a single array or chunked array
     ///
     /// Accepted by the SortIndices overloads that take an ArraySortOptions.
0092 class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
0093  public:
       /// \param[in] order sorting order; ascending when omitted
       /// \param[in] null_placement where nulls and NaNs go; at the end when omitted
0094   explicit ArraySortOptions(SortOrder order = SortOrder::Ascending,
0095                             NullPlacement null_placement = NullPlacement::AtEnd);
       /// Name identifying this options type
0096   static constexpr char const kTypeName[] = "ArraySortOptions";
       /// Return default-constructed options, i.e. ArraySortOptions()
0097   static ArraySortOptions Defaults() { return ArraySortOptions(); }
0098 
0099   /// Sorting order
0100   SortOrder order;
0101   /// Whether nulls and NaNs are placed at the start or at the end
0102   NullPlacement null_placement;
0103 };
0104 
     /// \brief Options for sorting by one or more sort keys
     ///
     /// Accepted by the SortIndices overload that takes a Datum.
0105 class ARROW_EXPORT SortOptions : public FunctionOptions {
0106  public:
       /// \param[in] sort_keys column key(s) to order by; empty when omitted
       /// \param[in] null_placement where nulls and NaNs go; at the end when omitted
0107   explicit SortOptions(std::vector<SortKey> sort_keys = {},
0108                        NullPlacement null_placement = NullPlacement::AtEnd);
       /// Construct the options from an existing Ordering
0109   explicit SortOptions(const Ordering& ordering);
       /// Name identifying this options type
0110   static constexpr char const kTypeName[] = "SortOptions";
       /// Return default-constructed options, i.e. SortOptions()
0111   static SortOptions Defaults() { return SortOptions(); }
0112   /// Convenience method to create an Ordering from these SortOptions
0113   ///
0114   /// Note: Both classes contain the exact same information.  However,
0115   /// SortOptions should only be used in a "function options" context while Ordering
0116   /// is used more generally.
       ///
       /// The rvalue-qualified overload moves sort_keys into the returned Ordering;
       /// the const-lvalue overload copies them.
0117   Ordering AsOrdering() && { return Ordering(std::move(sort_keys), null_placement); }
0118   Ordering AsOrdering() const& { return Ordering(sort_keys, null_placement); }
0119 
0120   /// Column key(s) to order by and how to order by these sort keys.
0121   std::vector<SortKey> sort_keys;
0122   /// Whether nulls and NaNs are placed at the start or at the end
0123   NullPlacement null_placement;
0124 };
0125 
0126 /// \brief SelectK options
0127 class ARROW_EXPORT SelectKOptions : public FunctionOptions {
0128  public:
       /// \param[in] k the number of elements to keep; -1 when omitted
       /// \param[in] sort_keys column key(s) to order by; empty when omitted
0129   explicit SelectKOptions(int64_t k = -1, std::vector<SortKey> sort_keys = {});
       /// Name identifying this options type
0130   static constexpr char const kTypeName[] = "SelectKOptions";
       /// Return default-constructed options, i.e. SelectKOptions()
0131   static SelectKOptions Defaults() { return SelectKOptions(); }
0132 
       /// \brief Build options that order every key in descending order
       ///
       /// One SortKey is created per entry of `key_names`. When `key_names` is
       /// empty, a single placeholder key named "not-used" is created instead.
       ///
       /// \param[in] k the number of elements to keep
       /// \param[in] key_names names of the keys to order by
0133   static SelectKOptions TopKDefault(int64_t k, std::vector<std::string> key_names = {}) {
0134     std::vector<SortKey> keys;
         // key_names is a by-value copy owned by this function, so its strings
         // can be moved into the sort keys instead of being copied.
0135     for (auto& name : key_names) {
0136       keys.emplace_back(SortKey(std::move(name), SortOrder::Descending));
0137     }
         // Moving the strings leaves the vector's size unchanged, so empty() still
         // reflects whether the caller supplied any names.
0138     if (key_names.empty()) {
0139       keys.emplace_back(SortKey("not-used", SortOrder::Descending));
0140     }
         // Hand the keys over to the constructor rather than copying the vector.
0141     return SelectKOptions{k, std::move(keys)};
0142   }
       /// \brief Build options that order every key in ascending order
       ///
       /// One SortKey is created per entry of `key_names`. When `key_names` is
       /// empty, a single placeholder key named "not-used" is created instead.
       ///
       /// \param[in] k the number of elements to keep
       /// \param[in] key_names names of the keys to order by
0143   static SelectKOptions BottomKDefault(int64_t k,
0144                                        std::vector<std::string> key_names = {}) {
0145     std::vector<SortKey> keys;
         // key_names is a by-value copy owned by this function, so its strings
         // can be moved into the sort keys instead of being copied.
0146     for (auto& name : key_names) {
0147       keys.emplace_back(SortKey(std::move(name), SortOrder::Ascending));
0148     }
         // Moving the strings leaves the vector's size unchanged, so empty() still
         // reflects whether the caller supplied any names.
0149     if (key_names.empty()) {
0150       keys.emplace_back(SortKey("not-used", SortOrder::Ascending));
0151     }
         // Hand the keys over to the constructor rather than copying the vector.
0152     return SelectKOptions{k, std::move(keys)};
0153   }
0154 
0155   /// The number of `k` elements to keep.
0156   int64_t k;
0157   /// Column key(s) to order by and how to order by these sort keys.
0158   std::vector<SortKey> sort_keys;
0159 };
0160 
0161 /// \brief Rank options
0162 class ARROW_EXPORT RankOptions : public FunctionOptions {
0163  public:
0164   /// Configure how ties between equal values are handled
0165   enum Tiebreaker {
0166     /// Ties get the smallest possible rank in sorted order.
0167     Min,
0168     /// Ties get the largest possible rank in sorted order.
0169     Max,
0170     /// Ranks are assigned in order of when ties appear in the input.
0171     /// This ensures the ranks are a stable permutation of the input.
0172     First,
0173     /// The ranks span a dense [1, M] interval where M is the number
0174     /// of distinct values in the input.
0175     Dense
0176   };
0177 
       /// \param[in] sort_keys column key(s) to order by; empty when omitted
       /// \param[in] null_placement where nulls and NaNs go; at the end when omitted
       /// \param[in] tiebreaker how ties are handled; First when omitted
0178   explicit RankOptions(std::vector<SortKey> sort_keys = {},
0179                        NullPlacement null_placement = NullPlacement::AtEnd,
0180                        Tiebreaker tiebreaker = RankOptions::First);
0181   /// Convenience constructor for array inputs
       ///
       /// Delegates to the constructor above with a single SortKey that has an
       /// empty name and the given order.
0182   explicit RankOptions(SortOrder order,
0183                        NullPlacement null_placement = NullPlacement::AtEnd,
0184                        Tiebreaker tiebreaker = RankOptions::First)
0185       : RankOptions({SortKey("", order)}, null_placement, tiebreaker) {}
0186 
       /// Name identifying this options type
0187   static constexpr char const kTypeName[] = "RankOptions";
       /// Return default-constructed options, i.e. RankOptions()
0188   static RankOptions Defaults() { return RankOptions(); }
0189 
0190   /// Column key(s) to order by and how to order by these sort keys.
0191   std::vector<SortKey> sort_keys;
0192   /// Whether nulls and NaNs are placed at the start or at the end
0193   NullPlacement null_placement;
0194   /// Tiebreaker for dealing with equal values in ranks
0195   Tiebreaker tiebreaker;
0196 };
0197 
0198 /// \brief Partitioning options for NthToIndices
0199 class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
0200  public:
       /// \param[in] pivot index into the equivalent sorted array of the pivot element
       /// \param[in] null_placement where nulls and NaNs go; at the end when omitted
0201   explicit PartitionNthOptions(int64_t pivot,
0202                                NullPlacement null_placement = NullPlacement::AtEnd);
       /// Default constructor; delegates to the constructor above with pivot = 0
0203   PartitionNthOptions() : PartitionNthOptions(0) {}
       /// Name identifying this options type
0204   static constexpr char const kTypeName[] = "PartitionNthOptions";
0205 
0206   /// The index into the equivalent sorted array of the partition pivot element.
0207   int64_t pivot;
0208   /// Whether nulls and NaNs are partitioned at the start or at the end
0209   NullPlacement null_placement;
0210 };
0211 
0212 /// \brief Options for cumulative functions
0213 /// \note Also aliased as CumulativeSumOptions for backward compatibility
0214 class ARROW_EXPORT CumulativeOptions : public FunctionOptions {
0215  public:
       /// Construct options without an explicit starting value
       /// \param[in] skip_nulls see the skip_nulls member; false when omitted
0216   explicit CumulativeOptions(bool skip_nulls = false);
       /// Construct options with a starting value given as a double
       /// \param[in] start starting value for the cumulative operation
       /// \param[in] skip_nulls see the skip_nulls member; false when omitted
0217   explicit CumulativeOptions(double start, bool skip_nulls = false);
       /// Construct options with a starting value given as a Scalar
       /// \param[in] start starting value for the cumulative operation
       /// \param[in] skip_nulls see the skip_nulls member; false when omitted
0218   explicit CumulativeOptions(std::shared_ptr<Scalar> start, bool skip_nulls = false);
       /// Name identifying this options type
0219   static constexpr char const kTypeName[] = "CumulativeOptions";
       /// Return default-constructed options, i.e. CumulativeOptions()
0220   static CumulativeOptions Defaults() { return CumulativeOptions(); }
0221 
0222   /// Optional starting value for the cumulative operation. When it is not set, the
0223   /// default depends on the operation and input type:
0224   /// - sum: 0
0225   /// - prod: 1
0226   /// - min: maximum of the input type
0227   /// - max: minimum of the input type
0228   /// - mean: start is ignored because it has no meaning for mean
0229   std::optional<std::shared_ptr<Scalar>> start;
0230 
0231   /// If true, nulls in the input are ignored and produce a corresponding null output.
0232   /// When false, the first null encountered is propagated through the remaining output.
0233   bool skip_nulls = false;
0234 };
0235 using CumulativeSumOptions = CumulativeOptions;  // For backward compatibility
0236 
0237 /// \brief Options for pairwise functions
0238 class ARROW_EXPORT PairwiseOptions : public FunctionOptions {
0239  public:
       /// \param[in] periods number of periods to shift; 1 when omitted
0240   explicit PairwiseOptions(int64_t periods = 1);
       /// Name identifying this options type
0241   static constexpr char const kTypeName[] = "PairwiseOptions";
       /// Return default-constructed options, i.e. PairwiseOptions()
0242   static PairwiseOptions Defaults() { return PairwiseOptions(); }
0243 
0244   /// Periods to shift for applying the binary operation; negative values are accepted.
0245   int64_t periods = 1;
0246 };
0247 
0248 /// \brief Options for the list_flatten function
0249 class ARROW_EXPORT ListFlattenOptions : public FunctionOptions {
0250  public:
       /// \param[in] recursive whether to flatten recursively; false when omitted
0251   explicit ListFlattenOptions(bool recursive = false);
       /// Name identifying this options type
0252   static constexpr char const kTypeName[] = "ListFlattenOptions";
       /// Return default-constructed options, i.e. ListFlattenOptions()
0253   static ListFlattenOptions Defaults() { return ListFlattenOptions(); }
0254 
0255   /// \brief If true, the list is flattened recursively until a non-list
0256   /// array is formed.
0257   bool recursive = false;
0258 };
0259 
0260 /// @}
0261 
0262 /// \brief Filter with a boolean selection filter
0263 ///
0264 /// The output will be populated with values from the input at positions
0265 /// where the selection filter is not 0. Nulls in the filter will be handled
0266 /// based on options.null_selection_behavior.
0267 ///
0268 /// For example given values = ["a", "b", "c", null, "e", "f"] and
0269 /// filter = [0, 1, 1, 0, null, 1], the output will be
0270 /// (null_selection_behavior == DROP)      = ["b", "c", "f"]
0271 /// (null_selection_behavior == EMIT_NULL) = ["b", "c", null, "f"]
0272 ///
0273 /// \param[in] values array to filter
0274 /// \param[in] filter indicates which values should be kept (the non-zero slots)
0275 /// \param[in] options configures null_selection_behavior
0276 /// \param[in] ctx the function execution context, optional
0277 /// \return the resulting datum
0278 ARROW_EXPORT
0279 Result<Datum> Filter(const Datum& values, const Datum& filter,
0280                      const FilterOptions& options = FilterOptions::Defaults(),
0281                      ExecContext* ctx = NULLPTR);
0282 
0283 namespace internal {
0284 
0285 // These internal functions are implemented in kernels/vector_selection.cc
0286 
0287 /// \brief Return the number of selected indices in the boolean filter
0288 ///
0289 /// \param filter a plain or run-end encoded boolean array with or without nulls
0290 /// \param null_selection how to handle nulls in the filter
     /// \return the number of selected indices
0291 ARROW_EXPORT
0292 int64_t GetFilterOutputSize(const ArraySpan& filter,
0293                             FilterOptions::NullSelectionBehavior null_selection);
0294 
0295 /// \brief Compute uint64 selection indices for use with Take given a boolean
0296 /// filter
0297 ///
0298 /// \param filter a plain or run-end encoded boolean array with or without nulls
0299 /// \param null_selection how to handle nulls in the filter
     /// \param memory_pool memory pool handed to the implementation; defaults to
     /// default_memory_pool(). NOTE(review): presumably used to allocate the
     /// returned indices - confirm in kernels/vector_selection.cc
     /// \return the selection indices as ArrayData, or an error
0300 ARROW_EXPORT
0301 Result<std::shared_ptr<ArrayData>> GetTakeIndices(
0302     const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection,
0303     MemoryPool* memory_pool = default_memory_pool());
0304 
0305 }  // namespace internal
0306 
0307 /// \brief Replace each value in the array that corresponds to a true value
0308 /// in the mask with the next element from `replacements`.
0309 ///
0310 /// \param[in] values Array input to replace
0311 /// \param[in] mask Array or Scalar of Boolean mask values
0312 /// \param[in] replacements The replacement values to draw from. There must
0313 /// be as many replacement values as true values in the mask.
0314 /// \param[in] ctx the function execution context, optional
0315 ///
0316 /// \return the resulting datum
0317 ///
0318 /// \since 5.0.0
0319 /// \note API not yet finalized
0320 ARROW_EXPORT
0321 Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
0322                               const Datum& replacements, ExecContext* ctx = NULLPTR);
0323 
0324 /// \brief Fill null values in the forward direction
0325 ///
0326 /// The output array will be of the same type as the input values
0327 /// array, with null values replaced in the forward direction.
0328 ///
0329 /// For example given values = ["a", "b", "c", null, null, "f"],
0330 /// the output will be = ["a", "b", "c", "c", "c", "f"]
0331 ///
0332 /// \param[in] values datum whose null values are to be filled
0333 /// \param[in] ctx the function execution context, optional
0334 /// \return the resulting datum
0335 ARROW_EXPORT
0336 Result<Datum> FillNullForward(const Datum& values, ExecContext* ctx = NULLPTR);
0337 
0338 /// \brief Fill null values in the backward direction
0339 ///
0340 /// The output array will be of the same type as the input values
0341 /// array, with null values replaced in the backward direction.
0342 ///
0343 /// For example given values = ["a", "b", "c", null, null, "f"],
0344 /// the output will be = ["a", "b", "c", "f", "f", "f"]
0345 ///
0346 /// \param[in] values datum whose null values are to be filled
0347 /// \param[in] ctx the function execution context, optional
0348 /// \return the resulting datum
0349 ARROW_EXPORT
0350 Result<Datum> FillNullBackward(const Datum& values, ExecContext* ctx = NULLPTR);
0351 
0352 /// \brief Take from an array of values at indices in another array
0353 ///
0354 /// The output array will be of the same type as the input values
0355 /// array, with elements taken from the values array at the given
0356 /// indices. If an index is null then the taken element will be null.
0357 ///
0358 /// For example given values = ["a", "b", "c", null, "e", "f"] and
0359 /// indices = [2, 1, null, 3], the output will be
0360 /// = [values[2], values[1], null, values[3]]
0361 /// = ["c", "b", null, null]
0362 ///
0363 /// \param[in] values datum from which to take
0364 /// \param[in] indices which values to take
0365 /// \param[in] options options, see TakeOptions
0366 /// \param[in] ctx the function execution context, optional
0367 /// \return the resulting datum
0368 ARROW_EXPORT
0369 Result<Datum> Take(const Datum& values, const Datum& indices,
0370                    const TakeOptions& options = TakeOptions::Defaults(),
0371                    ExecContext* ctx = NULLPTR);
0372 
0373 /// \brief Take with Array inputs and output
     ///
     /// \param[in] values array from which to take
     /// \param[in] indices which values to take
     /// \param[in] options options, see TakeOptions
     /// \param[in] ctx the function execution context, optional
     /// \return the resulting array
0374 ARROW_EXPORT
0375 Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices,
0376                                     const TakeOptions& options = TakeOptions::Defaults(),
0377                                     ExecContext* ctx = NULLPTR);
0378 
0379 /// \brief Drop nulls from an array of values
0380 ///
0381 /// The output array will be of the same type as the input values
0382 /// array, with elements taken from the values array without nulls.
0383 ///
0384 /// For example given values = ["a", "b", "c", null, "e", "f"],
0385 /// the output will be = ["a", "b", "c", "e", "f"]
0386 ///
0387 /// \param[in] values datum from which nulls are dropped
0388 /// \param[in] ctx the function execution context, optional
0389 /// \return the resulting datum
0390 ARROW_EXPORT
0391 Result<Datum> DropNull(const Datum& values, ExecContext* ctx = NULLPTR);
0392 
0393 /// \brief DropNull with Array inputs and output
     ///
     /// \param[in] values array from which nulls are dropped
     /// \param[in] ctx the function execution context, optional
     /// \return the resulting array
0394 ARROW_EXPORT
0395 Result<std::shared_ptr<Array>> DropNull(const Array& values, ExecContext* ctx = NULLPTR);
0396 
0397 /// \brief Return indices that partition an array around n-th sorted element.
0398 ///
0399 /// Find the index of the n-th (0-based) smallest value and perform an indirect
0400 /// partition of the array around that element. Output indices[0 ~ n-1]
0401 /// hold values no greater than the n-th element, and indices[n+1 ~ end]
0402 /// hold values no less than the n-th element. Elements in each partition
0403 /// are not sorted. Nulls will be partitioned to the end of the output.
0404 /// Output is not guaranteed to be stable.
0405 ///
0406 /// \param[in] values array to be partitioned
0407 /// \param[in] n 0-based position, in sorted order, of the pivot element
0408 /// \param[in] ctx the function execution context, optional
0409 /// \return indices that would partition the array
0410 ARROW_EXPORT
0411 Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
0412                                             ExecContext* ctx = NULLPTR);
0413 
0414 /// \brief Return indices that partition an array around n-th sorted element.
0415 ///
0416 /// This overload takes a PartitionNthOptions specifying the pivot index
0417 /// and the null handling.
0418 ///
0419 /// \param[in] values array to be partitioned
0420 /// \param[in] options options including pivot index and null handling
0421 /// \param[in] ctx the function execution context, optional
0422 /// \return indices that would partition the array
0423 ARROW_EXPORT
0424 Result<std::shared_ptr<Array>> NthToIndices(const Array& values,
0425                                             const PartitionNthOptions& options,
0426                                             ExecContext* ctx = NULLPTR);
0427 
0428 /// \brief Return indices that would select the first `k` elements.
0429 ///
0430 /// Perform an indirect sort of the datum, keeping only the first `k` elements. The output
0431 /// array will contain indices such that the item indicated by the k-th index will be in
0432 /// the position it would be if the datum were sorted by `options.sort_keys`. However,
0433 /// indices of null values will not be part of the output. The sort is not guaranteed to
0434 /// be stable.
0435 ///
0436 /// \param[in] datum datum to select elements from
0437 /// \param[in] options options, see SelectKOptions
0438 /// \param[in] ctx the function execution context, optional
0439 /// \return an array of indices into the input
0440 ARROW_EXPORT
0441 Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
0442                                                const SelectKOptions& options,
0443                                                ExecContext* ctx = NULLPTR);
0444 
0445 /// \brief Return the indices that would sort an array.
0446 ///
0447 /// Perform an indirect sort of array. The output array will contain
0448 /// indices that would sort an array, which would be the same length
0449 /// as input. Nulls will be stably partitioned to the end of the output
0450 /// regardless of order.
0451 ///
0452 /// For example given array = [null, 1, 3.3, null, 2, 5.3] and order
0453 /// = SortOrder::Descending, the output will be [5, 2, 4, 1, 0,
0454 /// 3].
0455 ///
0456 /// \param[in] array array to sort
0457 /// \param[in] order ascending or descending
0458 /// \param[in] ctx the function execution context, optional
0459 /// \return indices that would sort the array
0460 ARROW_EXPORT
0461 Result<std::shared_ptr<Array>> SortIndices(const Array& array,
0462                                            SortOrder order = SortOrder::Ascending,
0463                                            ExecContext* ctx = NULLPTR);
0464 
0465 /// \brief Return the indices that would sort an array.
0466 ///
0467 /// This overload takes an ArraySortOptions specifying the sort order
0468 /// and the null handling.
0469 ///
0470 /// \param[in] array array to sort
0471 /// \param[in] options options including sort order and null handling
0472 /// \param[in] ctx the function execution context, optional
0473 /// \return indices that would sort the array
0474 ARROW_EXPORT
0475 Result<std::shared_ptr<Array>> SortIndices(const Array& array,
0476                                            const ArraySortOptions& options,
0477                                            ExecContext* ctx = NULLPTR);
0478 
0479 /// \brief Return the indices that would sort a chunked array.
0480 ///
0481 /// Perform an indirect sort of chunked array. The output array will
0482 /// contain indices that would sort a chunked array, which would be
0483 /// the same length as input. Nulls will be stably partitioned to the
0484 /// end of the output regardless of order.
0485 ///
0486 /// For example given chunked_array = [[null, 1], [3.3], [null, 2,
0487 /// 5.3]] and order = SortOrder::Descending, the output will be [5, 2,
0488 /// 4, 1, 0, 3].
0489 ///
0490 /// \param[in] chunked_array chunked array to sort
0491 /// \param[in] order ascending or descending
0492 /// \param[in] ctx the function execution context, optional
0493 /// \return indices that would sort the chunked array
0494 ARROW_EXPORT
0495 Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
0496                                            SortOrder order = SortOrder::Ascending,
0497                                            ExecContext* ctx = NULLPTR);
0498 
0499 /// \brief Return the indices that would sort a chunked array.
0500 ///
0501 /// This overload takes an ArraySortOptions specifying the sort order
0502 /// and the null handling.
0503 ///
0504 /// \param[in] chunked_array chunked array to sort
0505 /// \param[in] options options including sort order and null handling
0506 /// \param[in] ctx the function execution context, optional
0507 /// \return indices that would sort the chunked array
0508 ARROW_EXPORT
0509 Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
0510                                            const ArraySortOptions& options,
0511                                            ExecContext* ctx = NULLPTR);
0512 
0513 /// \brief Return the indices that would sort an input in the
0514 /// specified order. Input is one of array, chunked array, record batch
0515 /// or table.
0516 ///
0517 /// Perform an indirect sort of input. The output array will contain
0518 /// indices that would sort an input, which would be the same length
0519 /// as input. Nulls will be stably partitioned to the start or to the end
0520 /// of the output depending on SortOptions::null_placement.
0521 ///
0522 /// For example given input (table) = {
0523 /// "column1": [[null,   1], [   3, null, 2, 1]],
0524 /// "column2": [[   5], [3,   null, null, 5, 5]],
0525 /// } and options = {
0526 /// {"column1", SortOrder::Ascending},
0527 /// {"column2", SortOrder::Descending},
0528 /// }, the output will be [5, 1, 4, 2, 0, 3].
0529 ///
0530 /// \param[in] datum array, chunked array, record batch or table to sort
0531 /// \param[in] options options, see SortOptions
0532 /// \param[in] ctx the function execution context, optional
0533 /// \return indices that would sort the input
0534 ARROW_EXPORT
0535 Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
0536                                            ExecContext* ctx = NULLPTR);
0537 
0538 /// \brief Compute unique elements from an array-like object
0539 ///
0540 /// Note if a null occurs in the input it will NOT be included in the output.
0541 ///
0542 /// \param[in] datum array-like input
0543 /// \param[in] ctx the function execution context, optional
0544 /// \return the unique elements as an Array
0545 ///
0546 /// \since 1.0.0
0547 /// \note API not yet finalized
0548 ARROW_EXPORT
0549 Result<std::shared_ptr<Array>> Unique(const Datum& datum, ExecContext* ctx = NULLPTR);
0550 
0551 // Constants for accessing the output of ValueCounts: the names and indices
     // of the "Values" and "Counts" fields of the structs it returns. They are
     // only declared here; their definitions live elsewhere.
0552 ARROW_EXPORT extern const char kValuesFieldName[];
0553 ARROW_EXPORT extern const char kCountsFieldName[];
0554 ARROW_EXPORT extern const int32_t kValuesFieldIndex;
0555 ARROW_EXPORT extern const int32_t kCountsFieldIndex;
0556 
0557 /// \brief Return counts of unique elements from an array-like object.
0558 ///
0559 /// Note that the counts do not include counts for nulls in the array.  These can be
0560 /// obtained separately from metadata.
0561 ///
0562 /// For floating point arrays there is no attempt to normalize -0.0, 0.0 and NaN values
0563 /// which can lead to unexpected results if the input Array has these values.
0564 ///
0565 /// \param[in] value array-like input
0566 /// \param[in] ctx the function execution context, optional
0567 /// \return an array of <input type "Values", int64_t "Counts"> structs
0568 ///
0569 /// \since 1.0.0
0570 /// \note API not yet finalized
0571 ARROW_EXPORT
0572 Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value,
0573                                                  ExecContext* ctx = NULLPTR);
0574 
0575 /// \brief Dictionary-encode values in an array-like object
0576 ///
0577 /// Any nulls encountered in the dictionary will be handled according to the
0578 /// specified null encoding behavior.
0579 ///
0580 /// For example, given values ["a", "b", null, "a", null] the output will be
0581 /// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null]
0582 /// (null_encoding == MASK)   Indices: [0, 1, null, 0, null] / Dict: ["a", "b"]
0583 ///
0584 /// If the input is already dictionary encoded this function is a no-op unless
0585 /// it needs to modify the null_encoding (TODO)
0586 ///
0587 /// \param[in] data array-like input
0588 /// \param[in] options configures null encoding behavior
0589 /// \param[in] ctx the function execution context, optional
0590 /// \return result with same shape and type as input
0591 ///
0592 /// \since 1.0.0
0593 /// \note API not yet finalized
0594 ARROW_EXPORT
0595 Result<Datum> DictionaryEncode(
0596     const Datum& data,
0597     const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
0598     ExecContext* ctx = NULLPTR);
0599 
0600 /// \brief Run-end-encode values in an array-like object
0601 ///
0602 /// The returned run-end encoded type uses the same value type as the input and
0603 /// the run-end type defined in the options.
0604 ///
0605 /// \param[in] value array-like input
0606 /// \param[in] options configures encoding behavior, see RunEndEncodeOptions
0607 /// \param[in] ctx the function execution context, optional
0608 /// \return result with same shape but run-end encoded
0609 ///
0610 /// \since 12.0.0
0611 /// \note API not yet finalized
0612 ARROW_EXPORT
0613 Result<Datum> RunEndEncode(
0614     const Datum& value,
0615     const RunEndEncodeOptions& options = RunEndEncodeOptions::Defaults(),
0616     ExecContext* ctx = NULLPTR);
0617 
0618 /// \brief Decode a Run-End Encoded array to a plain array
0619 ///
0620 /// The output data type is the same as the values array type of the run-end
0621 /// encoded input.
0622 ///
0623 /// \param[in] value run-end-encoded input
0624 /// \param[in] ctx the function execution context, optional
0625 /// \return plain array resulting from decoding the run-end encoded input
0626 ///
0627 /// \since 12.0.0
0628 /// \note API not yet finalized
0629 ARROW_EXPORT
0630 Result<Datum> RunEndDecode(const Datum& value, ExecContext* ctx = NULLPTR);
0631 
0632 /// \brief Compute the cumulative sum of an array-like object
0633 ///
0634 /// \param[in] values array-like input
0635 /// \param[in] options configures cumulative sum behavior
0636 /// \param[in] check_overflow whether to check for overflow, if true, return Invalid
0637 /// status on overflow, otherwise wrap around on overflow
0638 /// \param[in] ctx the function execution context, optional
     /// \return the resulting datum
0639 ARROW_EXPORT
0640 Result<Datum> CumulativeSum(
0641     const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
0642     bool check_overflow = false, ExecContext* ctx = NULLPTR);
0643 
0644 /// \brief Compute the cumulative product of an array-like object
0645 ///
0646 /// \param[in] values array-like input
0647 /// \param[in] options configures cumulative product behavior
0648 /// \param[in] check_overflow whether to check for overflow, if true, return Invalid
0649 /// status on overflow, otherwise wrap around on overflow
0650 /// \param[in] ctx the function execution context, optional
     /// \return the resulting datum
0651 ARROW_EXPORT
0652 Result<Datum> CumulativeProd(
0653     const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
0654     bool check_overflow = false, ExecContext* ctx = NULLPTR);
0655 
0656 /// \brief Compute the cumulative max of an array-like object
0657 ///
0658 /// \param[in] values array-like input
0659 /// \param[in] options configures cumulative max behavior
0660 /// \param[in] ctx the function execution context, optional
     /// \return the resulting datum
0661 ARROW_EXPORT
0662 Result<Datum> CumulativeMax(
0663     const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
0664     ExecContext* ctx = NULLPTR);
0665 
0666 /// \brief Compute the cumulative min of an array-like object
0667 ///
0668 /// \param[in] values array-like input
0669 /// \param[in] options configures cumulative min behavior
0670 /// \param[in] ctx the function execution context, optional
     /// \return the resulting datum
0671 ARROW_EXPORT
0672 Result<Datum> CumulativeMin(
0673     const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
0674     ExecContext* ctx = NULLPTR);
0675 
0676 /// \brief Compute the cumulative mean of an array-like object
0677 ///
0678 /// \param[in] values array-like input
0679 /// \param[in] options configures cumulative mean behavior, `start` is ignored
0680 /// \param[in] ctx the function execution context, optional
     /// \return the resulting datum
0681 ARROW_EXPORT
0682 Result<Datum> CumulativeMean(
0683     const Datum& values, const CumulativeOptions& options = CumulativeOptions::Defaults(),
0684     ExecContext* ctx = NULLPTR);
0685 
0686 /// \brief Return the first order difference of an array.
0687 ///
0688 /// Computes the first order difference of an array, i.e.
0689 ///   output[i] = input[i] - input[i - p]  if i >= p
0690 ///   output[i] = null                     otherwise
0691 /// where p is the period. For example, with p = 1,
0692 ///   Diff([1, 4, 9, 10, 15]) = [null, 3, 5, 1, 5].
0693 /// With p = 2,
0694 ///   Diff([1, 4, 9, 10, 15]) = [null, null, 8, 6, 6]
0695 /// p can also be negative, in which case the diff is computed in
0696 /// the opposite direction.
0697 /// \param[in] array array input
0698 /// \param[in] options options specifying the period, see PairwiseOptions
0699 /// \param[in] check_overflow whether to return error on overflow
0700 /// \param[in] ctx the function execution context, optional
0701 /// \return result as array
0702 ARROW_EXPORT
0703 Result<std::shared_ptr<Array>> PairwiseDiff(const Array& array,
0704                                             const PairwiseOptions& options,
0705                                             bool check_overflow = false,
0706                                             ExecContext* ctx = NULLPTR);
0707 
0708 }  // namespace compute
0709 }  // namespace arrow