Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:54

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 // Array accessor classes for run-end encoded arrays
0019 
0020 #pragma once
0021 
0022 #include <cstdint>
0023 #include <memory>
0024 #include <string>
0025 #include <utility>
0026 #include <vector>
0027 
0028 #include "arrow/array/array_base.h"
0029 #include "arrow/array/data.h"
0030 #include "arrow/result.h"
0031 #include "arrow/status.h"
0032 #include "arrow/type.h"
0033 #include "arrow/type_fwd.h"
0034 #include "arrow/util/checked_cast.h"
0035 #include "arrow/util/macros.h"
0036 #include "arrow/util/visibility.h"
0037 
0038 namespace arrow {
0039 
0040 /// \addtogroup run-end-encoded-arrays
0041 ///
0042 /// @{
0043 
0044 // ----------------------------------------------------------------------
0045 // RunEndEncoded
0046 
0047 /// \brief Array type for run-end encoded data (a run_ends child and a values child)
0048 class ARROW_EXPORT RunEndEncodedArray : public Array {
0049  private:
0050   std::shared_ptr<Array> run_ends_array_;  // child array handed out by run_ends()
0051   std::shared_ptr<Array> values_array_;    // child array handed out by values()
0052 
0053  public:
0054   using TypeClass = RunEndEncodedType;
0055   /// \brief Construct a RunEndEncodedArray from an existing ArrayData
0056   explicit RunEndEncodedArray(const std::shared_ptr<ArrayData>& data);
0057 
0058   /// \brief Construct a RunEndEncodedArray from all parameters
0059   ///
0060   /// The length and offset parameters refer to the dimensions of the logical
0061   /// array which is the array we would get after expanding all the runs into
0062   /// repeated values. As such, length can be much greater than the length of
0063   /// the child run_ends and values arrays.
0064   RunEndEncodedArray(const std::shared_ptr<DataType>& type, int64_t length,
0065                      const std::shared_ptr<Array>& run_ends,
0066                      const std::shared_ptr<Array>& values, int64_t offset = 0);
0067 
0068   /// \brief Construct a RunEndEncodedArray from all parameters, wrapped in a Result
0069   ///
0070   /// The logical_length and logical_offset parameters refer to the dimensions of
0071   /// the logical array, which is the array we would get after expanding all the
0072   /// runs into repeated values. As such, logical_length can be much greater than
0073   /// the length of the child run_ends and values arrays.
0074   static Result<std::shared_ptr<RunEndEncodedArray>> Make(
0075       const std::shared_ptr<DataType>& type, int64_t logical_length,
0076       const std::shared_ptr<Array>& run_ends, const std::shared_ptr<Array>& values,
0077       int64_t logical_offset = 0);
0078 
0079   /// \brief Construct a RunEndEncodedArray from values and run ends arrays
0080   ///
0081   /// The data type is automatically inferred from the arguments.
0082   /// The run_ends and values arrays must have the same length.
0083   static Result<std::shared_ptr<RunEndEncodedArray>> Make(
0084       int64_t logical_length, const std::shared_ptr<Array>& run_ends,
0085       const std::shared_ptr<Array>& values, int64_t logical_offset = 0);
0086 
0087  protected:
0088   void SetData(const std::shared_ptr<ArrayData>& data);  // NOTE(review): presumably rebinds this array to data -- confirm
0089 
0090  public:
0091   /// \brief Returns the stored array holding the logical indexes of each run-end
0092   ///
0093   /// The physical offset to the array is applied. No new array is allocated.
0094   const std::shared_ptr<Array>& run_ends() const { return run_ends_array_; }
0095 
0096   /// \brief Returns the stored array holding the values of each run
0097   ///
0098   /// The physical offset to the array is applied. No new array is allocated.
0099   const std::shared_ptr<Array>& values() const { return values_array_; }
0100 
0101   /// \brief Returns an array holding the logical indexes of each run end
0102   ///
0103   /// If a non-zero logical offset is set, this function allocates a new
0104   /// array (presumably from pool -- TODO confirm), rewrites the run ends to be
0105   /// relative to the logical offset and cuts the array at the logical length.
0106   Result<std::shared_ptr<Array>> LogicalRunEnds(MemoryPool* pool) const;
0107 
0108   /// \brief Returns an array holding the values of each run
0109   ///
0110   /// If a non-zero logical offset is set, this function allocates a new
0111   /// array containing only the values within the logical range.
0112   std::shared_ptr<Array> LogicalValues() const;
0113 
0114   /// \brief Find the physical offset of this REE array
0115   ///
0116   /// This function uses binary-search, so it has an O(log N) cost.
0117   int64_t FindPhysicalOffset() const;
0118 
0119   /// \brief Find the physical length of this REE array
0120   ///
0121   /// The physical length of an REE is the number of physical values (and
0122   /// run-ends) necessary to represent the logical range of values described
0123   /// by this array's logical offset and length.
0124   ///
0125   /// Avoid calling this function if the physical length can be established in
0126   /// some other way (e.g. when iterating over the runs sequentially until the
0127   /// end). This function uses binary-search, so it has an O(log N) cost.
0128   int64_t FindPhysicalLength() const;
0129 };
0130 
0131 /// @}
0132 
0133 }  // namespace arrow