![]() |
|
|||
File indexing completed on 2025-08-28 08:26:54
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 // Array accessor classes run-end encoded arrays 0019 0020 #pragma once 0021 0022 #include <cstdint> 0023 #include <memory> 0024 #include <string> 0025 #include <utility> 0026 #include <vector> 0027 0028 #include "arrow/array/array_base.h" 0029 #include "arrow/array/data.h" 0030 #include "arrow/result.h" 0031 #include "arrow/status.h" 0032 #include "arrow/type.h" 0033 #include "arrow/type_fwd.h" 0034 #include "arrow/util/checked_cast.h" 0035 #include "arrow/util/macros.h" 0036 #include "arrow/util/visibility.h" 0037 0038 namespace arrow { 0039 0040 /// \addtogroup run-end-encoded-arrays 0041 /// 0042 /// @{ 0043 0044 // ---------------------------------------------------------------------- 0045 // RunEndEncoded 0046 0047 /// \brief Array type for run-end encoded data 0048 class ARROW_EXPORT RunEndEncodedArray : public Array { 0049 private: 0050 std::shared_ptr<Array> run_ends_array_; 0051 std::shared_ptr<Array> values_array_; 0052 0053 public: 0054 using TypeClass = RunEndEncodedType; 0055 0056 explicit RunEndEncodedArray(const std::shared_ptr<ArrayData>& data); 0057 0058 /// \brief Construct a RunEndEncodedArray from all parameters 0059 /// 0060 /// The length and offset parameters refer to the dimensions of the logical 0061 /// array which is the array we would get after expanding all the runs into 0062 /// repeated values. As such, length can be much greater than the length of 0063 /// the child run_ends and values arrays. 0064 RunEndEncodedArray(const std::shared_ptr<DataType>& type, int64_t length, 0065 const std::shared_ptr<Array>& run_ends, 0066 const std::shared_ptr<Array>& values, int64_t offset = 0); 0067 0068 /// \brief Construct a RunEndEncodedArray from all parameters 0069 /// 0070 /// The length and offset parameters refer to the dimensions of the logical 0071 /// array which is the array we would get after expanding all the runs into 0072 /// repeated values. As such, length can be much greater than the length of 0073 /// the child run_ends and values arrays. 0074 static Result<std::shared_ptr<RunEndEncodedArray>> Make( 0075 const std::shared_ptr<DataType>& type, int64_t logical_length, 0076 const std::shared_ptr<Array>& run_ends, const std::shared_ptr<Array>& values, 0077 int64_t logical_offset = 0); 0078 0079 /// \brief Construct a RunEndEncodedArray from values and run ends arrays 0080 /// 0081 /// The data type is automatically inferred from the arguments. 0082 /// The run_ends and values arrays must have the same length. 0083 static Result<std::shared_ptr<RunEndEncodedArray>> Make( 0084 int64_t logical_length, const std::shared_ptr<Array>& run_ends, 0085 const std::shared_ptr<Array>& values, int64_t logical_offset = 0); 0086 0087 protected: 0088 void SetData(const std::shared_ptr<ArrayData>& data); 0089 0090 public: 0091 /// \brief Returns an array holding the logical indexes of each run-end 0092 /// 0093 /// The physical offset to the array is applied. 0094 const std::shared_ptr<Array>& run_ends() const { return run_ends_array_; } 0095 0096 /// \brief Returns an array holding the values of each run 0097 /// 0098 /// The physical offset to the array is applied. 0099 const std::shared_ptr<Array>& values() const { return values_array_; } 0100 0101 /// \brief Returns an array holding the logical indexes of each run end 0102 /// 0103 /// If a non-zero logical offset is set, this function allocates a new 0104 /// array and rewrites all the run end values to be relative to the logical 0105 /// offset and cuts the end of the array to the logical length. 0106 Result<std::shared_ptr<Array>> LogicalRunEnds(MemoryPool* pool) const; 0107 0108 /// \brief Returns an array holding the values of each run 0109 /// 0110 /// If a non-zero logical offset is set, this function allocates a new 0111 /// array containing only the values within the logical range. 0112 std::shared_ptr<Array> LogicalValues() const; 0113 0114 /// \brief Find the physical offset of this REE array 0115 /// 0116 /// This function uses binary-search, so it has a O(log N) cost. 0117 int64_t FindPhysicalOffset() const; 0118 0119 /// \brief Find the physical length of this REE array 0120 /// 0121 /// The physical length of an REE is the number of physical values (and 0122 /// run-ends) necessary to represent the logical range of values from offset 0123 /// to length. 0124 /// 0125 /// Avoid calling this function if the physical length can be established in 0126 /// some other way (e.g. when iterating over the runs sequentially until the 0127 /// end). This function uses binary-search, so it has a O(log N) cost. 0128 int64_t FindPhysicalLength() const; 0129 }; 0130 0131 /// @} 0132 0133 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |