File indexing completed on 2025-08-28 08:26:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021 #pragma once
0022
0023 #include <cstdint>
0024 #include <memory>
0025 #include <optional>
0026 #include <string>
0027 #include <string_view>
0028 #include <vector>
0029
0030 #include "arrow/array/array_base.h"
0031 #include "arrow/array/data.h"
0032 #include "arrow/buffer.h"
0033 #include "arrow/stl_iterator.h"
0034 #include "arrow/type.h"
0035 #include "arrow/util/checked_cast.h"
0036 #include "arrow/util/macros.h"
0037 #include "arrow/util/visibility.h"
0038
0039 namespace arrow {
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050 template <typename TYPE>
0051 class BaseBinaryArray : public FlatArray {
0052 public:
0053 using TypeClass = TYPE;
0054 using offset_type = typename TypeClass::offset_type;
0055 using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
0056
0057
0058
0059 const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
0060 const offset_type pos = raw_value_offsets_[i];
0061 *out_length = raw_value_offsets_[i + 1] - pos;
0062 return raw_data_ + pos;
0063 }
0064
0065
0066
0067
0068
0069 std::string_view GetView(int64_t i) const {
0070 const offset_type pos = raw_value_offsets_[i];
0071 return std::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
0072 raw_value_offsets_[i + 1] - pos);
0073 }
0074
0075 std::optional<std::string_view> operator[](int64_t i) const {
0076 return *IteratorType(*this, i);
0077 }
0078
0079
0080
0081
0082
0083
0084 std::string_view Value(int64_t i) const { return GetView(i); }
0085
0086
0087
0088
0089
0090 std::string GetString(int64_t i) const { return std::string(GetView(i)); }
0091
0092
0093 std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
0094
0095
0096 std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
0097
0098 const offset_type* raw_value_offsets() const { return raw_value_offsets_; }
0099
0100 const uint8_t* raw_data() const { return raw_data_; }
0101
0102
0103
0104
0105
0106 offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; }
0107
0108
0109
0110
0111 offset_type value_length(int64_t i) const {
0112 return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
0113 }
0114
0115
0116
0117
0118 offset_type total_values_length() const {
0119 if (data_->length > 0) {
0120 return raw_value_offsets_[data_->length] - raw_value_offsets_[0];
0121 } else {
0122 return 0;
0123 }
0124 }
0125
0126 IteratorType begin() const { return IteratorType(*this); }
0127
0128 IteratorType end() const { return IteratorType(*this, length()); }
0129
0130 protected:
0131
0132 BaseBinaryArray() = default;
0133
0134
0135 void SetData(const std::shared_ptr<ArrayData>& data) {
0136 this->Array::SetData(data);
0137 raw_value_offsets_ = data->GetValuesSafe<offset_type>(1);
0138 raw_data_ = data->GetValuesSafe<uint8_t>(2, 0);
0139 }
0140
0141 const offset_type* raw_value_offsets_ = NULLPTR;
0142 const uint8_t* raw_data_ = NULLPTR;
0143 };
0144
0145
0146 class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
0147 public:
0148 explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
0149
0150 BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
0151 const std::shared_ptr<Buffer>& data,
0152 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0153 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0154
0155 protected:
0156
0157 BinaryArray() : BaseBinaryArray() {}
0158 };
0159
0160
0161 class ARROW_EXPORT StringArray : public BinaryArray {
0162 public:
0163 using TypeClass = StringType;
0164
0165 explicit StringArray(const std::shared_ptr<ArrayData>& data);
0166
0167 StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
0168 const std::shared_ptr<Buffer>& data,
0169 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0170 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0171
0172
0173
0174
0175 Status ValidateUTF8() const;
0176 };
0177
0178
0179 class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
0180 public:
0181 explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
0182
0183 LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
0184 const std::shared_ptr<Buffer>& data,
0185 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0186 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0187
0188 protected:
0189
0190 LargeBinaryArray() : BaseBinaryArray() {}
0191 };
0192
0193
0194 class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
0195 public:
0196 using TypeClass = LargeStringType;
0197
0198 explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
0199
0200 LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
0201 const std::shared_ptr<Buffer>& data,
0202 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0203 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0204
0205
0206
0207
0208 Status ValidateUTF8() const;
0209 };
0210
0211
0212
0213
0214
0215
0216 class ARROW_EXPORT BinaryViewArray : public FlatArray {
0217 public:
0218 using TypeClass = BinaryViewType;
0219 using IteratorType = stl::ArrayIterator<BinaryViewArray>;
0220 using c_type = BinaryViewType::c_type;
0221
0222 explicit BinaryViewArray(std::shared_ptr<ArrayData> data);
0223
0224 BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
0225 std::shared_ptr<Buffer> views, BufferVector data_buffers,
0226 std::shared_ptr<Buffer> null_bitmap = NULLPTR,
0227 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0228
0229
0230 std::string_view GetView(int64_t i) const;
0231 std::string GetString(int64_t i) const { return std::string{GetView(i)}; }
0232
0233 const auto& values() const { return data_->buffers[1]; }
0234 const c_type* raw_values() const { return raw_values_; }
0235
0236 std::optional<std::string_view> operator[](int64_t i) const {
0237 return *IteratorType(*this, i);
0238 }
0239
0240 IteratorType begin() const { return IteratorType(*this); }
0241 IteratorType end() const { return IteratorType(*this, length()); }
0242
0243 protected:
0244 using FlatArray::FlatArray;
0245
0246 void SetData(std::shared_ptr<ArrayData> data) {
0247 FlatArray::SetData(std::move(data));
0248 raw_values_ = data_->GetValuesSafe<c_type>(1);
0249 }
0250
0251 const c_type* raw_values_;
0252 };
0253
0254
0255
0256 class ARROW_EXPORT StringViewArray : public BinaryViewArray {
0257 public:
0258 using TypeClass = StringViewType;
0259
0260 explicit StringViewArray(std::shared_ptr<ArrayData> data);
0261
0262 using BinaryViewArray::BinaryViewArray;
0263
0264
0265
0266
0267 Status ValidateUTF8() const;
0268 };
0269
0270
0271
0272
0273
0274 class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
0275 public:
0276 using TypeClass = FixedSizeBinaryType;
0277 using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
0278
0279 explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
0280
0281 FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
0282 const std::shared_ptr<Buffer>& data,
0283 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0284 int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0285
0286 const uint8_t* GetValue(int64_t i) const { return values_ + i * byte_width_; }
0287 const uint8_t* Value(int64_t i) const { return GetValue(i); }
0288
0289 std::string_view GetView(int64_t i) const {
0290 return std::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width_);
0291 }
0292
0293 std::optional<std::string_view> operator[](int64_t i) const {
0294 return *IteratorType(*this, i);
0295 }
0296
0297 std::string GetString(int64_t i) const { return std::string(GetView(i)); }
0298
0299 int32_t byte_width() const { return byte_width_; }
0300
0301 const uint8_t* raw_values() const { return values_; }
0302
0303 IteratorType begin() const { return IteratorType(*this); }
0304
0305 IteratorType end() const { return IteratorType(*this, length()); }
0306
0307 protected:
0308 void SetData(const std::shared_ptr<ArrayData>& data) {
0309 this->PrimitiveArray::SetData(data);
0310 byte_width_ =
0311 internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
0312 values_ = raw_values_ + data_->offset * byte_width_;
0313 }
0314
0315 const uint8_t* values_;
0316 int32_t byte_width_;
0317 };
0318
0319
0320
0321 }