File indexing completed on 2025-08-28 08:26:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <cstring>
0022 #include <memory>
0023 #include <type_traits>
0024
0025 #include "arrow/array/builder_base.h"
0026 #include "arrow/buffer.h"
0027 #include "arrow/status.h"
0028 #include "arrow/type.h"
0029 #include "arrow/util/macros.h"
0030 #include "arrow/util/visibility.h"
0031
0032 namespace arrow {
0033
0034
0035
0036
0037
0038 namespace internal {
0039
0040 class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
0041 public:
0042 AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool,
0043 int64_t alignment = kDefaultBufferAlignment);
0044
0045 explicit AdaptiveIntBuilderBase(MemoryPool* pool,
0046 int64_t alignment = kDefaultBufferAlignment)
0047 : AdaptiveIntBuilderBase(sizeof(uint8_t), pool, alignment) {}
0048
0049
0050
0051 Status AppendNulls(int64_t length) final {
0052 ARROW_RETURN_NOT_OK(CommitPendingData());
0053 if (ARROW_PREDICT_TRUE(length > 0)) {
0054 ARROW_RETURN_NOT_OK(Reserve(length));
0055 memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
0056 UnsafeSetNull(length);
0057 }
0058 return Status::OK();
0059 }
0060
0061 Status AppendNull() final {
0062 pending_data_[pending_pos_] = 0;
0063 pending_valid_[pending_pos_] = 0;
0064 pending_has_nulls_ = true;
0065 ++pending_pos_;
0066 ++length_;
0067 ++null_count_;
0068
0069 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
0070 return CommitPendingData();
0071 }
0072 return Status::OK();
0073 }
0074
0075 Status AppendEmptyValues(int64_t length) final {
0076 ARROW_RETURN_NOT_OK(CommitPendingData());
0077 if (ARROW_PREDICT_TRUE(length > 0)) {
0078 ARROW_RETURN_NOT_OK(Reserve(length));
0079 memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
0080 UnsafeSetNotNull(length);
0081 }
0082 return Status::OK();
0083 }
0084
0085 Status AppendEmptyValue() final {
0086 pending_data_[pending_pos_] = 0;
0087 pending_valid_[pending_pos_] = 1;
0088 ++pending_pos_;
0089 ++length_;
0090
0091 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
0092 return CommitPendingData();
0093 }
0094 return Status::OK();
0095 }
0096
0097 void Reset() override;
0098 Status Resize(int64_t capacity) override;
0099
0100 protected:
0101 Status AppendInternal(const uint64_t val) {
0102 pending_data_[pending_pos_] = val;
0103 pending_valid_[pending_pos_] = 1;
0104 ++pending_pos_;
0105 ++length_;
0106
0107 if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
0108 return CommitPendingData();
0109 }
0110 return Status::OK();
0111 }
0112
0113 virtual Status CommitPendingData() = 0;
0114
0115 template <typename new_type, typename old_type>
0116 typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
0117 ExpandIntSizeInternal();
0118 template <typename new_type, typename old_type>
0119 typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
0120 ExpandIntSizeInternal();
0121
0122 std::shared_ptr<ResizableBuffer> data_;
0123 uint8_t* raw_data_ = NULLPTR;
0124
0125 const uint8_t start_int_size_;
0126 uint8_t int_size_;
0127
0128 static constexpr int32_t pending_size_ = 1024;
0129 uint8_t pending_valid_[pending_size_];
0130 uint64_t pending_data_[pending_size_];
0131 int32_t pending_pos_ = 0;
0132 bool pending_has_nulls_ = false;
0133 };
0134
0135 }
0136
0137 class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
0138 public:
0139 explicit AdaptiveUIntBuilder(uint8_t start_int_size,
0140 MemoryPool* pool = default_memory_pool());
0141
0142 explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
0143 : AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
0144
0145 using internal::AdaptiveIntBuilderBase::Reset;
0146
0147
0148 Status Append(const uint64_t val) { return AppendInternal(val); }
0149
0150
0151
0152
0153
0154
0155
0156 Status AppendValues(const uint64_t* values, int64_t length,
0157 const uint8_t* valid_bytes = NULLPTR);
0158
0159 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0160
0161 std::shared_ptr<DataType> type() const override;
0162
0163 protected:
0164 Status CommitPendingData() override;
0165 Status ExpandIntSize(uint8_t new_int_size);
0166
0167 Status AppendValuesInternal(const uint64_t* values, int64_t length,
0168 const uint8_t* valid_bytes);
0169
0170 template <typename new_type>
0171 Status ExpandIntSizeN();
0172 };
0173
0174 class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
0175 public:
0176 explicit AdaptiveIntBuilder(uint8_t start_int_size,
0177 MemoryPool* pool = default_memory_pool(),
0178 int64_t alignment = kDefaultBufferAlignment);
0179
0180 explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool(),
0181 int64_t alignment = kDefaultBufferAlignment)
0182 : AdaptiveIntBuilder(sizeof(uint8_t), pool, alignment) {}
0183
0184 using internal::AdaptiveIntBuilderBase::Reset;
0185
0186
0187 Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
0188
0189
0190
0191
0192
0193
0194
0195 Status AppendValues(const int64_t* values, int64_t length,
0196 const uint8_t* valid_bytes = NULLPTR);
0197
0198 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0199
0200 std::shared_ptr<DataType> type() const override;
0201
0202 protected:
0203 Status CommitPendingData() override;
0204 Status ExpandIntSize(uint8_t new_int_size);
0205
0206 Status AppendValuesInternal(const int64_t* values, int64_t length,
0207 const uint8_t* valid_bytes);
0208
0209 template <typename new_type>
0210 Status ExpandIntSizeN();
0211 };
0212
0213
0214
0215 }