File indexing completed on 2025-08-28 08:26:55
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <vector>
0024
0025 #include "arrow/array/array_nested.h"
0026 #include "arrow/array/builder_base.h"
0027 #include "arrow/array/data.h"
0028 #include "arrow/buffer_builder.h"
0029 #include "arrow/memory_pool.h"
0030 #include "arrow/status.h"
0031 #include "arrow/type.h"
0032 #include "arrow/util/visibility.h"
0033
0034 namespace arrow {
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044 class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
0045 public:
0046 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0047
0048
0049 using ArrayBuilder::Finish;
0050
0051
0052 Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062 int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
0063 const std::string& field_name = "");
0064
0065 std::shared_ptr<DataType> type() const override;
0066
0067 int64_t length() const override { return types_builder_.length(); }
0068
0069 protected:
0070 BasicUnionBuilder(MemoryPool* pool, int64_t alignment,
0071 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
0072 const std::shared_ptr<DataType>& type);
0073
0074 int8_t NextTypeId();
0075
0076 std::vector<std::shared_ptr<Field>> child_fields_;
0077 std::vector<int8_t> type_codes_;
0078 UnionMode::type mode_;
0079
0080 std::vector<ArrayBuilder*> type_id_to_children_;
0081 std::vector<int> type_id_to_child_id_;
0082
0083 int8_t dense_type_id_ = 0;
0084 TypedBufferBuilder<int8_t> types_builder_;
0085 };
0086
0087
0088
0089
0090 class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
0091 public:
0092
0093
0094
0095 explicit DenseUnionBuilder(MemoryPool* pool,
0096 int64_t alignment = kDefaultBufferAlignment)
0097 : BasicUnionBuilder(pool, alignment, {}, dense_union(FieldVector{})),
0098 offsets_builder_(pool, alignment) {}
0099
0100
0101
0102 DenseUnionBuilder(MemoryPool* pool,
0103 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
0104 const std::shared_ptr<DataType>& type,
0105 int64_t alignment = kDefaultBufferAlignment)
0106 : BasicUnionBuilder(pool, alignment, children, type),
0107 offsets_builder_(pool, alignment) {}
0108
0109 Status AppendNull() final {
0110 const int8_t first_child_code = type_codes_[0];
0111 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
0112 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
0113 ARROW_RETURN_NOT_OK(
0114 offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
0115
0116 return child_builder->AppendNull();
0117 }
0118
0119 Status AppendNulls(int64_t length) final {
0120 const int8_t first_child_code = type_codes_[0];
0121 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
0122 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
0123 ARROW_RETURN_NOT_OK(
0124 offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
0125
0126 return child_builder->AppendNull();
0127 }
0128
0129 Status AppendEmptyValue() final {
0130 const int8_t first_child_code = type_codes_[0];
0131 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
0132 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
0133 ARROW_RETURN_NOT_OK(
0134 offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
0135
0136 return child_builder->AppendEmptyValue();
0137 }
0138
0139 Status AppendEmptyValues(int64_t length) final {
0140 const int8_t first_child_code = type_codes_[0];
0141 ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
0142 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
0143 ARROW_RETURN_NOT_OK(
0144 offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
0145
0146 return child_builder->AppendEmptyValue();
0147 }
0148
0149
0150
0151
0152
0153
0154
0155
0156 Status Append(int8_t next_type) {
0157 ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
0158 if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
0159 return Status::CapacityError(
0160 "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
0161 "child");
0162 }
0163 auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
0164 return offsets_builder_.Append(offset);
0165 }
0166
0167 Status AppendArraySlice(const ArraySpan& array, int64_t offset,
0168 int64_t length) override;
0169
0170 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0171
0172 private:
0173 TypedBufferBuilder<int32_t> offsets_builder_;
0174 };
0175
0176
0177
0178
0179 class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
0180 public:
0181
0182
0183
0184 explicit SparseUnionBuilder(MemoryPool* pool,
0185 int64_t alignment = kDefaultBufferAlignment)
0186 : BasicUnionBuilder(pool, alignment, {}, sparse_union(FieldVector{})) {}
0187
0188
0189
0190 SparseUnionBuilder(MemoryPool* pool,
0191 const std::vector<std::shared_ptr<ArrayBuilder>>& children,
0192 const std::shared_ptr<DataType>& type,
0193 int64_t alignment = kDefaultBufferAlignment)
0194 : BasicUnionBuilder(pool, alignment, children, type) {}
0195
0196
0197
0198
0199 Status AppendNull() final {
0200 const auto first_child_code = type_codes_[0];
0201 ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
0202 ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
0203 for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
0204 ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
0205 }
0206 return Status::OK();
0207 }
0208
0209
0210
0211
0212 Status AppendNulls(int64_t length) final {
0213 const auto first_child_code = type_codes_[0];
0214 ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
0215 ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
0216 for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
0217 ARROW_RETURN_NOT_OK(
0218 type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
0219 }
0220 return Status::OK();
0221 }
0222
0223 Status AppendEmptyValue() final {
0224 ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
0225 for (int8_t code : type_codes_) {
0226 ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
0227 }
0228 return Status::OK();
0229 }
0230
0231 Status AppendEmptyValues(int64_t length) final {
0232 ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
0233 for (int8_t code : type_codes_) {
0234 ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
0235 }
0236 return Status::OK();
0237 }
0238
0239
0240
0241
0242
0243
0244
0245
0246 Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
0247
0248 Status AppendArraySlice(const ArraySpan& array, int64_t offset,
0249 int64_t length) override;
0250 };
0251
0252
0253
0254 }