File indexing completed on 2025-08-28 08:27:00
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <type_traits>
0023 #include <vector>
0024
0025 #include "arrow/array.h"
0026 #include "arrow/array/builder_binary.h"
0027 #include "arrow/array/builder_primitive.h"
0028 #include "arrow/array/builder_time.h"
0029 #include "arrow/buffer.h"
0030 #include "arrow/testing/gtest_util.h"
0031 #include "arrow/type_fwd.h"
0032 #include "arrow/util/bit_util.h"
0033 #include "arrow/visit_type_inline.h"
0034
0035 namespace arrow {
0036
0037
0038
0039 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0040 void ArrayFromVector(const std::shared_ptr<DataType>& type,
0041 const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
0042 std::shared_ptr<Array>* out) {
0043 auto type_id = TYPE::type_id;
0044 ASSERT_EQ(type_id, type->id())
0045 << "template parameter and concrete DataType instance don't agree";
0046
0047 std::unique_ptr<ArrayBuilder> builder_ptr;
0048 ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
0049
0050 auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
0051
0052 for (size_t i = 0; i < values.size(); ++i) {
0053 if (is_valid[i]) {
0054 ASSERT_OK(builder.Append(values[i]));
0055 } else {
0056 ASSERT_OK(builder.AppendNull());
0057 }
0058 }
0059 ASSERT_OK(builder.Finish(out));
0060 }
0061
0062 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0063 void ArrayFromVector(const std::shared_ptr<DataType>& type,
0064 const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
0065 auto type_id = TYPE::type_id;
0066 ASSERT_EQ(type_id, type->id())
0067 << "template parameter and concrete DataType instance don't agree";
0068
0069 std::unique_ptr<ArrayBuilder> builder_ptr;
0070 ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
0071
0072 auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
0073
0074 for (size_t i = 0; i < values.size(); ++i) {
0075 ASSERT_OK(builder.Append(values[i]));
0076 }
0077 ASSERT_OK(builder.Finish(out));
0078 }
0079
0080
0081
0082 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0083 void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
0084 std::shared_ptr<Array>* out) {
0085 auto type = TypeTraits<TYPE>::type_singleton();
0086 ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
0087 }
0088
0089 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0090 void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
0091 auto type = TypeTraits<TYPE>::type_singleton();
0092 ArrayFromVector<TYPE, C_TYPE>(type, values, out);
0093 }
0094
0095
0096
0097 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0098 void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
0099 const std::vector<std::vector<bool>>& is_valid,
0100 const std::vector<std::vector<C_TYPE>>& values,
0101 std::shared_ptr<ChunkedArray>* out) {
0102 ArrayVector chunks;
0103 ASSERT_EQ(is_valid.size(), values.size());
0104 for (size_t i = 0; i < values.size(); ++i) {
0105 std::shared_ptr<Array> array;
0106 ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
0107 chunks.push_back(array);
0108 }
0109 *out = std::make_shared<ChunkedArray>(chunks);
0110 }
0111
0112 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0113 void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
0114 const std::vector<std::vector<C_TYPE>>& values,
0115 std::shared_ptr<ChunkedArray>* out) {
0116 ArrayVector chunks;
0117 for (size_t i = 0; i < values.size(); ++i) {
0118 std::shared_ptr<Array> array;
0119 ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array);
0120 chunks.push_back(array);
0121 }
0122 *out = std::make_shared<ChunkedArray>(chunks);
0123 }
0124
0125
0126
0127 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0128 void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid,
0129 const std::vector<std::vector<C_TYPE>>& values,
0130 std::shared_ptr<ChunkedArray>* out) {
0131 auto type = TypeTraits<TYPE>::type_singleton();
0132 ChunkedArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
0133 }
0134
0135 template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
0136 void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values,
0137 std::shared_ptr<ChunkedArray>* out) {
0138 auto type = TypeTraits<TYPE>::type_singleton();
0139 ChunkedArrayFromVector<TYPE, C_TYPE>(type, values, out);
0140 }
0141
0142 template <typename BuilderType>
0143 void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
0144 ASSERT_OK_AND_ASSIGN(*out, builder->Finish());
0145 AssertZeroPadded(**out);
0146 TestInitialized(**out);
0147 }
0148
0149 template <class T, class Builder>
0150 Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
0151 int64_t size, Builder* builder, std::shared_ptr<Array>* out) {
0152
0153 for (int64_t i = 0; i < size; ++i) {
0154 if (valid_bytes[i] > 0) {
0155 RETURN_NOT_OK(builder->Append(values[i]));
0156 } else {
0157 RETURN_NOT_OK(builder->AppendNull());
0158 }
0159 }
0160 return builder->Finish(out);
0161 }
0162
0163 template <typename Fn>
0164 struct VisitBuilder {
0165 template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType,
0166
0167
0168 typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))>
0169 Status Visit(const T&, ArrayBuilder* builder, Fn&& fn) {
0170 fn(internal::checked_cast<BuilderType*>(builder));
0171 return Status::OK();
0172 }
0173
0174 Status Visit(const DataType& t, ArrayBuilder* builder, Fn&& fn) {
0175 return Status::NotImplemented("visiting builders of type ", t);
0176 }
0177 };
0178
0179 template <typename Fn>
0180 Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
0181 const std::shared_ptr<DataType>& type, int64_t initial_capacity,
0182 int64_t visitor_repetitions, Fn&& fn) {
0183 std::unique_ptr<ArrayBuilder> builder;
0184 RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder));
0185
0186 if (initial_capacity != 0) {
0187 RETURN_NOT_OK(builder->Resize(initial_capacity));
0188 }
0189
0190 VisitBuilder<Fn> visitor;
0191 for (int64_t i = 0; i < visitor_repetitions; ++i) {
0192 RETURN_NOT_OK(
0193 VisitTypeInline(*builder->type(), &visitor, builder.get(), std::forward<Fn>(fn)));
0194 }
0195
0196 std::shared_ptr<Array> out;
0197 RETURN_NOT_OK(builder->Finish(&out));
0198 return out;
0199 }
0200
0201 template <typename Fn>
0202 Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
0203 const std::shared_ptr<DataType>& type, int64_t length, Fn&& fn) {
0204 return ArrayFromBuilderVisitor(type, length, length, std::forward<Fn>(fn));
0205 }
0206
0207 template <typename T>
0208 static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
0209 std::shared_ptr<Buffer>* result) {
0210 size_t length = is_valid.size();
0211
0212 ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length));
0213
0214 uint8_t* bitmap = buffer->mutable_data();
0215 for (size_t i = 0; i < static_cast<size_t>(length); ++i) {
0216 if (is_valid[i]) {
0217 bit_util::SetBit(bitmap, i);
0218 }
0219 }
0220
0221 *result = buffer;
0222 return Status::OK();
0223 }
0224
0225 template <typename T>
0226 inline void BitmapFromVector(const std::vector<T>& is_valid,
0227 std::shared_ptr<Buffer>* out) {
0228 ASSERT_OK(GetBitmapFromVector(is_valid, out));
0229 }
0230
0231 }