File indexing completed on 2026-04-17 08:28:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <limits>
0021 #include <memory>
0022 #include <random>
0023 #include <string>
0024 #include <utility>
0025 #include <vector>
0026
0027 #include "arrow/array.h"
0028 #include "arrow/array/builder_binary.h"
0029 #include "arrow/array/builder_decimal.h"
0030 #include "arrow/array/builder_primitive.h"
0031 #include "arrow/testing/gtest_util.h"
0032 #include "arrow/testing/random.h"
0033 #include "arrow/type_fwd.h"
0034 #include "arrow/type_traits.h"
0035 #include "arrow/util/decimal.h"
0036 #include "arrow/util/float16.h"
0037 #include "parquet/column_reader.h"
0038 #include "parquet/test_util.h"
0039
0040 namespace parquet {
0041
0042 using internal::RecordReader;
0043
0044 namespace arrow {
0045
0046 using ::arrow::Array;
0047 using ::arrow::ChunkedArray;
0048 using ::arrow::Status;
0049
0050 template <typename T, int32_t PRECISION, typename = ::arrow::enable_if_decimal<T>>
0051 struct DecimalWithPrecisionAndScale {
0052 using type = T;
0053 static_assert(PRECISION >= T::kMinPrecision && PRECISION <= T::kMaxPrecision,
0054 "Invalid precision value");
0055 static constexpr ::arrow::Type::type type_id = T::type_id;
0056 static constexpr int32_t precision = PRECISION;
0057 static constexpr int32_t scale = PRECISION - 1;
0058 };
0059 template <int32_t PRECISION>
0060 using Decimal32WithPrecisionAndScale =
0061 DecimalWithPrecisionAndScale<::arrow::Decimal32Type, PRECISION>;
0062 template <int32_t PRECISION>
0063 using Decimal64WithPrecisionAndScale =
0064 DecimalWithPrecisionAndScale<::arrow::Decimal64Type, PRECISION>;
0065 template <int32_t PRECISION>
0066 using Decimal128WithPrecisionAndScale =
0067 DecimalWithPrecisionAndScale<::arrow::Decimal128Type, PRECISION>;
0068 template <int32_t PRECISION>
0069 using Decimal256WithPrecisionAndScale =
0070 DecimalWithPrecisionAndScale<::arrow::Decimal256Type, PRECISION>;
0071
0072 template <class ArrowType>
0073 ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
0074 size_t size, std::shared_ptr<Array>* out) {
0075 using c_type = typename ArrowType::c_type;
0076 std::vector<c_type> values;
0077 if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
0078 values.resize(size);
0079 test::random_float16_numbers(static_cast<int>(size), 0, ::arrow::util::Float16(0.0f),
0080 ::arrow::util::Float16(1.0f), values.data());
0081 } else {
0082 ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
0083 &values);
0084 }
0085 ::arrow::NumericBuilder<ArrowType> builder;
0086 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
0087 return builder.Finish(out);
0088 }
0089
0090 template <class ArrowType>
0091 ::arrow::enable_if_integer<ArrowType, Status> NonNullArray(size_t size,
0092 std::shared_ptr<Array>* out) {
0093 std::vector<typename ArrowType::c_type> values;
0094 ::arrow::randint(size, 0, 64, &values);
0095
0096
0097 ::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
0098 ::arrow::default_memory_pool());
0099 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
0100 return builder.Finish(out);
0101 }
0102
0103 template <class ArrowType>
0104 ::arrow::enable_if_date<ArrowType, Status> NonNullArray(size_t size,
0105 std::shared_ptr<Array>* out) {
0106 std::vector<typename ArrowType::c_type> values;
0107 ::arrow::randint(size, 0, 24, &values);
0108 for (size_t i = 0; i < size; i++) {
0109 values[i] *= 86400000;
0110 }
0111
0112
0113 ::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
0114 ::arrow::default_memory_pool());
0115 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
0116 return builder.Finish(out);
0117 }
0118
0119 template <class ArrowType>
0120 ::arrow::enable_if_base_binary<ArrowType, Status> NonNullArray(
0121 size_t size, std::shared_ptr<Array>* out) {
0122 using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
0123 BuilderType builder;
0124 for (size_t i = 0; i < size; i++) {
0125 RETURN_NOT_OK(builder.Append("test-string"));
0126 }
0127 return builder.Finish(out);
0128 }
0129
0130 template <typename ArrowType>
0131 ::arrow::enable_if_fixed_size_binary<ArrowType, Status> NonNullArray(
0132 size_t size, std::shared_ptr<Array>* out) {
0133 using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
0134
0135
0136 BuilderType builder(::arrow::fixed_size_binary(5));
0137 for (size_t i = 0; i < size; i++) {
0138 RETURN_NOT_OK(builder.Append("fixed"));
0139 }
0140 return builder.Finish(out);
0141 }
0142
0143 template <int32_t byte_width>
0144 static void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) {
0145 auto gen = ::arrow::random::RandomArrayGenerator(seed);
0146 std::shared_ptr<Array> decimals;
0147 if constexpr (byte_width == 4) {
0148 decimals = gen.Decimal32(::arrow::decimal32(precision, 0), n);
0149 } else if constexpr (byte_width == 8) {
0150 decimals = gen.Decimal64(::arrow::decimal64(precision, 0), n);
0151 } else if constexpr (byte_width == 16) {
0152 decimals = gen.Decimal128(::arrow::decimal128(precision, 0), n);
0153 } else {
0154 decimals = gen.Decimal256(::arrow::decimal256(precision, 0), n);
0155 }
0156 std::memcpy(out, decimals->data()->GetValues<uint8_t>(1, 0), byte_width * n);
0157 }
0158
0159 template <typename ArrowType, int32_t precision = ArrowType::precision>
0160 ::arrow::enable_if_t<std::is_same_v<ArrowType, DecimalWithPrecisionAndScale<
0161 typename ArrowType::type, precision>>,
0162 Status>
0163 NonNullArray(size_t size, std::shared_ptr<Array>* out) {
0164 constexpr int32_t kDecimalPrecision = precision;
0165 constexpr int32_t kDecimalScale = ArrowType::scale;
0166
0167 const auto type =
0168 std::make_shared<typename ArrowType::type>(kDecimalPrecision, kDecimalScale);
0169 const int32_t byte_width = type->byte_width();
0170
0171 constexpr int32_t seed = 0;
0172
0173 ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
0174 random_decimals<ArrowType::type::kByteWidth>(size, seed, kDecimalPrecision,
0175 out_buf->mutable_data());
0176
0177 using Builder = typename ::arrow::TypeTraits<typename ArrowType::type>::BuilderType;
0178 Builder builder(type);
0179 RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size));
0180 return builder.Finish(out);
0181 }
0182
0183 template <class ArrowType>
0184 ::arrow::enable_if_boolean<ArrowType, Status> NonNullArray(size_t size,
0185 std::shared_ptr<Array>* out) {
0186 std::vector<uint8_t> values;
0187 ::arrow::randint(size, 0, 1, &values);
0188 ::arrow::BooleanBuilder builder;
0189 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
0190 return builder.Finish(out);
0191 }
0192
0193
0194 template <typename ArrowType>
0195 ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
0196 size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Array>* out) {
0197 using c_type = typename ArrowType::c_type;
0198 std::vector<c_type> values;
0199 if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
0200 values.resize(size);
0201 test::random_float16_numbers(static_cast<int>(size), 0, ::arrow::util::Float16(-1e4f),
0202 ::arrow::util::Float16(1e4f), values.data());
0203 } else {
0204 ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
0205 static_cast<c_type>(1e10), &values);
0206 }
0207 std::vector<uint8_t> valid_bytes(size, 1);
0208
0209 for (size_t i = 0; i < num_nulls; i++) {
0210 valid_bytes[i * 2] = 0;
0211 }
0212
0213 ::arrow::NumericBuilder<ArrowType> builder;
0214 if (values.size() > 0) {
0215 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
0216 }
0217 return builder.Finish(out);
0218 }
0219
0220
0221 template <typename ArrowType>
0222 ::arrow::enable_if_integer<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
0223 uint32_t seed,
0224 std::shared_ptr<Array>* out) {
0225 std::vector<typename ArrowType::c_type> values;
0226
0227
0228 (void)seed;
0229 ::arrow::randint(size, 0, 64, &values);
0230 std::vector<uint8_t> valid_bytes(size, 1);
0231
0232 for (size_t i = 0; i < num_nulls; i++) {
0233 valid_bytes[i * 2] = 0;
0234 }
0235
0236
0237 ::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
0238 ::arrow::default_memory_pool());
0239 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
0240 return builder.Finish(out);
0241 }
0242
0243 template <typename ArrowType>
0244 ::arrow::enable_if_date<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
0245 uint32_t seed,
0246 std::shared_ptr<Array>* out) {
0247 std::vector<typename ArrowType::c_type> values;
0248
0249
0250 (void)seed;
0251 ::arrow::randint(size, 0, 24, &values);
0252 for (size_t i = 0; i < size; i++) {
0253 values[i] *= 86400000;
0254 }
0255 std::vector<uint8_t> valid_bytes(size, 1);
0256
0257 for (size_t i = 0; i < num_nulls; i++) {
0258 valid_bytes[i * 2] = 0;
0259 }
0260
0261
0262 ::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
0263 ::arrow::default_memory_pool());
0264 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
0265 return builder.Finish(out);
0266 }
0267
0268
0269 template <typename ArrowType>
0270 ::arrow::enable_if_base_binary<ArrowType, Status> NullableArray(
0271 size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
0272 std::vector<uint8_t> valid_bytes(size, 1);
0273
0274 for (size_t i = 0; i < num_nulls; i++) {
0275 valid_bytes[i * 2] = 0;
0276 }
0277
0278 using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
0279 BuilderType builder;
0280
0281 const int kBufferSize = 10;
0282 uint8_t buffer[kBufferSize];
0283 for (size_t i = 0; i < size; i++) {
0284 if (!valid_bytes[i]) {
0285 RETURN_NOT_OK(builder.AppendNull());
0286 } else {
0287 ::arrow::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
0288 if (ArrowType::is_utf8) {
0289
0290 for (auto& byte : buffer) {
0291 byte &= 0x7f;
0292 }
0293 }
0294 RETURN_NOT_OK(builder.Append(buffer, kBufferSize));
0295 }
0296 }
0297 return builder.Finish(out);
0298 }
0299
0300
0301
0302 template <typename ArrowType>
0303 ::arrow::enable_if_fixed_size_binary<ArrowType, Status> NullableArray(
0304 size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
0305 std::vector<uint8_t> valid_bytes(size, 1);
0306
0307 for (size_t i = 0; i < num_nulls; i++) {
0308 valid_bytes[i * 2] = 0;
0309 }
0310
0311 using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
0312 const int byte_width = 10;
0313 BuilderType builder(::arrow::fixed_size_binary(byte_width));
0314
0315 const int kBufferSize = byte_width;
0316 uint8_t buffer[kBufferSize];
0317 for (size_t i = 0; i < size; i++) {
0318 if (!valid_bytes[i]) {
0319 RETURN_NOT_OK(builder.AppendNull());
0320 } else {
0321 ::arrow::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
0322 RETURN_NOT_OK(builder.Append(buffer));
0323 }
0324 }
0325 return builder.Finish(out);
0326 }
0327
0328 template <typename ArrowType, int32_t precision = ArrowType::precision>
0329 ::arrow::enable_if_t<std::is_same_v<ArrowType, DecimalWithPrecisionAndScale<
0330 typename ArrowType::type, precision>>,
0331 Status>
0332 NullableArray(size_t size, size_t num_nulls, uint32_t seed,
0333 std::shared_ptr<::arrow::Array>* out) {
0334 std::vector<uint8_t> valid_bytes(size, '\1');
0335
0336 for (size_t i = 0; i < num_nulls; ++i) {
0337 valid_bytes[i * 2] = '\0';
0338 }
0339
0340 constexpr int32_t kDecimalPrecision = precision;
0341 constexpr int32_t kDecimalScale = ArrowType::scale;
0342
0343 const auto type =
0344 std::make_shared<typename ArrowType::type>(kDecimalPrecision, kDecimalScale);
0345 const int32_t byte_width = type->byte_width();
0346
0347 ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
0348 random_decimals<ArrowType::type::kByteWidth>(size, seed, precision,
0349 out_buf->mutable_data());
0350
0351 using Builder = typename ::arrow::TypeTraits<typename ArrowType::type>::BuilderType;
0352 Builder builder(type);
0353 RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size, valid_bytes.data()));
0354 return builder.Finish(out);
0355 }
0356
0357
0358 template <class ArrowType>
0359 ::arrow::enable_if_boolean<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
0360 uint32_t seed,
0361 std::shared_ptr<Array>* out) {
0362 std::vector<uint8_t> values;
0363
0364
0365 (void)seed;
0366
0367 ::arrow::randint(size, 0, 1, &values);
0368 std::vector<uint8_t> valid_bytes(size, 1);
0369
0370 for (size_t i = 0; i < num_nulls; i++) {
0371 valid_bytes[i * 2] = 0;
0372 }
0373
0374 ::arrow::BooleanBuilder builder;
0375 RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
0376 return builder.Finish(out);
0377 }
0378
0379
0380
0381
0382 Status MakeListArray(const std::shared_ptr<Array>& values, int64_t size,
0383 int64_t null_count, const std::string& item_name,
0384 bool nullable_values, std::shared_ptr<::arrow::ListArray>* out) {
0385
0386 int64_t non_null_entries = size - null_count - 1;
0387 int64_t length_per_entry = values->length() / non_null_entries;
0388
0389 auto offsets = AllocateBuffer();
0390 RETURN_NOT_OK(offsets->Resize((size + 1) * sizeof(int32_t)));
0391 int32_t* offsets_ptr = reinterpret_cast<int32_t*>(offsets->mutable_data());
0392
0393 auto null_bitmap = AllocateBuffer();
0394 int64_t bitmap_size = ::arrow::bit_util::BytesForBits(size);
0395 RETURN_NOT_OK(null_bitmap->Resize(bitmap_size));
0396 uint8_t* null_bitmap_ptr = null_bitmap->mutable_data();
0397 memset(null_bitmap_ptr, 0, bitmap_size);
0398
0399 int32_t current_offset = 0;
0400 for (int64_t i = 0; i < size; i++) {
0401 offsets_ptr[i] = current_offset;
0402 if (!(((i % 2) == 0) && ((i / 2) < null_count))) {
0403
0404 ::arrow::bit_util::SetBit(null_bitmap_ptr, i);
0405 if (i != 1) {
0406 current_offset += static_cast<int32_t>(length_per_entry);
0407 }
0408 }
0409 }
0410 offsets_ptr[size] = static_cast<int32_t>(values->length());
0411
0412 auto value_field = ::arrow::field(item_name, values->type(), nullable_values);
0413 *out = std::make_shared<::arrow::ListArray>(::arrow::list(value_field), size, offsets,
0414 values, null_bitmap, null_count);
0415
0416 return Status::OK();
0417 }
0418
0419
0420 Status MakeEmptyListsArray(int64_t size, std::shared_ptr<Array>* out_array) {
0421
0422 const int64_t offsets_nbytes = (size + 1) * sizeof(int32_t);
0423 ARROW_ASSIGN_OR_RAISE(auto offsets_buffer, ::arrow::AllocateBuffer(offsets_nbytes));
0424 memset(offsets_buffer->mutable_data(), 0, offsets_nbytes);
0425
0426 auto value_field =
0427 ::arrow::field("item", ::arrow::float64(), false );
0428 auto list_type = ::arrow::list(value_field);
0429
0430 std::vector<std::shared_ptr<Buffer>> child_buffers = {nullptr ,
0431 nullptr };
0432 auto child_data =
0433 ::arrow::ArrayData::Make(value_field->type(), 0, std::move(child_buffers));
0434
0435 std::vector<std::shared_ptr<Buffer>> buffers = {nullptr ,
0436 std::move(offsets_buffer)};
0437 auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers));
0438 array_data->child_data.push_back(child_data);
0439
0440 *out_array = ::arrow::MakeArray(array_data);
0441 return Status::OK();
0442 }
0443
0444 std::shared_ptr<::arrow::Table> MakeSimpleTable(
0445 const std::shared_ptr<ChunkedArray>& values, bool nullable) {
0446 auto schema = ::arrow::schema({::arrow::field("col", values->type(), nullable)});
0447 return ::arrow::Table::Make(schema, {values});
0448 }
0449
0450 std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& values,
0451 bool nullable) {
0452 auto carr = std::make_shared<::arrow::ChunkedArray>(values);
0453 return MakeSimpleTable(carr, nullable);
0454 }
0455
0456 template <typename T>
0457 void ExpectArray(T* expected, Array* result) {
0458 auto p_array = static_cast<::arrow::PrimitiveArray*>(result);
0459 for (int i = 0; i < result->length(); i++) {
0460 EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->values()->data())[i]);
0461 }
0462 }
0463
0464 template <typename ArrowType>
0465 void ExpectArrayT(void* expected, Array* result) {
0466 ::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
0467 for (int64_t i = 0; i < result->length(); i++) {
0468 EXPECT_EQ(reinterpret_cast<typename ArrowType::c_type*>(expected)[i],
0469 reinterpret_cast<const typename ArrowType::c_type*>(
0470 p_array->values()->data())[i]);
0471 }
0472 }
0473
0474 template <>
0475 void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) {
0476 ::arrow::BooleanBuilder builder;
0477 ARROW_EXPECT_OK(
0478 builder.AppendValues(reinterpret_cast<uint8_t*>(expected), result->length()));
0479
0480 std::shared_ptr<Array> expected_array;
0481 ARROW_EXPECT_OK(builder.Finish(&expected_array));
0482 EXPECT_TRUE(result->Equals(*expected_array));
0483 }
0484
0485 }
0486
0487 }