File indexing completed on 2025-08-28 08:27:01
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <vector>
0024
0025 #include "arrow/array/array_base.h"
0026 #include "arrow/compute/type_fwd.h"
0027 #include "arrow/testing/gtest_util.h"
0028 #include "arrow/testing/visibility.h"
0029 #include "arrow/type_fwd.h"
0030
0031 namespace arrow {
0032
0033 class ARROW_TESTING_EXPORT ConstantArrayGenerator {
0034  public:
0035   // NOTE(review): except for Numeric(), every member below is only declared in
0036   // this header. Anything worded as "presumably" is inferred from names and
0037   // signatures and should be confirmed against the out-of-line definitions.
0038
0039   /// \brief Boolean factory; presumably yields `size` elements equal to `value`
0040   /// (false unless specified).
0041   static std::shared_ptr<Array> Boolean(int64_t size, bool value = false);
0042
0043
0044
0045
0046
0047   /// \brief UInt8 factory; presumably yields `size` elements equal to `value`
0048   /// (0 unless specified).
0049   static std::shared_ptr<Array> UInt8(int64_t size, uint8_t value = 0);
0050
0051
0052
0053
0054
0055   /// \brief Int8 factory; presumably yields `size` elements equal to `value`
0056   /// (0 unless specified).
0057   static std::shared_ptr<Array> Int8(int64_t size, int8_t value = 0);
0058
0059
0060
0061
0062
0063   /// \brief UInt16 factory; presumably yields `size` elements equal to `value`
0064   /// (0 unless specified).
0065   static std::shared_ptr<Array> UInt16(int64_t size, uint16_t value = 0);
0066
0067
0068
0069
0070
0071   /// \brief Int16 factory; presumably yields `size` elements equal to `value`
0072   /// (0 unless specified).
0073   static std::shared_ptr<Array> Int16(int64_t size, int16_t value = 0);
0074
0075
0076
0077
0078
0079   /// \brief UInt32 factory; presumably yields `size` elements equal to `value`
0080   /// (0 unless specified).
0081   static std::shared_ptr<Array> UInt32(int64_t size, uint32_t value = 0);
0082
0083
0084
0085
0086
0087   /// \brief Int32 factory; presumably yields `size` elements equal to `value`
0088   /// (0 unless specified).
0089   static std::shared_ptr<Array> Int32(int64_t size, int32_t value = 0);
0090
0091
0092
0093
0094
0095   /// \brief UInt64 factory; presumably yields `size` elements equal to `value`
0096   /// (0 unless specified).
0097   static std::shared_ptr<Array> UInt64(int64_t size, uint64_t value = 0);
0098
0099
0100
0101
0102
0103   /// \brief Int64 factory; presumably yields `size` elements equal to `value`
0104   /// (0 unless specified).
0105   static std::shared_ptr<Array> Int64(int64_t size, int64_t value = 0);
0106
0107
0108
0109
0110
0111   /// \brief Float32 factory; presumably yields `size` elements equal to `value`
0112   /// (0 unless specified).
0113   static std::shared_ptr<Array> Float32(int64_t size, float value = 0);
0114
0115
0116
0117
0118
0119   /// \brief Float64 factory; presumably yields `size` elements equal to `value`
0120   /// (0 unless specified).
0121   static std::shared_ptr<Array> Float64(int64_t size, double value = 0);
0122
0123
0124
0125
0126
0127   /// \brief String factory; presumably yields `size` elements equal to `value`
0128   /// (the empty string unless specified).
0129   static std::shared_ptr<Array> String(int64_t size, std::string value = "");
0130   /// \brief Dispatch on ArrowType::type_id to the matching factory; nullptr if unsupported.
0131   template <typename ArrowType, typename CType = typename ArrowType::c_type>
0132   static std::shared_ptr<Array> Numeric(int64_t size, CType value = 0) {
0133     switch (ArrowType::type_id) {
0134       case Type::BOOL:
0135         return Boolean(size, static_cast<bool>(value));
0136       case Type::UINT8:
0137         return UInt8(size, static_cast<uint8_t>(value));
0138       case Type::INT8:
0139         return Int8(size, static_cast<int8_t>(value));
0140       case Type::UINT16:
0141         return UInt16(size, static_cast<uint16_t>(value));
0142       case Type::INT16:
0143         return Int16(size, static_cast<int16_t>(value));
0144       case Type::UINT32:
0145         return UInt32(size, static_cast<uint32_t>(value));
0146       case Type::INT32:
0147         return Int32(size, static_cast<int32_t>(value));
0148       case Type::UINT64:
0149         return UInt64(size, static_cast<uint64_t>(value));
0150       case Type::INT64:
0151         return Int64(size, static_cast<int64_t>(value));
0152       case Type::FLOAT:
0153         return Float32(size, static_cast<float>(value));
0154       case Type::DOUBLE:
0155         return Float64(size, static_cast<double>(value));
0156       case Type::INTERVAL_DAY_TIME:  // NOTE(review): shares the date32 view below -- confirm this is intended
0157       case Type::DATE32: {
0158         EXPECT_OK_AND_ASSIGN(auto viewed,  // cast matches Int32's int32_t parameter
0159                              Int32(size, static_cast<int32_t>(value))->View(date32()));
0160         return viewed;
0161       }
0162       case Type::INTERVAL_MONTHS: {
0163         EXPECT_OK_AND_ASSIGN(auto viewed,
0164                              Int32(size, static_cast<int32_t>(value))
0165                                  ->View(std::make_shared<MonthIntervalType>()));
0166         return viewed;
0167       }
0168       case Type::TIME32: {  // always viewed with TimeUnit::SECOND
0169         EXPECT_OK_AND_ASSIGN(auto viewed,
0170                              Int32(size, static_cast<int32_t>(value))
0171                                  ->View(std::make_shared<Time32Type>(TimeUnit::SECOND)));
0172         return viewed;
0173       }
0174       case Type::TIME64: {  // viewed as a default-constructed Time64Type; cast matches Int64's int64_t
0175         EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast<int64_t>(value))
0176                                               ->View(std::make_shared<Time64Type>()));
0177         return viewed;
0178       }
0179       case Type::DATE64: {
0180         EXPECT_OK_AND_ASSIGN(auto viewed,
0181                              Int64(size, static_cast<int64_t>(value))->View(date64()));
0182         return viewed;
0183       }
0184       case Type::TIMESTAMP: {  // always viewed with TimeUnit::SECOND
0185         EXPECT_OK_AND_ASSIGN(
0186             auto viewed, Int64(size, static_cast<int64_t>(value))
0187                              ->View(std::make_shared<TimestampType>(TimeUnit::SECOND)));
0188         return viewed;
0189       }
0190       default:  // any type id not listed above
0191         return nullptr;
0192     }
0193   }
0194
0195
0196
0197
0198
0199   /// \brief Declaration only; presumably an array of `size` zero-valued
0200   /// elements of the given `type`.
0201   static std::shared_ptr<Array> Zeroes(int64_t size,
0202                                        const std::shared_ptr<DataType>& type);
0203
0204
0205
0206
0207
0208
0209
0210   /// \brief Declaration only; presumably a record batch of `size` zero-valued
0211   /// rows laid out according to `schema`.
0212   static std::shared_ptr<RecordBatch> Zeroes(int64_t size,
0213                                              const std::shared_ptr<Schema>& schema);
0214
0215
0216
0217
0218   /// \brief Declaration only; presumably a reader that serves `batch`
0219   /// `n_batch` times. `batch` is passed by value; the top-level `const` has no
0220   /// effect on the declared signature.
0221   static std::shared_ptr<RecordBatchReader> Repeat(
0222       int64_t n_batch, const std::shared_ptr<RecordBatch> batch);
0223
0224
0225
0226
0227
0228
0229   /// \brief Declaration only; presumably a reader over `n_batch` zero-filled
0230   /// batches of `batch_size` rows each, following `schema`.
0231   static std::shared_ptr<RecordBatchReader> Zeroes(int64_t n_batch, int64_t batch_size,
0232                                                    const std::shared_ptr<Schema>& schema);
0233 };
0234 /// Declaration only. NOTE(review): presumably packs `scalars` into a single Array, reporting failure through Result -- confirm.
0235 ARROW_TESTING_EXPORT
0236 Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars);
0237
0238 namespace gen {
0239 /// Abstract interface (all operations pure virtual) for objects that hand out arrays and report a DataType.
0240 class ARROW_TESTING_EXPORT ArrayGenerator {
0241 public:
0242 virtual ~ArrayGenerator() = default;  // virtual so implementations can be destroyed through this interface
0243 virtual Result<std::shared_ptr<Array>> Generate(int64_t num_rows) = 0;  // returns an array or an error via Result; presumably `num_rows` long -- confirm
0244 virtual std::shared_ptr<DataType> type() const = 0;  // NOTE(review): presumably the type of the arrays Generate() yields -- confirm
0245 };
0246
0247 /// Same method set as DataGenerator (minus FailOnError), but each method returns the bare value rather than a Result<>.
0248 class ARROW_TESTING_EXPORT GTestDataGenerator {
0249 public:
0250 virtual ~GTestDataGenerator() = default;
0251 virtual std::shared_ptr<::arrow::RecordBatch> RecordBatch(int64_t num_rows) = 0;  // method name hides the type name in class scope, hence the ::arrow:: qualification
0252 virtual std::vector<std::shared_ptr<::arrow::RecordBatch>> RecordBatches(
0253 int64_t rows_per_batch, int num_batches) = 0;
0254 // ExecBatch-returning counterparts of the two methods above.
0255 virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0;
0256 virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t rows_per_batch,
0257 int num_batches) = 0;
0258 // Table-returning variant; `num_chunks` defaults to 1.
0259 virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk,
0260 int num_chunks = 1) = 0;
0261 virtual std::shared_ptr<::arrow::Schema> Schema() = 0;  // NOTE(review): presumably the schema of the generated data -- confirm
0262 };
0263 /// Abstract interface whose data-producing methods return Result<>, so failures are reported to the caller.
0264 class ARROW_TESTING_EXPORT DataGenerator {
0265 public:
0266 virtual ~DataGenerator() = default;
0267 virtual Result<std::shared_ptr<::arrow::RecordBatch>> RecordBatch(int64_t num_rows) = 0;  // method name hides the type name in class scope, hence the ::arrow:: qualification
0268 virtual Result<std::vector<std::shared_ptr<::arrow::RecordBatch>>> RecordBatches(
0269 int64_t rows_per_batch, int num_batches) = 0;
0270 // ExecBatch-returning counterparts of the two methods above.
0271 virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0;
0272 virtual Result<std::vector<::arrow::compute::ExecBatch>> ExecBatches(
0273 int64_t rows_per_batch, int num_batches) = 0;
0274 // Table-returning variant; `num_chunks` defaults to 1.
0275 virtual Result<std::shared_ptr<::arrow::Table>> Table(int64_t rows_per_chunk,
0276 int num_chunks = 1) = 0;
0277 virtual std::shared_ptr<::arrow::Schema> Schema() = 0;  // the only data accessor here that is not wrapped in Result
0278 // Hands back a GTestDataGenerator, whose methods return bare values instead of Result<>.
0279 // NOTE(review): by name, presumably errors then surface as googletest failures -- confirm.
0280 virtual std::unique_ptr<GTestDataGenerator> FailOnError() = 0;
0281 };
0282
0283 /// \brief Pairs an ArrayGenerator with an optional name; element type of the
0284 /// `column_gens` vector accepted by Gen(). Both constructors are non-explicit,
0285 /// so a generator pointer converts implicitly to a GeneratorField.
0286 struct ARROW_TESTING_EXPORT GeneratorField {
0287   /// Unnamed field: `name` is left disengaged.
0288   GeneratorField(std::shared_ptr<ArrayGenerator> generator)
0289       : name(std::nullopt), gen(std::move(generator)) {}
0290   GeneratorField(std::string field_name, std::shared_ptr<ArrayGenerator> generator)
0291       : name(std::move(field_name)), gen(std::move(generator)) {}
0292
0293   std::optional<std::string> name;  ///< set only by the two-argument constructor
0294   std::shared_ptr<ArrayGenerator> gen;  ///< generator supplied at construction
0295 };
0296
0297 /// Declaration only: returns a DataGenerator for `column_gens` (presumably one output column per entry -- confirm).
0298 ARROW_TESTING_EXPORT std::shared_ptr<DataGenerator> Gen(
0299 std::vector<GeneratorField> column_gens);
0300
0301 /// Declaration only: returns an ArrayGenerator built from `value` (presumably every generated element equals it -- confirm).
0302 ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Constant(
0303 std::shared_ptr<Scalar> value);
0304
0305 /// Declaration only: returns an ArrayGenerator; defaults are start = 0, step = 1, signed_int = false.
0306 /// NOTE(review): presumably emits start, start + step, ... and `signed_int` selects a signed output type -- confirm.
0307 ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Step(uint32_t start = 0,
0308 uint32_t step = 1,
0309 bool signed_int = false);
0310 /// Declaration only: returns an ArrayGenerator for `type` (presumably producing random values of that type -- confirm).
0311 ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Random(
0312 std::shared_ptr<DataType> type);
0313
0314
0315
0316
0317
0318
0319 }
0320
0321 }