![]() |
|
|||
File indexing completed on 2025-08-28 08:27:01
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <algorithm> 0021 #include <cassert> 0022 #include <cstdint> 0023 #include <limits> 0024 #include <memory> 0025 #include <random> 0026 #include <vector> 0027 0028 #include "arrow/testing/uniform_real.h" 0029 #include "arrow/testing/visibility.h" 0030 #include "arrow/type.h" 0031 0032 namespace arrow { 0033 0034 class Array; 0035 0036 namespace random { 0037 0038 using SeedType = int32_t; 0039 constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max(); 0040 0041 class ARROW_TESTING_EXPORT RandomArrayGenerator { 0042 public: 0043 explicit RandomArrayGenerator(SeedType seed) 0044 : seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {} 0045 0046 /// \brief Generate a null bitmap 0047 /// 0048 /// \param[in] size the size of the bitmap to generate 0049 /// \param[in] null_probability the probability of a bit being zero 0050 /// \param[in] alignment alignment for memory allocations (in bytes) 0051 /// \param[in] memory_pool memory pool to allocate memory from 0052 /// 0053 /// \return a generated Buffer 0054 std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0, 0055 int64_t alignment = kDefaultBufferAlignment, 0056 MemoryPool* memory_pool = default_memory_pool()); 0057 0058 /// \brief Generate a random BooleanArray 0059 /// 0060 /// \param[in] size the size of the array to generate 0061 /// \param[in] true_probability the probability of a value being 1 / bit-set 0062 /// \param[in] null_probability the probability of a value being null 0063 /// \param[in] alignment alignment for memory allocations (in bytes) 0064 /// \param[in] memory_pool memory pool to allocate memory from 0065 /// 0066 /// \return a generated Array 0067 std::shared_ptr<Array> Boolean(int64_t size, double true_probability, 0068 double null_probability = 0, 0069 int64_t alignment = kDefaultBufferAlignment, 0070 MemoryPool* memory_pool = default_memory_pool()); 0071 /// \brief Generate a random UInt8Array 0072 /// 0073 /// \param[in] size the size of the array to generate 0074 /// \param[in] min the lower bound of the uniform distribution 0075 /// \param[in] max the upper bound of the uniform distribution 0076 /// \param[in] null_probability the probability of a value being null 0077 /// \param[in] alignment alignment for memory allocations (in bytes) 0078 /// \param[in] memory_pool memory pool to allocate memory from 0079 /// 0080 /// \return a generated Array 0081 std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max, 0082 double null_probability = 0, 0083 int64_t alignment = kDefaultBufferAlignment, 0084 MemoryPool* memory_pool = default_memory_pool()); 0085 0086 /// \brief Generate a random Int8Array 0087 /// 0088 /// \param[in] size the size of the array to generate 0089 /// \param[in] min the lower bound of the uniform distribution 0090 /// \param[in] max the upper bound of the uniform distribution 0091 /// \param[in] null_probability the probability of a value being null 0092 /// \param[in] alignment alignment for memory allocations (in bytes) 0093 /// \param[in] memory_pool memory pool to allocate memory from 0094 /// 0095 /// \return a generated Array 0096 std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max, 0097 double null_probability = 0, 0098 int64_t alignment = kDefaultBufferAlignment, 0099 MemoryPool* memory_pool = default_memory_pool()); 0100 0101 /// \brief Generate a random UInt16Array 0102 /// 0103 /// \param[in] size the size of the array to generate 0104 /// \param[in] min the lower bound of the uniform distribution 0105 /// \param[in] max the upper bound of the uniform distribution 0106 /// \param[in] null_probability the probability of a value being null 0107 /// \param[in] alignment alignment for memory allocations (in bytes) 0108 /// \param[in] memory_pool memory pool to allocate memory from 0109 /// 0110 /// \return a generated Array 0111 std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max, 0112 double null_probability = 0, 0113 int64_t alignment = kDefaultBufferAlignment, 0114 MemoryPool* memory_pool = default_memory_pool()); 0115 0116 /// \brief Generate a random Int16Array 0117 /// 0118 /// \param[in] size the size of the array to generate 0119 /// \param[in] min the lower bound of the uniform distribution 0120 /// \param[in] max the upper bound of the uniform distribution 0121 /// \param[in] null_probability the probability of a value being null 0122 /// \param[in] alignment alignment for memory allocations (in bytes) 0123 /// \param[in] memory_pool memory pool to allocate memory from 0124 /// 0125 /// \return a generated Array 0126 std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max, 0127 double null_probability = 0, 0128 int64_t alignment = kDefaultBufferAlignment, 0129 MemoryPool* memory_pool = default_memory_pool()); 0130 0131 /// \brief Generate a random UInt32Array 0132 /// 0133 /// \param[in] size the size of the array to generate 0134 /// \param[in] min the lower bound of the uniform distribution 0135 /// \param[in] max the upper bound of the uniform distribution 0136 /// \param[in] null_probability the probability of a value being null 0137 /// \param[in] alignment alignment for memory allocations (in bytes) 0138 /// \param[in] memory_pool memory pool to allocate memory from 0139 /// 0140 /// \return a generated Array 0141 std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max, 0142 double null_probability = 0, 0143 int64_t alignment = kDefaultBufferAlignment, 0144 MemoryPool* memory_pool = default_memory_pool()); 0145 0146 /// \brief Generate a random Int32Array 0147 /// 0148 /// \param[in] size the size of the array to generate 0149 /// \param[in] min the lower bound of the uniform distribution 0150 /// \param[in] max the upper bound of the uniform distribution 0151 /// \param[in] null_probability the probability of a value being null 0152 /// \param[in] alignment alignment for memory allocations (in bytes) 0153 /// \param[in] memory_pool memory pool to allocate memory from 0154 /// 0155 /// \return a generated Array 0156 std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max, 0157 double null_probability = 0, 0158 int64_t alignment = kDefaultBufferAlignment, 0159 MemoryPool* memory_pool = default_memory_pool()); 0160 0161 /// \brief Generate a random UInt64Array 0162 /// 0163 /// \param[in] size the size of the array to generate 0164 /// \param[in] min the lower bound of the uniform distribution 0165 /// \param[in] max the upper bound of the uniform distribution 0166 /// \param[in] null_probability the probability of a value being null 0167 /// \param[in] alignment alignment for memory allocations (in bytes) 0168 /// \param[in] memory_pool memory pool to allocate memory from 0169 /// 0170 /// \return a generated Array 0171 std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max, 0172 double null_probability = 0, 0173 int64_t alignment = kDefaultBufferAlignment, 0174 MemoryPool* memory_pool = default_memory_pool()); 0175 0176 /// \brief Generate a random Int64Array 0177 /// 0178 /// \param[in] size the size of the array to generate 0179 /// \param[in] min the lower bound of the uniform distribution 0180 /// \param[in] max the upper bound of the uniform distribution 0181 /// \param[in] null_probability the probability of a value being null 0182 /// \param[in] alignment alignment for memory allocations (in bytes) 0183 /// \param[in] memory_pool memory pool to allocate memory from 0184 /// 0185 /// \return a generated Array 0186 std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max, 0187 double null_probability = 0, 0188 int64_t alignment = kDefaultBufferAlignment, 0189 MemoryPool* memory_pool = default_memory_pool()); 0190 0191 /// \brief Generate a random HalfFloatArray 0192 /// 0193 /// \param[in] size the size of the array to generate 0194 /// \param[in] min the lower bound of the distribution 0195 /// \param[in] max the upper bound of the distribution 0196 /// \param[in] null_probability the probability of a value being null 0197 /// \param[in] alignment alignment for memory allocations (in bytes) 0198 /// \param[in] memory_pool memory pool to allocate memory from 0199 /// 0200 /// \return a generated Array 0201 std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max, 0202 double null_probability = 0, 0203 int64_t alignment = kDefaultBufferAlignment, 0204 MemoryPool* memory_pool = default_memory_pool()); 0205 0206 /// \brief Generate a random FloatArray 0207 /// 0208 /// \param[in] size the size of the array to generate 0209 /// \param[in] min the lower bound of the uniform distribution 0210 /// \param[in] max the upper bound of the uniform distribution 0211 /// \param[in] null_probability the probability of a value being null 0212 /// \param[in] nan_probability the probability of a value being NaN 0213 /// \param[in] alignment alignment for memory allocations (in bytes) 0214 /// \param[in] memory_pool memory pool to allocate memory from 0215 /// 0216 /// \return a generated Array 0217 std::shared_ptr<Array> Float32(int64_t size, float min, float max, 0218 double null_probability = 0, double nan_probability = 0, 0219 int64_t alignment = kDefaultBufferAlignment, 0220 MemoryPool* memory_pool = default_memory_pool()); 0221 0222 /// \brief Generate a random DoubleArray 0223 /// 0224 /// \param[in] size the size of the array to generate 0225 /// \param[in] min the lower bound of the uniform distribution 0226 /// \param[in] max the upper bound of the uniform distribution 0227 /// \param[in] null_probability the probability of a value being null 0228 /// \param[in] nan_probability the probability of a value being NaN 0229 /// \param[in] alignment alignment for memory allocations (in bytes) 0230 /// \param[in] memory_pool memory pool to allocate memory from 0231 /// 0232 /// \return a generated Array 0233 std::shared_ptr<Array> Float64(int64_t size, double min, double max, 0234 double null_probability = 0, double nan_probability = 0, 0235 int64_t alignment = kDefaultBufferAlignment, 0236 MemoryPool* memory_pool = default_memory_pool()); 0237 0238 /// \brief Generate a random Date64Array 0239 /// 0240 /// \param[in] size the size of the array to generate 0241 /// \param[in] min the lower bound of the uniform distribution 0242 /// \param[in] max the upper bound of the uniform distribution 0243 /// \param[in] null_probability the probability of a value being null 0244 /// \param[in] alignment alignment for memory allocations (in bytes) 0245 /// \param[in] memory_pool memory pool to allocate memory from 0246 /// 0247 /// \return a generated Array 0248 std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max, 0249 double null_probability = 0, 0250 int64_t alignment = kDefaultBufferAlignment, 0251 MemoryPool* memory_pool = default_memory_pool()); 0252 0253 template <typename ArrowType, typename CType = typename ArrowType::c_type> 0254 std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max, 0255 double null_probability = 0, 0256 int64_t alignment = kDefaultBufferAlignment, 0257 MemoryPool* memory_pool = default_memory_pool()) { 0258 switch (ArrowType::type_id) { 0259 case Type::UINT8: 0260 return UInt8(size, static_cast<uint8_t>(min), static_cast<uint8_t>(max), 0261 null_probability, alignment, memory_pool); 0262 case Type::INT8: 0263 return Int8(size, static_cast<int8_t>(min), static_cast<int8_t>(max), 0264 null_probability, alignment, memory_pool); 0265 case Type::UINT16: 0266 return UInt16(size, static_cast<uint16_t>(min), static_cast<uint16_t>(max), 0267 null_probability, alignment, memory_pool); 0268 case Type::INT16: 0269 return Int16(size, static_cast<int16_t>(min), static_cast<int16_t>(max), 0270 null_probability, alignment, memory_pool); 0271 case Type::UINT32: 0272 return UInt32(size, static_cast<uint32_t>(min), static_cast<uint32_t>(max), 0273 null_probability, alignment, memory_pool); 0274 case Type::INT32: 0275 return Int32(size, static_cast<int32_t>(min), static_cast<int32_t>(max), 0276 null_probability, alignment, memory_pool); 0277 case Type::UINT64: 0278 return UInt64(size, static_cast<uint64_t>(min), static_cast<uint64_t>(max), 0279 null_probability, alignment, memory_pool); 0280 case Type::INT64: 0281 return Int64(size, static_cast<int64_t>(min), static_cast<int64_t>(max), 0282 null_probability, alignment, memory_pool); 0283 case Type::HALF_FLOAT: 0284 return Float16(size, static_cast<int16_t>(min), static_cast<int16_t>(max), 0285 null_probability, alignment, memory_pool); 0286 case Type::FLOAT: 0287 return Float32(size, static_cast<float>(min), static_cast<float>(max), 0288 null_probability, /*nan_probability=*/0, alignment, memory_pool); 0289 case Type::DOUBLE: 0290 return Float64(size, static_cast<double>(min), static_cast<double>(max), 0291 null_probability, /*nan_probability=*/0, alignment, memory_pool); 0292 case Type::DATE64: 0293 return Date64(size, static_cast<int64_t>(min), static_cast<int64_t>(max), 0294 null_probability, alignment, memory_pool); 0295 default: 0296 return nullptr; 0297 } 0298 } 0299 0300 /// \brief Generate a random Decimal32Array 0301 /// 0302 /// \param[in] type the type of the array to generate 0303 /// (must be an instance of Decimal32Type) 0304 /// \param[in] size the size of the array to generate 0305 /// \param[in] null_probability the probability of a value being null 0306 /// \param[in] alignment alignment for memory allocations (in bytes) 0307 /// \param[in] memory_pool memory pool to allocate memory from 0308 /// 0309 /// \return a generated Array 0310 std::shared_ptr<Array> Decimal32(std::shared_ptr<DataType> type, int64_t size, 0311 double null_probability = 0, 0312 int64_t alignment = kDefaultBufferAlignment, 0313 MemoryPool* memory_pool = default_memory_pool()); 0314 0315 /// \brief Generate a random Decimal64Array 0316 /// 0317 /// \param[in] type the type of the array to generate 0318 /// (must be an instance of Decimal64Type) 0319 /// \param[in] size the size of the array to generate 0320 /// \param[in] null_probability the probability of a value being null 0321 /// \param[in] alignment alignment for memory allocations (in bytes) 0322 /// \param[in] memory_pool memory pool to allocate memory from 0323 /// 0324 /// \return a generated Array 0325 std::shared_ptr<Array> Decimal64(std::shared_ptr<DataType> type, int64_t size, 0326 double null_probability = 0, 0327 int64_t alignment = kDefaultBufferAlignment, 0328 MemoryPool* memory_pool = default_memory_pool()); 0329 0330 /// \brief Generate a random Decimal128Array 0331 /// 0332 /// \param[in] type the type of the array to generate 0333 /// (must be an instance of Decimal128Type) 0334 /// \param[in] size the size of the array to generate 0335 /// \param[in] null_probability the probability of a value being null 0336 /// \param[in] alignment alignment for memory allocations (in bytes) 0337 /// \param[in] memory_pool memory pool to allocate memory from 0338 /// 0339 /// \return a generated Array 0340 std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size, 0341 double null_probability = 0, 0342 int64_t alignment = kDefaultBufferAlignment, 0343 MemoryPool* memory_pool = default_memory_pool()); 0344 0345 /// \brief Generate a random Decimal256Array 0346 /// 0347 /// \param[in] type the type of the array to generate 0348 /// (must be an instance of Decimal256Type) 0349 /// \param[in] size the size of the array to generate 0350 /// \param[in] null_probability the probability of a value being null 0351 /// \param[in] alignment alignment for memory allocations (in bytes) 0352 /// \param[in] memory_pool memory pool to allocate memory from 0353 /// 0354 /// \return a generated Array 0355 std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size, 0356 double null_probability = 0, 0357 int64_t alignment = kDefaultBufferAlignment, 0358 MemoryPool* memory_pool = default_memory_pool()); 0359 0360 /// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays) 0361 /// 0362 /// \param[in] size the size of the array to generate 0363 /// \param[in] first_offset the first offset value (usually 0) 0364 /// \param[in] last_offset the last offset value (usually the size of the child array) 0365 /// \param[in] null_probability the probability of an offset being null 0366 /// \param[in] force_empty_nulls if true, null offsets must have 0 "length" 0367 /// \param[in] alignment alignment for memory allocations (in bytes) 0368 /// \param[in] memory_pool memory pool to allocate memory from 0369 /// 0370 /// \return a generated Array 0371 std::shared_ptr<Array> Offsets(int64_t size, int32_t first_offset, int32_t last_offset, 0372 double null_probability = 0, 0373 bool force_empty_nulls = false, 0374 int64_t alignment = kDefaultBufferAlignment, 0375 MemoryPool* memory_pool = default_memory_pool()); 0376 0377 std::shared_ptr<Array> LargeOffsets(int64_t size, int64_t first_offset, 0378 int64_t last_offset, double null_probability = 0, 0379 bool force_empty_nulls = false, 0380 int64_t alignment = kDefaultBufferAlignment, 0381 MemoryPool* memory_pool = default_memory_pool()); 0382 0383 /// \brief Generate a random StringArray 0384 /// 0385 /// \param[in] size the size of the array to generate 0386 /// \param[in] min_length the lower bound of the string length 0387 /// determined by the uniform distribution 0388 /// \param[in] max_length the upper bound of the string length 0389 /// determined by the uniform distribution 0390 /// \param[in] null_probability the probability of a value being null 0391 /// \param[in] alignment alignment for memory allocations (in bytes) 0392 /// \param[in] memory_pool memory pool to allocate memory from 0393 /// 0394 /// \return a generated Array 0395 std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length, 0396 double null_probability = 0, 0397 int64_t alignment = kDefaultBufferAlignment, 0398 MemoryPool* memory_pool = default_memory_pool()); 0399 0400 /// \brief Generate a random StringViewArray 0401 /// 0402 /// \param[in] size the size of the array to generate 0403 /// \param[in] min_length the lower bound of the string length 0404 /// determined by the uniform distribution 0405 /// \param[in] max_length the upper bound of the string length 0406 /// determined by the uniform distribution 0407 /// \param[in] null_probability the probability of a value being null 0408 /// \param[in] max_data_buffer_length the data buffer size at which 0409 /// a new chunk will be generated 0410 /// \param[in] alignment alignment for memory allocations (in bytes) 0411 /// \param[in] memory_pool memory pool to allocate memory from 0412 /// 0413 /// \return a generated Array 0414 std::shared_ptr<Array> StringView(int64_t size, int32_t min_length, int32_t max_length, 0415 double null_probability = 0, 0416 std::optional<int64_t> max_data_buffer_length = {}, 0417 int64_t alignment = kDefaultBufferAlignment, 0418 MemoryPool* memory_pool = default_memory_pool()); 0419 0420 /// \brief Generate a random LargeStringArray 0421 /// 0422 /// \param[in] size the size of the array to generate 0423 /// \param[in] min_length the lower bound of the string length 0424 /// determined by the uniform distribution 0425 /// \param[in] max_length the upper bound of the string length 0426 /// determined by the uniform distribution 0427 /// \param[in] null_probability the probability of a value being null 0428 /// \param[in] alignment alignment for memory allocations (in bytes) 0429 /// \param[in] memory_pool memory pool to allocate memory from 0430 /// 0431 /// \return a generated Array 0432 std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length, 0433 double null_probability = 0, 0434 int64_t alignment = kDefaultBufferAlignment, 0435 MemoryPool* memory_pool = default_memory_pool()); 0436 0437 /// \brief Generate a random StringArray with repeated values 0438 /// 0439 /// \param[in] size the size of the array to generate 0440 /// \param[in] unique the number of unique string values used 0441 /// to populate the array 0442 /// \param[in] min_length the lower bound of the string length 0443 /// determined by the uniform distribution 0444 /// \param[in] max_length the upper bound of the string length 0445 /// determined by the uniform distribution 0446 /// \param[in] null_probability the probability of a value being null 0447 /// \param[in] alignment alignment for memory allocations (in bytes) 0448 /// \param[in] memory_pool memory pool to allocate memory from 0449 /// 0450 /// \return a generated Array 0451 std::shared_ptr<Array> StringWithRepeats( 0452 int64_t size, int64_t unique, int32_t min_length, int32_t max_length, 0453 double null_probability = 0, int64_t alignment = kDefaultBufferAlignment, 0454 MemoryPool* memory_pool = default_memory_pool()); 0455 0456 /// \brief Like StringWithRepeats but return BinaryArray 0457 std::shared_ptr<Array> BinaryWithRepeats( 0458 int64_t size, int64_t unique, int32_t min_length, int32_t max_length, 0459 double null_probability = 0, int64_t alignment = kDefaultBufferAlignment, 0460 MemoryPool* memory_pool = default_memory_pool()); 0461 0462 /// \brief Generate a random FixedSizeBinaryArray 0463 /// 0464 /// \param[in] size the size of the array to generate 0465 /// \param[in] byte_width the byte width of fixed-size binary items 0466 /// \param[in] null_probability the probability of a value being null 0467 /// \param[in] min_byte the lower bound of each byte in the binary determined by the 0468 /// uniform distribution 0469 /// \param[in] max_byte the upper bound of each byte in the binary determined by the 0470 /// uniform distribution 0471 /// \param[in] alignment alignment for memory allocations (in bytes) 0472 /// \param[in] memory_pool memory pool to allocate memory from 0473 /// 0474 /// \return a generated Array 0475 std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width, 0476 double null_probability = 0, 0477 uint8_t min_byte = static_cast<uint8_t>('A'), 0478 uint8_t max_byte = static_cast<uint8_t>('z'), 0479 int64_t alignment = kDefaultBufferAlignment, 0480 MemoryPool* memory_pool = default_memory_pool()); 0481 0482 /// \brief Generate a random ListArray 0483 /// 0484 /// \param[in] values The underlying values array 0485 /// \param[in] size The size of the generated list array 0486 /// \param[in] null_probability the probability of a list value being null 0487 /// \param[in] force_empty_nulls if true, null list entries must have 0 length 0488 /// \param[in] alignment alignment for memory allocations (in bytes) 0489 /// \param[in] memory_pool memory pool to allocate memory from 0490 /// 0491 /// \return a generated Array 0492 std::shared_ptr<Array> List(const Array& values, int64_t size, 0493 double null_probability = 0, bool force_empty_nulls = false, 0494 int64_t alignment = kDefaultBufferAlignment, 0495 MemoryPool* memory_pool = default_memory_pool()); 0496 0497 /// \brief Generate a random ListViewArray 0498 /// 0499 /// \param[in] values The underlying values array 0500 /// \param[in] size The size of the generated list array 0501 /// \param[in] null_probability the probability of a list value being null 0502 /// \param[in] force_empty_nulls if true, null list entries must have 0 length 0503 /// must be set to 0 0504 /// \param[in] coverage proportion of the values array covered by list-views 0505 /// \param[in] alignment alignment for memory allocations (in bytes) 0506 /// \param[in] memory_pool memory pool to allocate memory from 0507 /// 0508 /// \return a generated Array 0509 std::shared_ptr<Array> ListView(const Array& values, int64_t size, 0510 double null_probability = 0, 0511 bool force_empty_nulls = false, double coverage = 1.0, 0512 int64_t alignment = kDefaultBufferAlignment, 0513 MemoryPool* memory_pool = default_memory_pool()); 0514 0515 /// \brief Generate a random LargeListViewArray 0516 /// 0517 /// \param[in] values The underlying values array 0518 /// \param[in] size The size of the generated list array 0519 /// \param[in] null_probability the probability of a list value being null 0520 /// \param[in] force_empty_nulls if true, null list entries must have 0 length 0521 /// must be set to 0 0522 /// \param[in] coverage proportion of the values array covered by list-views 0523 /// \param[in] alignment alignment for memory allocations (in bytes) 0524 /// \param[in] memory_pool memory pool to allocate memory from 0525 /// 0526 /// \return a generated Array 0527 std::shared_ptr<Array> LargeListView(const Array& values, int64_t size, 0528 double null_probability = 0, 0529 bool force_empty_nulls = false, 0530 double coverage = 1.0, 0531 int64_t alignment = kDefaultBufferAlignment, 0532 MemoryPool* memory_pool = default_memory_pool()); 0533 0534 /// \brief Generate a random MapArray 0535 /// 0536 /// \param[in] keys The underlying keys array 0537 /// \param[in] items The underlying items array 0538 /// \param[in] size The size of the generated map array 0539 /// \param[in] null_probability the probability of a map value being null 0540 /// \param[in] force_empty_nulls if true, null map entries must have 0 length 0541 /// \param[in] alignment alignment for memory allocations (in bytes) 0542 /// \param[in] memory_pool memory pool to allocate memory from 0543 /// 0544 /// \return a generated Array 0545 std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys, 0546 const std::shared_ptr<Array>& items, int64_t size, 0547 double null_probability = 0, bool force_empty_nulls = false, 0548 int64_t alignment = kDefaultBufferAlignment, 0549 MemoryPool* memory_pool = default_memory_pool()); 0550 0551 /// \brief Generate a random RunEndEncodedArray 0552 /// 0553 /// \param[in] value_type The DataType of the encoded values 0554 /// \param[in] logical_size The logical length of the generated array 0555 /// \param[in] null_probability the probability of a value being null 0556 /// 0557 /// \return a generated Array 0558 std::shared_ptr<Array> RunEndEncoded(std::shared_ptr<DataType> value_type, 0559 int64_t logical_size, 0560 double null_probability = 0.0); 0561 0562 /// \brief Generate a random SparseUnionArray 0563 /// 0564 /// The type ids are chosen randomly, according to a uniform distribution, 0565 /// amongst the given child fields. 0566 /// 0567 /// \param[in] fields Vector of Arrays containing the data for each union field 0568 /// \param[in] size The size of the generated sparse union array 0569 /// \param[in] alignment alignment for memory allocations (in bytes) 0570 /// \param[in] memory_pool memory pool to allocate memory from 0571 std::shared_ptr<Array> SparseUnion(const ArrayVector& fields, int64_t size, 0572 int64_t alignment = kDefaultBufferAlignment, 0573 MemoryPool* memory_pool = default_memory_pool()); 0574 0575 /// \brief Generate a random DenseUnionArray 0576 /// 0577 /// The type ids are chosen randomly, according to a uniform distribution, 0578 /// amongst the given child fields. The offsets are incremented along 0579 /// each child field. 0580 /// 0581 /// \param[in] fields Vector of Arrays containing the data for each union field 0582 /// \param[in] size The size of the generated sparse union array 0583 /// \param[in] alignment alignment for memory allocations (in bytes) 0584 /// \param[in] memory_pool memory pool to allocate memory from 0585 std::shared_ptr<Array> DenseUnion(const ArrayVector& fields, int64_t size, 0586 int64_t alignment = kDefaultBufferAlignment, 0587 MemoryPool* memory_pool = default_memory_pool()); 0588 0589 /// \brief Generate a random Array of the specified type, size, and null_probability. 0590 /// 0591 /// Generation parameters other than size and null_probability are determined based on 0592 /// the type of Array to be generated. 0593 /// If boolean the probabilities of true,false values are 0.25,0.75 respectively. 0594 /// If numeric min,max will be the least and greatest representable values. 0595 /// If string min_length,max_length will be 0,sqrt(size) respectively. 0596 /// 0597 /// \param[in] type the type of Array to generate 0598 /// \param[in] size the size of the Array to generate 0599 /// \param[in] null_probability the probability of a slot being null 0600 /// \param[in] alignment alignment for memory allocations (in bytes) 0601 /// \param[in] memory_pool memory pool to allocate memory from 0602 /// \return a generated Array 0603 std::shared_ptr<Array> ArrayOf(std::shared_ptr<DataType> type, int64_t size, 0604 double null_probability = 0, 0605 int64_t alignment = kDefaultBufferAlignment, 0606 MemoryPool* memory_pool = default_memory_pool()); 0607 0608 /// \brief Generate an array with random data based on the given field. See BatchOf 0609 /// for usage info. 0610 std::shared_ptr<Array> ArrayOf(const Field& field, int64_t size, 0611 int64_t alignment = kDefaultBufferAlignment, 0612 MemoryPool* memory_pool = default_memory_pool()); 0613 0614 /// \brief Generate a record batch with random data of the specified length. 0615 /// 0616 /// Generation options are read from key-value metadata for each field, and may be 0617 /// specified at any nesting level. For example, generation options for the child 0618 /// values of a list array can be specified by constructing the list type with 0619 /// list(field("item", int8(), options_metadata)) 0620 /// 0621 /// The following options are supported: 0622 /// 0623 /// For all types except NullType: 0624 /// - null_probability (double): range [0.0, 1.0] the probability of a null value. 0625 /// Default/value is 0.0 if the field is marked non-nullable, else it is 0.01 0626 /// 0627 /// For all numeric types T: 0628 /// - min (T::c_type): the minimum value to generate (inclusive), default 0629 /// std::numeric_limits<T::c_type>::min() 0630 /// - max (T::c_type): the maximum value to generate (inclusive), default 0631 /// std::numeric_limits<T::c_type>::max() 0632 /// Note this means that, for example, min/max are int16_t values for HalfFloatType. 0633 /// 0634 /// For floating point types T for which is_physical_floating_type<T>: 0635 /// - nan_probability (double): range [0.0, 1.0] the probability of a NaN value. 0636 /// 0637 /// For BooleanType: 0638 /// - true_probability (double): range [0.0, 1.0] the probability of a true. 0639 /// 0640 /// For DictionaryType: 0641 /// - values (int32_t): the size of the dictionary. 0642 /// Other properties are passed to the generator for the dictionary indices. However, 0643 /// min and max cannot be specified. Note it is not possible to otherwise customize 0644 /// the generation of dictionary values. 0645 /// 0646 /// For list, string, and binary types T, including their large variants: 0647 /// - min_length (T::offset_type): the minimum length of the child to generate, 0648 /// default 0 0649 /// - max_length (T::offset_type): the minimum length of the child to generate, 0650 /// default 1024 0651 /// 0652 /// For string and binary types T (not including their large or view variants): 0653 /// - unique (int32_t): if positive, this many distinct values will be generated 0654 /// and all array values will be one of these values, default -1 0655 /// 0656 /// For string and binary view types T: 0657 /// - max_data_buffer_length (int64_t): the data buffer size at which a new chunk 0658 /// will be generated, default 32KB 0659 /// 0660 /// For MapType: 0661 /// - values (int32_t): the number of key-value pairs to generate, which will be 0662 /// partitioned among the array values. 0663 /// 0664 /// For extension types: 0665 /// - extension_allow_random_storage (bool): in general an extension array may have 0666 /// invariants on its storage beyond those already imposed by the arrow format, 0667 /// which may result in an invalid array if we just wrap randomly generated 0668 /// storage. Set this flag to explicitly allow wrapping of randomly generated 0669 /// storage. 0670 std::shared_ptr<arrow::RecordBatch> BatchOf( 0671 const FieldVector& fields, int64_t size, 0672 int64_t alignment = kDefaultBufferAlignment, 0673 MemoryPool* memory_pool = default_memory_pool()); 0674 0675 SeedType seed() { return seed_distribution_(seed_rng_); } 0676 0677 private: 0678 std::uniform_int_distribution<SeedType> seed_distribution_; 0679 std::default_random_engine seed_rng_; 0680 }; 0681 0682 /// Generate a batch with random data. See RandomArrayGenerator::BatchOf. 0683 ARROW_TESTING_EXPORT 0684 std::shared_ptr<arrow::RecordBatch> GenerateBatch( 0685 const FieldVector& fields, int64_t size, SeedType seed, 0686 int64_t alignment = kDefaultBufferAlignment, 0687 MemoryPool* memory_pool = default_memory_pool()); 0688 0689 /// Generate an array with random data. See RandomArrayGenerator::BatchOf. 0690 ARROW_TESTING_EXPORT 0691 std::shared_ptr<arrow::Array> GenerateArray( 0692 const Field& field, int64_t size, SeedType seed, 0693 int64_t alignment = kDefaultBufferAlignment, 0694 MemoryPool* memory_pool = default_memory_pool()); 0695 0696 } // namespace random 0697 0698 // 0699 // Assorted functions 0700 // 0701 0702 ARROW_TESTING_EXPORT 0703 void rand_day_millis(int64_t N, std::vector<DayTimeIntervalType::DayMilliseconds>* out); 0704 ARROW_TESTING_EXPORT 0705 void rand_month_day_nanos(int64_t N, 0706 std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out); 0707 0708 template <typename T, typename U> 0709 void randint(int64_t N, T lower, T upper, std::vector<U>* out) { 0710 const int random_seed = 0; 0711 std::default_random_engine gen(random_seed); 0712 std::uniform_int_distribution<T> d(lower, upper); 0713 out->resize(N, static_cast<T>(0)); 0714 std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); }); 0715 } 0716 0717 template <typename T, typename U> 0718 void random_real(int64_t n, uint32_t seed, T min_value, T max_value, 0719 std::vector<U>* out) { 0720 std::default_random_engine gen(seed); 0721 ::arrow::random::uniform_real_distribution<T> d(min_value, max_value); 0722 out->resize(n, static_cast<T>(0)); 0723 std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); }); 0724 } 0725 0726 template <typename T, typename U> 0727 void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) { 0728 assert(out || (n == 0)); 0729 std::default_random_engine gen(seed); 0730 std::uniform_int_distribution<T> d(min_value, max_value); 0731 std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); }); 0732 } 0733 0734 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |