Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:01

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <algorithm>
0021 #include <cassert>
0022 #include <cstdint>
0023 #include <limits>
0024 #include <memory>
0025 #include <random>
0026 #include <vector>
0027 
0028 #include "arrow/testing/uniform_real.h"
0029 #include "arrow/testing/visibility.h"
0030 #include "arrow/type.h"
0031 
0032 namespace arrow {
0033 
0034 class Array;
0035 
0036 namespace random {
0037 
0038 using SeedType = int32_t;
0039 constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();
0040 
0041 class ARROW_TESTING_EXPORT RandomArrayGenerator {
0042  public:
0043   explicit RandomArrayGenerator(SeedType seed)
0044       : seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {}
0045 
0046   /// \brief Generate a null bitmap
0047   ///
0048   /// \param[in] size the size of the bitmap to generate
0049   /// \param[in] null_probability the probability of a bit being zero
0050   /// \param[in] alignment alignment for memory allocations (in bytes)
0051   /// \param[in] memory_pool memory pool to allocate memory from
0052   ///
0053   /// \return a generated Buffer
0054   std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0,
0055                                      int64_t alignment = kDefaultBufferAlignment,
0056                                      MemoryPool* memory_pool = default_memory_pool());
0057 
0058   /// \brief Generate a random BooleanArray
0059   ///
0060   /// \param[in] size the size of the array to generate
0061   /// \param[in] true_probability the probability of a value being 1 / bit-set
0062   /// \param[in] null_probability the probability of a value being null
0063   /// \param[in] alignment alignment for memory allocations (in bytes)
0064   /// \param[in] memory_pool memory pool to allocate memory from
0065   ///
0066   /// \return a generated Array
0067   std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
0068                                  double null_probability = 0,
0069                                  int64_t alignment = kDefaultBufferAlignment,
0070                                  MemoryPool* memory_pool = default_memory_pool());
0071   /// \brief Generate a random UInt8Array
0072   ///
0073   /// \param[in] size the size of the array to generate
0074   /// \param[in] min the lower bound of the uniform distribution
0075   /// \param[in] max the upper bound of the uniform distribution
0076   /// \param[in] null_probability the probability of a value being null
0077   /// \param[in] alignment alignment for memory allocations (in bytes)
0078   /// \param[in] memory_pool memory pool to allocate memory from
0079   ///
0080   /// \return a generated Array
0081   std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
0082                                double null_probability = 0,
0083                                int64_t alignment = kDefaultBufferAlignment,
0084                                MemoryPool* memory_pool = default_memory_pool());
0085 
0086   /// \brief Generate a random Int8Array
0087   ///
0088   /// \param[in] size the size of the array to generate
0089   /// \param[in] min the lower bound of the uniform distribution
0090   /// \param[in] max the upper bound of the uniform distribution
0091   /// \param[in] null_probability the probability of a value being null
0092   /// \param[in] alignment alignment for memory allocations (in bytes)
0093   /// \param[in] memory_pool memory pool to allocate memory from
0094   ///
0095   /// \return a generated Array
0096   std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
0097                               double null_probability = 0,
0098                               int64_t alignment = kDefaultBufferAlignment,
0099                               MemoryPool* memory_pool = default_memory_pool());
0100 
0101   /// \brief Generate a random UInt16Array
0102   ///
0103   /// \param[in] size the size of the array to generate
0104   /// \param[in] min the lower bound of the uniform distribution
0105   /// \param[in] max the upper bound of the uniform distribution
0106   /// \param[in] null_probability the probability of a value being null
0107   /// \param[in] alignment alignment for memory allocations (in bytes)
0108   /// \param[in] memory_pool memory pool to allocate memory from
0109   ///
0110   /// \return a generated Array
0111   std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
0112                                 double null_probability = 0,
0113                                 int64_t alignment = kDefaultBufferAlignment,
0114                                 MemoryPool* memory_pool = default_memory_pool());
0115 
0116   /// \brief Generate a random Int16Array
0117   ///
0118   /// \param[in] size the size of the array to generate
0119   /// \param[in] min the lower bound of the uniform distribution
0120   /// \param[in] max the upper bound of the uniform distribution
0121   /// \param[in] null_probability the probability of a value being null
0122   /// \param[in] alignment alignment for memory allocations (in bytes)
0123   /// \param[in] memory_pool memory pool to allocate memory from
0124   ///
0125   /// \return a generated Array
0126   std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
0127                                double null_probability = 0,
0128                                int64_t alignment = kDefaultBufferAlignment,
0129                                MemoryPool* memory_pool = default_memory_pool());
0130 
0131   /// \brief Generate a random UInt32Array
0132   ///
0133   /// \param[in] size the size of the array to generate
0134   /// \param[in] min the lower bound of the uniform distribution
0135   /// \param[in] max the upper bound of the uniform distribution
0136   /// \param[in] null_probability the probability of a value being null
0137   /// \param[in] alignment alignment for memory allocations (in bytes)
0138   /// \param[in] memory_pool memory pool to allocate memory from
0139   ///
0140   /// \return a generated Array
0141   std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
0142                                 double null_probability = 0,
0143                                 int64_t alignment = kDefaultBufferAlignment,
0144                                 MemoryPool* memory_pool = default_memory_pool());
0145 
0146   /// \brief Generate a random Int32Array
0147   ///
0148   /// \param[in] size the size of the array to generate
0149   /// \param[in] min the lower bound of the uniform distribution
0150   /// \param[in] max the upper bound of the uniform distribution
0151   /// \param[in] null_probability the probability of a value being null
0152   /// \param[in] alignment alignment for memory allocations (in bytes)
0153   /// \param[in] memory_pool memory pool to allocate memory from
0154   ///
0155   /// \return a generated Array
0156   std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
0157                                double null_probability = 0,
0158                                int64_t alignment = kDefaultBufferAlignment,
0159                                MemoryPool* memory_pool = default_memory_pool());
0160 
0161   /// \brief Generate a random UInt64Array
0162   ///
0163   /// \param[in] size the size of the array to generate
0164   /// \param[in] min the lower bound of the uniform distribution
0165   /// \param[in] max the upper bound of the uniform distribution
0166   /// \param[in] null_probability the probability of a value being null
0167   /// \param[in] alignment alignment for memory allocations (in bytes)
0168   /// \param[in] memory_pool memory pool to allocate memory from
0169   ///
0170   /// \return a generated Array
0171   std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
0172                                 double null_probability = 0,
0173                                 int64_t alignment = kDefaultBufferAlignment,
0174                                 MemoryPool* memory_pool = default_memory_pool());
0175 
0176   /// \brief Generate a random Int64Array
0177   ///
0178   /// \param[in] size the size of the array to generate
0179   /// \param[in] min the lower bound of the uniform distribution
0180   /// \param[in] max the upper bound of the uniform distribution
0181   /// \param[in] null_probability the probability of a value being null
0182   /// \param[in] alignment alignment for memory allocations (in bytes)
0183   /// \param[in] memory_pool memory pool to allocate memory from
0184   ///
0185   /// \return a generated Array
0186   std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
0187                                double null_probability = 0,
0188                                int64_t alignment = kDefaultBufferAlignment,
0189                                MemoryPool* memory_pool = default_memory_pool());
0190 
0191   /// \brief Generate a random HalfFloatArray
0192   ///
0193   /// \param[in] size the size of the array to generate
0194   /// \param[in] min the lower bound of the distribution
0195   /// \param[in] max the upper bound of the distribution
0196   /// \param[in] null_probability the probability of a value being null
0197   /// \param[in] alignment alignment for memory allocations (in bytes)
0198   /// \param[in] memory_pool memory pool to allocate memory from
0199   ///
0200   /// \return a generated Array
0201   std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
0202                                  double null_probability = 0,
0203                                  int64_t alignment = kDefaultBufferAlignment,
0204                                  MemoryPool* memory_pool = default_memory_pool());
0205 
0206   /// \brief Generate a random FloatArray
0207   ///
0208   /// \param[in] size the size of the array to generate
0209   /// \param[in] min the lower bound of the uniform distribution
0210   /// \param[in] max the upper bound of the uniform distribution
0211   /// \param[in] null_probability the probability of a value being null
0212   /// \param[in] nan_probability the probability of a value being NaN
0213   /// \param[in] alignment alignment for memory allocations (in bytes)
0214   /// \param[in] memory_pool memory pool to allocate memory from
0215   ///
0216   /// \return a generated Array
0217   std::shared_ptr<Array> Float32(int64_t size, float min, float max,
0218                                  double null_probability = 0, double nan_probability = 0,
0219                                  int64_t alignment = kDefaultBufferAlignment,
0220                                  MemoryPool* memory_pool = default_memory_pool());
0221 
0222   /// \brief Generate a random DoubleArray
0223   ///
0224   /// \param[in] size the size of the array to generate
0225   /// \param[in] min the lower bound of the uniform distribution
0226   /// \param[in] max the upper bound of the uniform distribution
0227   /// \param[in] null_probability the probability of a value being null
0228   /// \param[in] nan_probability the probability of a value being NaN
0229   /// \param[in] alignment alignment for memory allocations (in bytes)
0230   /// \param[in] memory_pool memory pool to allocate memory from
0231   ///
0232   /// \return a generated Array
0233   std::shared_ptr<Array> Float64(int64_t size, double min, double max,
0234                                  double null_probability = 0, double nan_probability = 0,
0235                                  int64_t alignment = kDefaultBufferAlignment,
0236                                  MemoryPool* memory_pool = default_memory_pool());
0237 
0238   /// \brief Generate a random Date64Array
0239   ///
0240   /// \param[in] size the size of the array to generate
0241   /// \param[in] min the lower bound of the uniform distribution
0242   /// \param[in] max the upper bound of the uniform distribution
0243   /// \param[in] null_probability the probability of a value being null
0244   /// \param[in] alignment alignment for memory allocations (in bytes)
0245   /// \param[in] memory_pool memory pool to allocate memory from
0246   ///
0247   /// \return a generated Array
0248   std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max,
0249                                 double null_probability = 0,
0250                                 int64_t alignment = kDefaultBufferAlignment,
0251                                 MemoryPool* memory_pool = default_memory_pool());
0252 
0253   template <typename ArrowType, typename CType = typename ArrowType::c_type>
0254   std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max,
0255                                  double null_probability = 0,
0256                                  int64_t alignment = kDefaultBufferAlignment,
0257                                  MemoryPool* memory_pool = default_memory_pool()) {
0258     switch (ArrowType::type_id) {
0259       case Type::UINT8:
0260         return UInt8(size, static_cast<uint8_t>(min), static_cast<uint8_t>(max),
0261                      null_probability, alignment, memory_pool);
0262       case Type::INT8:
0263         return Int8(size, static_cast<int8_t>(min), static_cast<int8_t>(max),
0264                     null_probability, alignment, memory_pool);
0265       case Type::UINT16:
0266         return UInt16(size, static_cast<uint16_t>(min), static_cast<uint16_t>(max),
0267                       null_probability, alignment, memory_pool);
0268       case Type::INT16:
0269         return Int16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
0270                      null_probability, alignment, memory_pool);
0271       case Type::UINT32:
0272         return UInt32(size, static_cast<uint32_t>(min), static_cast<uint32_t>(max),
0273                       null_probability, alignment, memory_pool);
0274       case Type::INT32:
0275         return Int32(size, static_cast<int32_t>(min), static_cast<int32_t>(max),
0276                      null_probability, alignment, memory_pool);
0277       case Type::UINT64:
0278         return UInt64(size, static_cast<uint64_t>(min), static_cast<uint64_t>(max),
0279                       null_probability, alignment, memory_pool);
0280       case Type::INT64:
0281         return Int64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
0282                      null_probability, alignment, memory_pool);
0283       case Type::HALF_FLOAT:
0284         return Float16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
0285                        null_probability, alignment, memory_pool);
0286       case Type::FLOAT:
0287         return Float32(size, static_cast<float>(min), static_cast<float>(max),
0288                        null_probability, /*nan_probability=*/0, alignment, memory_pool);
0289       case Type::DOUBLE:
0290         return Float64(size, static_cast<double>(min), static_cast<double>(max),
0291                        null_probability, /*nan_probability=*/0, alignment, memory_pool);
0292       case Type::DATE64:
0293         return Date64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
0294                       null_probability, alignment, memory_pool);
0295       default:
0296         return nullptr;
0297     }
0298   }
0299 
0300   /// \brief Generate a random Decimal32Array
0301   ///
0302   /// \param[in] type the type of the array to generate
0303   ///            (must be an instance of Decimal32Type)
0304   /// \param[in] size the size of the array to generate
0305   /// \param[in] null_probability the probability of a value being null
0306   /// \param[in] alignment alignment for memory allocations (in bytes)
0307   /// \param[in] memory_pool memory pool to allocate memory from
0308   ///
0309   /// \return a generated Array
0310   std::shared_ptr<Array> Decimal32(std::shared_ptr<DataType> type, int64_t size,
0311                                    double null_probability = 0,
0312                                    int64_t alignment = kDefaultBufferAlignment,
0313                                    MemoryPool* memory_pool = default_memory_pool());
0314 
0315   /// \brief Generate a random Decimal64Array
0316   ///
0317   /// \param[in] type the type of the array to generate
0318   ///            (must be an instance of Decimal64Type)
0319   /// \param[in] size the size of the array to generate
0320   /// \param[in] null_probability the probability of a value being null
0321   /// \param[in] alignment alignment for memory allocations (in bytes)
0322   /// \param[in] memory_pool memory pool to allocate memory from
0323   ///
0324   /// \return a generated Array
0325   std::shared_ptr<Array> Decimal64(std::shared_ptr<DataType> type, int64_t size,
0326                                    double null_probability = 0,
0327                                    int64_t alignment = kDefaultBufferAlignment,
0328                                    MemoryPool* memory_pool = default_memory_pool());
0329 
0330   /// \brief Generate a random Decimal128Array
0331   ///
0332   /// \param[in] type the type of the array to generate
0333   ///            (must be an instance of Decimal128Type)
0334   /// \param[in] size the size of the array to generate
0335   /// \param[in] null_probability the probability of a value being null
0336   /// \param[in] alignment alignment for memory allocations (in bytes)
0337   /// \param[in] memory_pool memory pool to allocate memory from
0338   ///
0339   /// \return a generated Array
0340   std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
0341                                     double null_probability = 0,
0342                                     int64_t alignment = kDefaultBufferAlignment,
0343                                     MemoryPool* memory_pool = default_memory_pool());
0344 
0345   /// \brief Generate a random Decimal256Array
0346   ///
0347   /// \param[in] type the type of the array to generate
0348   ///            (must be an instance of Decimal256Type)
0349   /// \param[in] size the size of the array to generate
0350   /// \param[in] null_probability the probability of a value being null
0351   /// \param[in] alignment alignment for memory allocations (in bytes)
0352   /// \param[in] memory_pool memory pool to allocate memory from
0353   ///
0354   /// \return a generated Array
0355   std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size,
0356                                     double null_probability = 0,
0357                                     int64_t alignment = kDefaultBufferAlignment,
0358                                     MemoryPool* memory_pool = default_memory_pool());
0359 
0360   /// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays)
0361   ///
0362   /// \param[in] size the size of the array to generate
0363   /// \param[in] first_offset the first offset value (usually 0)
0364   /// \param[in] last_offset the last offset value (usually the size of the child array)
0365   /// \param[in] null_probability the probability of an offset being null
0366   /// \param[in] force_empty_nulls if true, null offsets must have 0 "length"
0367   /// \param[in] alignment alignment for memory allocations (in bytes)
0368   /// \param[in] memory_pool memory pool to allocate memory from
0369   ///
0370   /// \return a generated Array
0371   std::shared_ptr<Array> Offsets(int64_t size, int32_t first_offset, int32_t last_offset,
0372                                  double null_probability = 0,
0373                                  bool force_empty_nulls = false,
0374                                  int64_t alignment = kDefaultBufferAlignment,
0375                                  MemoryPool* memory_pool = default_memory_pool());
0376 
0377   std::shared_ptr<Array> LargeOffsets(int64_t size, int64_t first_offset,
0378                                       int64_t last_offset, double null_probability = 0,
0379                                       bool force_empty_nulls = false,
0380                                       int64_t alignment = kDefaultBufferAlignment,
0381                                       MemoryPool* memory_pool = default_memory_pool());
0382 
0383   /// \brief Generate a random StringArray
0384   ///
0385   /// \param[in] size the size of the array to generate
0386   /// \param[in] min_length the lower bound of the string length
0387   ///            determined by the uniform distribution
0388   /// \param[in] max_length the upper bound of the string length
0389   ///            determined by the uniform distribution
0390   /// \param[in] null_probability the probability of a value being null
0391   /// \param[in] alignment alignment for memory allocations (in bytes)
0392   /// \param[in] memory_pool memory pool to allocate memory from
0393   ///
0394   /// \return a generated Array
0395   std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
0396                                 double null_probability = 0,
0397                                 int64_t alignment = kDefaultBufferAlignment,
0398                                 MemoryPool* memory_pool = default_memory_pool());
0399 
0400   /// \brief Generate a random StringViewArray
0401   ///
0402   /// \param[in] size the size of the array to generate
0403   /// \param[in] min_length the lower bound of the string length
0404   ///            determined by the uniform distribution
0405   /// \param[in] max_length the upper bound of the string length
0406   ///            determined by the uniform distribution
0407   /// \param[in] null_probability the probability of a value being null
0408   /// \param[in] max_data_buffer_length the data buffer size at which
0409   ///            a new chunk will be generated
0410   /// \param[in] alignment alignment for memory allocations (in bytes)
0411   /// \param[in] memory_pool memory pool to allocate memory from
0412   ///
0413   /// \return a generated Array
0414   std::shared_ptr<Array> StringView(int64_t size, int32_t min_length, int32_t max_length,
0415                                     double null_probability = 0,
0416                                     std::optional<int64_t> max_data_buffer_length = {},
0417                                     int64_t alignment = kDefaultBufferAlignment,
0418                                     MemoryPool* memory_pool = default_memory_pool());
0419 
0420   /// \brief Generate a random LargeStringArray
0421   ///
0422   /// \param[in] size the size of the array to generate
0423   /// \param[in] min_length the lower bound of the string length
0424   ///            determined by the uniform distribution
0425   /// \param[in] max_length the upper bound of the string length
0426   ///            determined by the uniform distribution
0427   /// \param[in] null_probability the probability of a value being null
0428   /// \param[in] alignment alignment for memory allocations (in bytes)
0429   /// \param[in] memory_pool memory pool to allocate memory from
0430   ///
0431   /// \return a generated Array
0432   std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length,
0433                                      double null_probability = 0,
0434                                      int64_t alignment = kDefaultBufferAlignment,
0435                                      MemoryPool* memory_pool = default_memory_pool());
0436 
0437   /// \brief Generate a random StringArray with repeated values
0438   ///
0439   /// \param[in] size the size of the array to generate
0440   /// \param[in] unique the number of unique string values used
0441   ///            to populate the array
0442   /// \param[in] min_length the lower bound of the string length
0443   ///            determined by the uniform distribution
0444   /// \param[in] max_length the upper bound of the string length
0445   ///            determined by the uniform distribution
0446   /// \param[in] null_probability the probability of a value being null
0447   /// \param[in] alignment alignment for memory allocations (in bytes)
0448   /// \param[in] memory_pool memory pool to allocate memory from
0449   ///
0450   /// \return a generated Array
0451   std::shared_ptr<Array> StringWithRepeats(
0452       int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
0453       double null_probability = 0, int64_t alignment = kDefaultBufferAlignment,
0454       MemoryPool* memory_pool = default_memory_pool());
0455 
0456   /// \brief Like StringWithRepeats but returns a BinaryArray
0457   std::shared_ptr<Array> BinaryWithRepeats(
0458       int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
0459       double null_probability = 0, int64_t alignment = kDefaultBufferAlignment,
0460       MemoryPool* memory_pool = default_memory_pool());
0461 
0462   /// \brief Generate a random FixedSizeBinaryArray
0463   ///
0464   /// \param[in] size the size of the array to generate
0465   /// \param[in] byte_width the byte width of fixed-size binary items
0466   /// \param[in] null_probability the probability of a value being null
0467   /// \param[in] min_byte the lower bound of each byte in the binary determined by the
0468   ///            uniform distribution
0469   /// \param[in] max_byte the upper bound of each byte in the binary determined by the
0470   ///            uniform distribution
0471   /// \param[in] alignment alignment for memory allocations (in bytes)
0472   /// \param[in] memory_pool memory pool to allocate memory from
0473   ///
0474   /// \return a generated Array
0475   std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width,
0476                                          double null_probability = 0,
0477                                          uint8_t min_byte = static_cast<uint8_t>('A'),
0478                                          uint8_t max_byte = static_cast<uint8_t>('z'),
0479                                          int64_t alignment = kDefaultBufferAlignment,
0480                                          MemoryPool* memory_pool = default_memory_pool());
0481 
0482   /// \brief Generate a random ListArray
0483   ///
0484   /// \param[in] values The underlying values array
0485   /// \param[in] size The size of the generated list array
0486   /// \param[in] null_probability the probability of a list value being null
0487   /// \param[in] force_empty_nulls if true, null list entries must have 0 length
0488   /// \param[in] alignment alignment for memory allocations (in bytes)
0489   /// \param[in] memory_pool memory pool to allocate memory from
0490   ///
0491   /// \return a generated Array
0492   std::shared_ptr<Array> List(const Array& values, int64_t size,
0493                               double null_probability = 0, bool force_empty_nulls = false,
0494                               int64_t alignment = kDefaultBufferAlignment,
0495                               MemoryPool* memory_pool = default_memory_pool());
0496 
0497   /// \brief Generate a random ListViewArray
0498   ///
0499   /// \param[in] values The underlying values array
0500   /// \param[in] size The size of the generated list array
0501   /// \param[in] null_probability the probability of a list value being null
0502   /// \param[in] force_empty_nulls if true, null list entries must have 0 length
0503   ///            (i.e. the length of every null entry must be set to 0)
0504   /// \param[in] coverage proportion of the values array covered by list-views
0505   /// \param[in] alignment alignment for memory allocations (in bytes)
0506   /// \param[in] memory_pool memory pool to allocate memory from
0507   ///
0508   /// \return a generated Array
0509   std::shared_ptr<Array> ListView(const Array& values, int64_t size,
0510                                   double null_probability = 0,
0511                                   bool force_empty_nulls = false, double coverage = 1.0,
0512                                   int64_t alignment = kDefaultBufferAlignment,
0513                                   MemoryPool* memory_pool = default_memory_pool());
0514 
0515   /// \brief Generate a random LargeListViewArray
0516   ///
0517   /// \param[in] values The underlying values array
0518   /// \param[in] size The size of the generated list array
0519   /// \param[in] null_probability the probability of a list value being null
0520   /// \param[in] force_empty_nulls if true, null list entries must have 0 length
0521   ///            (i.e. the length of every null entry must be set to 0)
0522   /// \param[in] coverage proportion of the values array covered by list-views
0523   /// \param[in] alignment alignment for memory allocations (in bytes)
0524   /// \param[in] memory_pool memory pool to allocate memory from
0525   ///
0526   /// \return a generated Array
0527   std::shared_ptr<Array> LargeListView(const Array& values, int64_t size,
0528                                        double null_probability = 0,
0529                                        bool force_empty_nulls = false,
0530                                        double coverage = 1.0,
0531                                        int64_t alignment = kDefaultBufferAlignment,
0532                                        MemoryPool* memory_pool = default_memory_pool());
0533 
0534   /// \brief Generate a random MapArray
0535   ///
0536   /// \param[in] keys The underlying keys array
0537   /// \param[in] items The underlying items array
0538   /// \param[in] size The size of the generated map array
0539   /// \param[in] null_probability the probability of a map value being null
0540   /// \param[in] force_empty_nulls if true, null map entries must have 0 length
0541   /// \param[in] alignment alignment for memory allocations (in bytes)
0542   /// \param[in] memory_pool memory pool to allocate memory from
0543   ///
0544   /// \return a generated Array
0545   std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys,
0546                              const std::shared_ptr<Array>& items, int64_t size,
0547                              double null_probability = 0, bool force_empty_nulls = false,
0548                              int64_t alignment = kDefaultBufferAlignment,
0549                              MemoryPool* memory_pool = default_memory_pool());
0550 
0551   /// \brief Generate a random RunEndEncodedArray
0552   ///
0553   /// \param[in] value_type The DataType of the encoded values
0554   /// \param[in] logical_size The logical length of the generated array
0555   /// \param[in] null_probability the probability of a value being null
0556   ///
0557   /// \return a generated Array
0558   std::shared_ptr<Array> RunEndEncoded(std::shared_ptr<DataType> value_type,
0559                                        int64_t logical_size,
0560                                        double null_probability = 0.0);
0561 
0562   /// \brief Generate a random SparseUnionArray
0563   ///
0564   /// The type ids are chosen uniformly at random amongst the given child fields.
0565   ///
0566   /// \param[in] fields Vector of Arrays containing the data for each union field
0567   /// \param[in] size The size of the generated sparse union array
0568   /// \param[in] alignment alignment for memory allocations (in bytes)
0569   /// \param[in] memory_pool memory pool to allocate memory from
0570   /// \return a generated Array
0571   std::shared_ptr<Array> SparseUnion(const ArrayVector& fields, int64_t size,
0572                                      int64_t alignment = kDefaultBufferAlignment,
0573                                      MemoryPool* memory_pool = default_memory_pool());
0574 
0575   /// \brief Generate a random DenseUnionArray
0576   ///
0577   /// The type ids are chosen uniformly at random amongst the given child fields.
0578   /// The offsets are incremented along each child field.
0579   ///
0580   /// \param[in] fields Vector of Arrays containing the data for each union field
0581   /// \param[in] size The size of the generated dense union array
0582   /// \param[in] alignment alignment for memory allocations (in bytes)
0583   /// \param[in] memory_pool memory pool to allocate memory from
0584   /// \return a generated Array
0585   std::shared_ptr<Array> DenseUnion(const ArrayVector& fields, int64_t size,
0586                                     int64_t alignment = kDefaultBufferAlignment,
0587                                     MemoryPool* memory_pool = default_memory_pool());
0588 
0589   /// \brief Generate a random Array of the specified type, size, and null_probability.
0590   ///
0591   /// Generation parameters other than size and null_probability are determined based on
0592   /// the type of Array to be generated.
0593   /// If boolean, the probabilities of true and false values are 0.25 and 0.75 respectively.
0594   /// If numeric, min and max will be the least and greatest representable values.
0595   /// If string, min_length and max_length will be 0 and sqrt(size) respectively.
0596   ///
0597   /// \param[in] type the type of Array to generate
0598   /// \param[in] size the size of the Array to generate
0599   /// \param[in] null_probability the probability of a slot being null (default 0)
0600   /// \param[in] alignment alignment for memory allocations (in bytes)
0601   /// \param[in] memory_pool memory pool to allocate memory from
0602   /// \return a generated Array
0603   std::shared_ptr<Array> ArrayOf(std::shared_ptr<DataType> type, int64_t size,
0604                                  double null_probability = 0,
0605                                  int64_t alignment = kDefaultBufferAlignment,
0606                                  MemoryPool* memory_pool = default_memory_pool());
0607 
0608   /// \brief Generate an array with random data based on the given field. See BatchOf
0609   /// for the generation options that can be set in the field's key-value metadata.
0610   std::shared_ptr<Array> ArrayOf(const Field& field, int64_t size,
0611                                  int64_t alignment = kDefaultBufferAlignment,
0612                                  MemoryPool* memory_pool = default_memory_pool());
0613 
0614   /// \brief Generate a record batch with random data of the specified length.
0615   ///
0616   /// Generation options are read from key-value metadata for each field, and may be
0617   /// specified at any nesting level. For example, generation options for the child
0618   /// values of a list array can be specified by constructing the list type with
0619   /// list(field("item", int8(), options_metadata))
0620   ///
0621   /// The following options are supported:
0622   ///
0623   /// For all types except NullType:
0624   /// - null_probability (double): range [0.0, 1.0] the probability of a null value.
0625   ///   Default value is 0.0 if the field is marked non-nullable, else it is 0.01
0626   ///
0627   /// For all numeric types T:
0628   /// - min (T::c_type): the minimum value to generate (inclusive), default
0629   ///   std::numeric_limits<T::c_type>::min()
0630   /// - max (T::c_type): the maximum value to generate (inclusive), default
0631   ///   std::numeric_limits<T::c_type>::max()
0632   /// Note this means that, for example, min/max are int16_t values for HalfFloatType.
0633   ///
0634   /// For floating point types T for which is_physical_floating_type<T>:
0635   /// - nan_probability (double): range [0.0, 1.0] the probability of a NaN value.
0636   ///
0637   /// For BooleanType:
0638   /// - true_probability (double): range [0.0, 1.0] the probability of a true value.
0639   ///
0640   /// For DictionaryType:
0641   /// - values (int32_t): the size of the dictionary.
0642   /// Other properties are passed to the generator for the dictionary indices. However,
0643   /// min and max cannot be specified. Note it is not possible to otherwise customize
0644   /// the generation of dictionary values.
0645   ///
0646   /// For list, string, and binary types T, including their large variants:
0647   /// - min_length (T::offset_type): the minimum length of the child to generate,
0648   ///   default 0
0649   /// - max_length (T::offset_type): the maximum length of the child to generate,
0650   ///   default 1024
0651   ///
0652   /// For string and binary types T (not including their large or view variants):
0653   /// - unique (int32_t): if positive, this many distinct values will be generated
0654   ///   and all array values will be one of these values, default -1
0655   ///
0656   /// For string and binary view types T:
0657   /// - max_data_buffer_length (int64_t): the data buffer size at which a new chunk
0658   ///   will be generated, default 32KB
0659   ///
0660   /// For MapType:
0661   /// - values (int32_t): the number of key-value pairs to generate, which will be
0662   ///   partitioned among the array values.
0663   ///
0664   /// For extension types:
0665   /// - extension_allow_random_storage (bool): in general an extension array may have
0666   ///   invariants on its storage beyond those already imposed by the arrow format,
0667   ///   which may result in an invalid array if we just wrap randomly generated
0668   ///   storage. Set this flag to explicitly allow wrapping of randomly generated
0669   ///   storage.
0670   std::shared_ptr<arrow::RecordBatch> BatchOf(
0671       const FieldVector& fields, int64_t size,
0672       int64_t alignment = kDefaultBufferAlignment,
0673       MemoryPool* memory_pool = default_memory_pool());
0674 
0675   SeedType seed() { return seed_distribution_(seed_rng_); }  // next seed in [1, kSeedMax]; advances seed_rng_
0676 
0677  private:
0678   std::uniform_int_distribution<SeedType> seed_distribution_;  // constructed over [1, kSeedMax]
0679   std::default_random_engine seed_rng_;  // seeded with the constructor's `seed` argument
0680 };
0681 
0682 /// Generate a record batch with random data; options as in RandomArrayGenerator::BatchOf.
0683 ARROW_TESTING_EXPORT
0684 std::shared_ptr<arrow::RecordBatch> GenerateBatch(
0685     const FieldVector& fields, int64_t size, SeedType seed,
0686     int64_t alignment = kDefaultBufferAlignment,
0687     MemoryPool* memory_pool = default_memory_pool());
0688 
0689 /// Generate an array for the given field; options as in RandomArrayGenerator::BatchOf.
0690 ARROW_TESTING_EXPORT
0691 std::shared_ptr<arrow::Array> GenerateArray(
0692     const Field& field, int64_t size, SeedType seed,
0693     int64_t alignment = kDefaultBufferAlignment,
0694     MemoryPool* memory_pool = default_memory_pool());
0695 
0696 }  // namespace random
0697 
0698 //
0699 // Assorted functions
0700 //
0701 
0702 ARROW_TESTING_EXPORT  // NOTE(review): presumably fills *out with N random values; defined elsewhere - confirm
0703 void rand_day_millis(int64_t N, std::vector<DayTimeIntervalType::DayMilliseconds>* out);
0704 ARROW_TESTING_EXPORT  // NOTE(review): presumably fills *out with N random values; defined elsewhere - confirm
0705 void rand_month_day_nanos(int64_t N,
0706                           std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out);
0707 
/// \brief Fill a vector with deterministic pseudo-random integers.
///
/// Resizes \p out to \p N elements and overwrites every element with a value drawn
/// from std::uniform_int_distribution<T>(lower, upper), converted to U with
/// static_cast. The engine is always seeded with 0, so repeated calls with the same
/// arguments produce the same sequence.
///
/// \tparam T integer type the values are drawn in; must be a type accepted by
///           std::uniform_int_distribution
/// \tparam U element type of the output vector
/// \param[in] N number of elements to generate
/// \param[in] lower smallest value that can be drawn (inclusive)
/// \param[in] upper largest value that can be drawn (inclusive)
/// \param[out] out vector to resize and fill; must not be null
template <typename T, typename U>
void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
  const int random_seed = 0;
  std::default_random_engine gen(random_seed);
  std::uniform_int_distribution<T> d(lower, upper);
  // The fill value only pads newly added slots and is overwritten below. It is cast to
  // the element type U (not T) so that no implicit T -> U conversion is required.
  out->resize(N, static_cast<U>(0));
  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}
0716 
0717 template <typename T, typename U>
0718 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
0719                  std::vector<U>* out) {
0720   std::default_random_engine gen(seed);
0721   ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
0722   out->resize(n, static_cast<T>(0));
0723   std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
0724 }
0725 
/// \brief Fill a caller-provided buffer with pseudo-random integers.
///
/// Writes \p n values to out[0..n), each drawn from
/// std::uniform_int_distribution<T>(min_value, max_value) using a
/// std::default_random_engine seeded with \p seed, and converted to U with static_cast.
/// Nothing is written when \p n is zero or negative.
///
/// \tparam T integer type the values are drawn in; must be a type accepted by
///           std::uniform_int_distribution
/// \tparam U element type of the output buffer
/// \param[in] n number of elements to generate
/// \param[in] seed seed for the random engine
/// \param[in] min_value smallest value that can be drawn (inclusive)
/// \param[in] max_value largest value that can be drawn (inclusive)
/// \param[out] out buffer with room for at least n elements; may be null only if n is 0
template <typename T, typename U>
void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
  assert(out || (n == 0));
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<T> d(min_value, max_value);
  // generate_n is a no-op for n <= 0, whereas iterating [out, out + n) with a negative
  // n is undefined once the assert above is compiled out.
  std::generate_n(out, n, [&d, &gen] { return static_cast<U>(d(gen)); });
}
0733 
0734 }  // namespace arrow