Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:01

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <vector>
0024 
0025 #include "arrow/array/array_base.h"
0026 #include "arrow/compute/type_fwd.h"
0027 #include "arrow/testing/gtest_util.h"
0028 #include "arrow/testing/visibility.h"
0029 #include "arrow/type_fwd.h"
0030 
0031 namespace arrow {
0032 
0033 class ARROW_TESTING_EXPORT ConstantArrayGenerator {
0034  public:
0035   /// \brief Generates a constant BooleanArray
0036   ///
0037   /// \param[in] size the size of the array to generate
0038   /// \param[in] value to repeat
0039   ///
0040   /// \return a generated Array
0041   static std::shared_ptr<Array> Boolean(int64_t size, bool value = false);
0042 
0043   /// \brief Generates a constant UInt8Array
0044   ///
0045   /// \param[in] size the size of the array to generate
0046   /// \param[in] value to repeat
0047   ///
0048   /// \return a generated Array
0049   static std::shared_ptr<Array> UInt8(int64_t size, uint8_t value = 0);
0050 
0051   /// \brief Generates a constant Int8Array
0052   ///
0053   /// \param[in] size the size of the array to generate
0054   /// \param[in] value to repeat
0055   ///
0056   /// \return a generated Array
0057   static std::shared_ptr<Array> Int8(int64_t size, int8_t value = 0);
0058 
0059   /// \brief Generates a constant UInt16Array
0060   ///
0061   /// \param[in] size the size of the array to generate
0062   /// \param[in] value to repeat
0063   ///
0064   /// \return a generated Array
0065   static std::shared_ptr<Array> UInt16(int64_t size, uint16_t value = 0);
0066 
0067   /// \brief Generates a constant UInt16Array
0068   ///
0069   /// \param[in] size the size of the array to generate
0070   /// \param[in] value to repeat
0071   ///
0072   /// \return a generated Array
0073   static std::shared_ptr<Array> Int16(int64_t size, int16_t value = 0);
0074 
0075   /// \brief Generates a constant UInt32Array
0076   ///
0077   /// \param[in] size the size of the array to generate
0078   /// \param[in] value to repeat
0079   ///
0080   /// \return a generated Array
0081   static std::shared_ptr<Array> UInt32(int64_t size, uint32_t value = 0);
0082 
0083   /// \brief Generates a constant UInt32Array
0084   ///
0085   /// \param[in] size the size of the array to generate
0086   /// \param[in] value to repeat
0087   ///
0088   /// \return a generated Array
0089   static std::shared_ptr<Array> Int32(int64_t size, int32_t value = 0);
0090 
0091   /// \brief Generates a constant UInt64Array
0092   ///
0093   /// \param[in] size the size of the array to generate
0094   /// \param[in] value to repeat
0095   ///
0096   /// \return a generated Array
0097   static std::shared_ptr<Array> UInt64(int64_t size, uint64_t value = 0);
0098 
0099   /// \brief Generates a constant UInt64Array
0100   ///
0101   /// \param[in] size the size of the array to generate
0102   /// \param[in] value to repeat
0103   ///
0104   /// \return a generated Array
0105   static std::shared_ptr<Array> Int64(int64_t size, int64_t value = 0);
0106 
0107   /// \brief Generates a constant Float32Array
0108   ///
0109   /// \param[in] size the size of the array to generate
0110   /// \param[in] value to repeat
0111   ///
0112   /// \return a generated Array
0113   static std::shared_ptr<Array> Float32(int64_t size, float value = 0);
0114 
0115   /// \brief Generates a constant Float64Array
0116   ///
0117   /// \param[in] size the size of the array to generate
0118   /// \param[in] value to repeat
0119   ///
0120   /// \return a generated Array
0121   static std::shared_ptr<Array> Float64(int64_t size, double value = 0);
0122 
0123   /// \brief Generates a constant StringArray
0124   ///
0125   /// \param[in] size the size of the array to generate
0126   /// \param[in] value to repeat
0127   ///
0128   /// \return a generated Array
0129   static std::shared_ptr<Array> String(int64_t size, std::string value = "");
0130 
0131   template <typename ArrowType, typename CType = typename ArrowType::c_type>
0132   static std::shared_ptr<Array> Numeric(int64_t size, CType value = 0) {
0133     switch (ArrowType::type_id) {
0134       case Type::BOOL:
0135         return Boolean(size, static_cast<bool>(value));
0136       case Type::UINT8:
0137         return UInt8(size, static_cast<uint8_t>(value));
0138       case Type::INT8:
0139         return Int8(size, static_cast<int8_t>(value));
0140       case Type::UINT16:
0141         return UInt16(size, static_cast<uint16_t>(value));
0142       case Type::INT16:
0143         return Int16(size, static_cast<int16_t>(value));
0144       case Type::UINT32:
0145         return UInt32(size, static_cast<uint32_t>(value));
0146       case Type::INT32:
0147         return Int32(size, static_cast<int32_t>(value));
0148       case Type::UINT64:
0149         return UInt64(size, static_cast<uint64_t>(value));
0150       case Type::INT64:
0151         return Int64(size, static_cast<int64_t>(value));
0152       case Type::FLOAT:
0153         return Float32(size, static_cast<float>(value));
0154       case Type::DOUBLE:
0155         return Float64(size, static_cast<double>(value));
0156       case Type::INTERVAL_DAY_TIME:
0157       case Type::DATE32: {
0158         EXPECT_OK_AND_ASSIGN(auto viewed,
0159                              Int32(size, static_cast<uint32_t>(value))->View(date32()));
0160         return viewed;
0161       }
0162       case Type::INTERVAL_MONTHS: {
0163         EXPECT_OK_AND_ASSIGN(auto viewed,
0164                              Int32(size, static_cast<uint32_t>(value))
0165                                  ->View(std::make_shared<MonthIntervalType>()));
0166         return viewed;
0167       }
0168       case Type::TIME32: {
0169         EXPECT_OK_AND_ASSIGN(auto viewed,
0170                              Int32(size, static_cast<uint32_t>(value))
0171                                  ->View(std::make_shared<Time32Type>(TimeUnit::SECOND)));
0172         return viewed;
0173       }
0174       case Type::TIME64: {
0175         EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast<uint64_t>(value))
0176                                               ->View(std::make_shared<Time64Type>()));
0177         return viewed;
0178       }
0179       case Type::DATE64: {
0180         EXPECT_OK_AND_ASSIGN(auto viewed,
0181                              Int64(size, static_cast<uint64_t>(value))->View(date64()));
0182         return viewed;
0183       }
0184       case Type::TIMESTAMP: {
0185         EXPECT_OK_AND_ASSIGN(
0186             auto viewed, Int64(size, static_cast<int64_t>(value))
0187                              ->View(std::make_shared<TimestampType>(TimeUnit::SECOND)));
0188         return viewed;
0189       }
0190       default:
0191         return nullptr;
0192     }
0193   }
0194 
0195   /// \brief Generates a constant Array of zeroes
0196   ///
0197   /// \param[in] size the size of the array to generate
0198   /// \param[in] type the type of the Array
0199   ///
0200   /// \return a generated Array
0201   static std::shared_ptr<Array> Zeroes(int64_t size,
0202                                        const std::shared_ptr<DataType>& type);
0203 
0204   /// \brief Generates a RecordBatch of zeroes
0205   ///
0206   /// \param[in] size the size of the array to generate
0207   /// \param[in] schema to conform to
0208   ///
0209   /// This function is handy to return of RecordBatch of a desired shape.
0210   ///
0211   /// \return a generated RecordBatch
0212   static std::shared_ptr<RecordBatch> Zeroes(int64_t size,
0213                                              const std::shared_ptr<Schema>& schema);
0214 
0215   /// \brief Generates a RecordBatchReader by repeating a RecordBatch
0216   ///
0217   /// \param[in] n_batch the number of times it repeats batch
0218   /// \param[in] batch the RecordBatch to repeat
0219   ///
0220   /// \return a generated RecordBatchReader
0221   static std::shared_ptr<RecordBatchReader> Repeat(
0222       int64_t n_batch, const std::shared_ptr<RecordBatch> batch);
0223 
0224   /// \brief Generates a RecordBatchReader of zeroes batches
0225   ///
0226   /// \param[in] n_batch the number of RecordBatch
0227   /// \param[in] batch_size the size of each RecordBatch
0228   /// \param[in] schema to conform to
0229   ///
0230   /// \return a generated RecordBatchReader
0231   static std::shared_ptr<RecordBatchReader> Zeroes(int64_t n_batch, int64_t batch_size,
0232                                                    const std::shared_ptr<Schema>& schema);
0233 };
0234 
/// \brief Build an Array from a vector of scalars
///
/// NOTE(review): presumably each scalar becomes one element of the result, in
/// order -- the implementation is not visible here, confirm before relying on it.
///
/// \param[in] scalars the scalars to convert
///
/// \return the resulting Array, or an error wrapped in the Result
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars);
0237 
0238 namespace gen {
0239 
/// \brief Abstract interface for objects that produce arrays on demand
///
/// Concrete generators are obtained through the factory functions declared
/// further down in this namespace (e.g. Constant, Step, Random).
class ARROW_TESTING_EXPORT ArrayGenerator {
 public:
  virtual ~ArrayGenerator() = default;
  /// \brief Generate an array
  ///
  /// \param[in] num_rows presumably the length of the array to produce -- the
  ///            exact contract is defined by the implementations (not shown here)
  ///
  /// \return the generated Array, or an error wrapped in the Result
  virtual Result<std::shared_ptr<Array>> Generate(int64_t num_rows) = 0;
  /// \brief The data type associated with this generator
  ///
  /// NOTE(review): presumably the type of the arrays returned by Generate -- confirm.
  virtual std::shared_ptr<DataType> type() const = 0;
};
0246 
/// \brief Variant of DataGenerator whose methods return values directly
///
/// Same as DataGenerator below but instead of returning Result an ok status is
/// EXPECT'd. Instances are obtained from DataGenerator::FailOnError().
class ARROW_TESTING_EXPORT GTestDataGenerator {
 public:
  virtual ~GTestDataGenerator() = default;
  /// \brief Generate a single RecordBatch
  ///
  /// \param[in] num_rows presumably the number of rows in the batch -- confirm
  virtual std::shared_ptr<::arrow::RecordBatch> RecordBatch(int64_t num_rows) = 0;
  /// \brief Generate several RecordBatches
  ///
  /// \param[in] rows_per_batch presumably the number of rows in each batch -- confirm
  /// \param[in] num_batches presumably the number of batches to produce -- confirm
  virtual std::vector<std::shared_ptr<::arrow::RecordBatch>> RecordBatches(
      int64_t rows_per_batch, int num_batches) = 0;

  /// \brief Generate a single compute ExecBatch
  ///
  /// \param[in] num_rows presumably the number of rows in the batch -- confirm
  virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0;
  /// \brief Generate several compute ExecBatches
  ///
  /// \param[in] rows_per_batch presumably the number of rows in each batch -- confirm
  /// \param[in] num_batches presumably the number of batches to produce -- confirm
  virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t rows_per_batch,
                                                               int num_batches) = 0;

  /// \brief Generate a Table
  ///
  /// \param[in] rows_per_chunk presumably the number of rows in each chunk -- confirm
  /// \param[in] num_chunks presumably the number of chunks; defaults to 1
  virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk,
                                                int num_chunks = 1) = 0;
  /// \brief The schema associated with this generator
  virtual std::shared_ptr<::arrow::Schema> Schema() = 0;
};
0263 
/// \brief Abstract interface producing tabular test data in several forms
///
/// Each generating method returns a Result so that failures are reported to the
/// caller; use FailOnError() for a variant that returns plain values instead.
/// Instances are created with Gen(), declared further down in this namespace.
class ARROW_TESTING_EXPORT DataGenerator {
 public:
  virtual ~DataGenerator() = default;
  /// \brief Generate a single RecordBatch
  ///
  /// \param[in] num_rows presumably the number of rows in the batch -- confirm
  virtual Result<std::shared_ptr<::arrow::RecordBatch>> RecordBatch(int64_t num_rows) = 0;
  /// \brief Generate several RecordBatches
  ///
  /// \param[in] rows_per_batch presumably the number of rows in each batch -- confirm
  /// \param[in] num_batches presumably the number of batches to produce -- confirm
  virtual Result<std::vector<std::shared_ptr<::arrow::RecordBatch>>> RecordBatches(
      int64_t rows_per_batch, int num_batches) = 0;

  /// \brief Generate a single compute ExecBatch
  ///
  /// \param[in] num_rows presumably the number of rows in the batch -- confirm
  virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0;
  /// \brief Generate several compute ExecBatches
  ///
  /// \param[in] rows_per_batch presumably the number of rows in each batch -- confirm
  /// \param[in] num_batches presumably the number of batches to produce -- confirm
  virtual Result<std::vector<::arrow::compute::ExecBatch>> ExecBatches(
      int64_t rows_per_batch, int num_batches) = 0;

  /// \brief Generate a Table
  ///
  /// \param[in] rows_per_chunk presumably the number of rows in each chunk -- confirm
  /// \param[in] num_chunks presumably the number of chunks; defaults to 1
  virtual Result<std::shared_ptr<::arrow::Table>> Table(int64_t rows_per_chunk,
                                                        int num_chunks = 1) = 0;
  /// \brief The schema associated with this generator
  virtual std::shared_ptr<::arrow::Schema> Schema() = 0;
  /// @brief Converts this generator to a variant that fails (in a googletest sense)
  ///        if any error is encountered.
  virtual std::unique_ptr<GTestDataGenerator> FailOnError() = 0;
};
0282 
0283 /// @brief A potentially named field
0284 ///
0285 /// If name is not specified then a name will be generated automatically (e.g. f0, f1)
0286 struct ARROW_TESTING_EXPORT GeneratorField {
0287  public:
0288   GeneratorField(std::shared_ptr<ArrayGenerator> gen)  // NOLINT implicit conversion
0289       : name(), gen(std::move(gen)) {}
0290   GeneratorField(std::string name, std::shared_ptr<ArrayGenerator> gen)
0291       : name(std::move(name)), gen(std::move(gen)) {}
0292 
0293   std::optional<std::string> name;
0294   std::shared_ptr<ArrayGenerator> gen;
0295 };
0296 
/// \brief Create a table generator with the given fields
///
/// \param[in] column_gens the fields of the generated data; each entry pairs an
///            ArrayGenerator with an optional name (see GeneratorField)
///
/// \return a DataGenerator built from the given fields
ARROW_TESTING_EXPORT std::shared_ptr<DataGenerator> Gen(
    std::vector<GeneratorField> column_gens);
0300 
/// \brief Make a generator that returns a constant value
///
/// \param[in] value the scalar value the generator returns
///
/// \return the generator
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Constant(
    std::shared_ptr<Scalar> value);
/// \brief Make a generator that returns an incrementing value
///
/// Note: overflow is not prevented; standard unsigned integer overflow applies.
///
/// \param[in] start the starting value, defaults to 0
/// \param[in] step the increment, defaults to 1
/// \param[in] signed_int defaults to false; presumably selects a signed instead of
///            an unsigned output type -- confirm against the implementation
///
/// \return the generator
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Step(uint32_t start = 0,
                                                          uint32_t step = 1,
                                                          bool signed_int = false);
/// \brief Make a generator that returns a random value
///
/// \param[in] type presumably the data type of the generated values -- confirm
///            against the implementation
///
/// \return the generator
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Random(
    std::shared_ptr<DataType> type);
0313 /// TODO(if-needed) could add a repeat-scalars generator, e.g. Repeat({1, 2, 3}) for
0314 /// 1,2,3,1,2,3,1
0315 ///
/// TODO(if-needed) could add a repeat-from-json generator, e.g.
/// Repeat(int32(), "[1, 2, 3]"), same behavior as repeat-scalars
0318 
0319 }  // namespace gen
0320 
0321 }  // namespace arrow