Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:00

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <memory>
0021 
0022 #include "arrow/io/type_fwd.h"
0023 #include "arrow/json/options.h"
0024 #include "arrow/record_batch.h"
0025 #include "arrow/result.h"
0026 #include "arrow/status.h"
0027 #include "arrow/util/macros.h"
0028 #include "arrow/util/type_fwd.h"
0029 #include "arrow/util/visibility.h"
0030 
0031 namespace arrow {
0032 namespace json {
0033 
0034 /// A class that reads an entire JSON file into an Arrow Table
0035 ///
0036 /// The file is expected to consist of individual line-separated JSON objects
0037 class ARROW_EXPORT TableReader {
0038  public:
0039   virtual ~TableReader() = default;
0040 
0041   /// Read the entire JSON file and convert it to an Arrow Table
       ///
       /// \return The resulting Table, or an error carried by the Result
0042   virtual Result<std::shared_ptr<Table>> Read() = 0;
0043 
0044   /// Create a TableReader instance
       ///
       /// \param[in] pool Memory pool handed to the reader (presumably used for its
       ///   allocations; confirm against the implementation)
       /// \param[in] input Stream supplying the JSON data
       ///
       /// The two trailing parameters are unnamed in this declaration: the third is
       /// the `ReadOptions` and the fourth is the `ParseOptions` for the reader.
       /// \return The new reader, or an error carried by the Result
0045   static Result<std::shared_ptr<TableReader>> Make(MemoryPool* pool,
0046                                                    std::shared_ptr<io::InputStream> input,
0047                                                    const ReadOptions&,
0048                                                    const ParseOptions&);
0049 };
0050 
     /// \brief Parse JSON held in a buffer into a single RecordBatch
     ///
     /// NOTE(review): this summary is inferred from the name and signature only;
     /// confirm the details (e.g. how several JSON objects in one buffer are
     /// handled) against the implementation.
     ///
     /// \param[in] options Options controlling parsing (taken by value)
     /// \param[in] json Buffer containing the JSON text to parse
     /// \return The parsed RecordBatch, or an error carried by the Result
0051 ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options,
0052                                                            std::shared_ptr<Buffer> json);
0053 
0054 /// \brief A class that reads a JSON file incrementally
0055 ///
0056 /// JSON data is read from a stream in fixed-size blocks (configurable with
0057 /// `ReadOptions::block_size`). Each block is converted to a `RecordBatch`. Yielded
0058 /// batches have a consistent schema but may differ in row count.
0059 ///
0060 /// The supplied `ParseOptions` are used to determine a schema, based either on a
0061 /// provided explicit schema or inferred from the first non-empty block.
0062 /// Afterwards, the target schema is frozen. If `UnexpectedFieldBehavior::InferType` is
0063 /// specified, unexpected fields will only be inferred for the first block. Afterwards
0064 /// they'll be treated as errors.
0065 ///
0066 /// If `ReadOptions::use_threads` is `true`, each block's parsing/decoding task will be
0067 /// parallelized on the given `cpu_executor` (with readahead corresponding to the
0068 /// executor's capacity). If an executor isn't provided, the global thread pool will be
0069 /// used.
0070 ///
0071 /// If `ReadOptions::use_threads` is `false`, computations will be run on the calling
0072 /// thread and `cpu_executor` will be ignored.
0073 class ARROW_EXPORT StreamingReader : public RecordBatchReader {
0074  public:
0075   virtual ~StreamingReader() = default;
0076 
0077   /// \brief Read the next `RecordBatch` asynchronously
       ///
0078   /// This function is async-reentrant (but not synchronously reentrant). However, if
0079   /// threading is disabled, this will block until completion.
       ///
       /// \return Future for the next batch
0080   virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0;
0081 
0082   /// Get the number of bytes which have been successfully converted to record batches
0083   /// and consumed
0084   [[nodiscard]] virtual int64_t bytes_processed() const = 0;
0085 
0086   /// \brief Create a `StreamingReader` from an `InputStream`
       ///
0087   /// Blocks until the initial batch is loaded
0088   ///
0089   /// \param[in] stream JSON source stream
0090   /// \param[in] read_options Options for reading
0091   /// \param[in] parse_options Options for chunking, parsing, and conversion
0092   /// \param[in] io_context Context for IO operations (optional; defaults to `io::default_io_context()`)
0093   /// \param[in] cpu_executor Executor for computation tasks (optional; defaults to null)
0094   /// \return The initialized reader
0095   static Result<std::shared_ptr<StreamingReader>> Make(
0096       std::shared_ptr<io::InputStream> stream, const ReadOptions& read_options,
0097       const ParseOptions& parse_options,
0098       const io::IOContext& io_context = io::default_io_context(),
0099       ::arrow::internal::Executor* cpu_executor = NULLPTR);
0100 
0101   /// \brief Create a `StreamingReader` from an `InputStream` asynchronously
       ///
0102   /// Returned future completes after loading the first batch
0103   ///
0104   /// \param[in] stream JSON source stream
0105   /// \param[in] read_options Options for reading
0106   /// \param[in] parse_options Options for chunking, parsing, and conversion
0107   /// \param[in] io_context Context for IO operations (optional; defaults to `io::default_io_context()`)
0108   /// \param[in] cpu_executor Executor for computation tasks (optional; defaults to null)
0109   /// \return Future for the initialized reader
0110   static Future<std::shared_ptr<StreamingReader>> MakeAsync(
0111       std::shared_ptr<io::InputStream> stream, const ReadOptions& read_options,
0112       const ParseOptions& parse_options,
0113       const io::IOContext& io_context = io::default_io_context(),
0114       ::arrow::internal::Executor* cpu_executor = NULLPTR);
0115 };
0116 
0117 }  // namespace json
0118 }  // namespace arrow