![]() |
|
|||
File indexing completed on 2025-08-28 08:27:00
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <memory> 0021 0022 #include "arrow/io/type_fwd.h" 0023 #include "arrow/json/options.h" 0024 #include "arrow/record_batch.h" 0025 #include "arrow/result.h" 0026 #include "arrow/status.h" 0027 #include "arrow/util/macros.h" 0028 #include "arrow/util/type_fwd.h" 0029 #include "arrow/util/visibility.h" 0030 0031 namespace arrow { 0032 namespace json { 0033 0034 /// A class that reads an entire JSON file into a Arrow Table 0035 /// 0036 /// The file is expected to consist of individual line-separated JSON objects 0037 class ARROW_EXPORT TableReader { 0038 public: 0039 virtual ~TableReader() = default; 0040 0041 /// Read the entire JSON file and convert it to a Arrow Table 0042 virtual Result<std::shared_ptr<Table>> Read() = 0; 0043 0044 /// Create a TableReader instance 0045 static Result<std::shared_ptr<TableReader>> Make(MemoryPool* pool, 0046 std::shared_ptr<io::InputStream> input, 0047 const ReadOptions&, 0048 const ParseOptions&); 0049 }; 0050 0051 ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options, 0052 std::shared_ptr<Buffer> json); 0053 0054 /// \brief A class that reads a JSON file incrementally 0055 /// 0056 /// JSON data is read from a stream in fixed-size blocks (configurable with 0057 /// `ReadOptions::block_size`). Each block is converted to a `RecordBatch`. Yielded 0058 /// batches have a consistent schema but may differ in row count. 0059 /// 0060 /// The supplied `ParseOptions` are used to determine a schema, based either on a 0061 /// provided explicit schema or inferred from the first non-empty block. 0062 /// Afterwards, the target schema is frozen. If `UnexpectedFieldBehavior::InferType` is 0063 /// specified, unexpected fields will only be inferred for the first block. Afterwards 0064 /// they'll be treated as errors. 0065 /// 0066 /// If `ReadOptions::use_threads` is `true`, each block's parsing/decoding task will be 0067 /// parallelized on the given `cpu_executor` (with readahead corresponding to the 0068 /// executor's capacity). If an executor isn't provided, the global thread pool will be 0069 /// used. 0070 /// 0071 /// If `ReadOptions::use_threads` is `false`, computations will be run on the calling 0072 /// thread and `cpu_executor` will be ignored. 0073 class ARROW_EXPORT StreamingReader : public RecordBatchReader { 0074 public: 0075 virtual ~StreamingReader() = default; 0076 0077 /// \brief Read the next `RecordBatch` asynchronously 0078 /// This function is async-reentrant (but not synchronously reentrant). However, if 0079 /// threading is disabled, this will block until completion. 0080 virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0; 0081 0082 /// Get the number of bytes which have been successfully converted to record batches 0083 /// and consumed 0084 [[nodiscard]] virtual int64_t bytes_processed() const = 0; 0085 0086 /// \brief Create a `StreamingReader` from an `InputStream` 0087 /// Blocks until the initial batch is loaded 0088 /// 0089 /// \param[in] stream JSON source stream 0090 /// \param[in] read_options Options for reading 0091 /// \param[in] parse_options Options for chunking, parsing, and conversion 0092 /// \param[in] io_context Context for IO operations (optional) 0093 /// \param[in] cpu_executor Executor for computation tasks (optional) 0094 /// \return The initialized reader 0095 static Result<std::shared_ptr<StreamingReader>> Make( 0096 std::shared_ptr<io::InputStream> stream, const ReadOptions& read_options, 0097 const ParseOptions& parse_options, 0098 const io::IOContext& io_context = io::default_io_context(), 0099 ::arrow::internal::Executor* cpu_executor = NULLPTR); 0100 0101 /// \brief Create a `StreamingReader` from an `InputStream` asynchronously 0102 /// Returned future completes after loading the first batch 0103 /// 0104 /// \param[in] stream JSON source stream 0105 /// \param[in] read_options Options for reading 0106 /// \param[in] parse_options Options for chunking, parsing, and conversion 0107 /// \param[in] io_context Context for IO operations (optional) 0108 /// \param[in] cpu_executor Executor for computation tasks (optional) 0109 /// \return Future for the initialized reader 0110 static Future<std::shared_ptr<StreamingReader>> MakeAsync( 0111 std::shared_ptr<io::InputStream> stream, const ReadOptions& read_options, 0112 const ParseOptions& parse_options, 0113 const io::IOContext& io_context = io::default_io_context(), 0114 ::arrow::internal::Executor* cpu_executor = NULLPTR); 0115 }; 0116 0117 } // namespace json 0118 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |