![]() |
|
|||
File indexing completed on 2025-08-28 08:27:00
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 // Read Arrow files and streams 0019 0020 #pragma once 0021 0022 #include <cstddef> 0023 #include <cstdint> 0024 #include <memory> 0025 #include <utility> 0026 #include <vector> 0027 0028 #include "arrow/io/caching.h" 0029 #include "arrow/io/type_fwd.h" 0030 #include "arrow/ipc/message.h" 0031 #include "arrow/ipc/options.h" 0032 #include "arrow/record_batch.h" 0033 #include "arrow/result.h" 0034 #include "arrow/type_fwd.h" 0035 #include "arrow/util/async_generator.h" 0036 #include "arrow/util/macros.h" 0037 #include "arrow/util/visibility.h" 0038 0039 namespace arrow { 0040 namespace ipc { 0041 0042 class DictionaryMemo; 0043 struct IpcPayload; 0044 0045 using RecordBatchReader = ::arrow::RecordBatchReader; 0046 0047 struct ReadStats { 0048 /// Number of IPC messages read. 0049 int64_t num_messages = 0; 0050 /// Number of record batches read. 0051 int64_t num_record_batches = 0; 0052 /// Number of dictionary batches read. 0053 /// 0054 /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries 0055 int64_t num_dictionary_batches = 0; 0056 0057 /// Number of dictionary deltas read. 0058 int64_t num_dictionary_deltas = 0; 0059 /// Number of replaced dictionaries (i.e. where a dictionary batch replaces 0060 /// an existing dictionary with an unrelated new dictionary). 0061 int64_t num_replaced_dictionaries = 0; 0062 }; 0063 0064 /// \brief Synchronous batch stream reader that reads from io::InputStream 0065 /// 0066 /// This class reads the schema (plus any dictionaries) as the first messages 0067 /// in the stream, followed by record batches. For more granular zero-copy 0068 /// reads see the ReadRecordBatch functions 0069 class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader { 0070 public: 0071 /// Create batch reader from generic MessageReader. 0072 /// This will take ownership of the given MessageReader. 0073 /// 0074 /// \param[in] message_reader a MessageReader implementation 0075 /// \param[in] options any IPC reading options (optional) 0076 /// \return the created batch reader 0077 static Result<std::shared_ptr<RecordBatchStreamReader>> Open( 0078 std::unique_ptr<MessageReader> message_reader, 0079 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0080 0081 /// \brief Record batch stream reader from InputStream 0082 /// 0083 /// \param[in] stream an input stream instance. Must stay alive throughout 0084 /// lifetime of stream reader 0085 /// \param[in] options any IPC reading options (optional) 0086 /// \return the created batch reader 0087 static Result<std::shared_ptr<RecordBatchStreamReader>> Open( 0088 io::InputStream* stream, 0089 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0090 0091 /// \brief Open stream and retain ownership of stream object 0092 /// \param[in] stream the input stream 0093 /// \param[in] options any IPC reading options (optional) 0094 /// \return the created batch reader 0095 static Result<std::shared_ptr<RecordBatchStreamReader>> Open( 0096 const std::shared_ptr<io::InputStream>& stream, 0097 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0098 0099 /// \brief Return current read statistics 0100 virtual ReadStats stats() const = 0; 0101 }; 0102 0103 /// \brief Reads the record batch file format 0104 class ARROW_EXPORT RecordBatchFileReader 0105 : public std::enable_shared_from_this<RecordBatchFileReader> { 0106 public: 0107 virtual ~RecordBatchFileReader() = default; 0108 0109 /// \brief Open a RecordBatchFileReader 0110 /// 0111 /// Open a file-like object that is assumed to be self-contained; i.e., the 0112 /// end of the file interface is the end of the Arrow file. Note that there 0113 /// can be any amount of data preceding the Arrow-formatted data, because we 0114 /// need only locate the end of the Arrow file stream to discover the metadata 0115 /// and then proceed to read the data into memory. 0116 static Result<std::shared_ptr<RecordBatchFileReader>> Open( 0117 io::RandomAccessFile* file, 0118 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0119 0120 /// \brief Open a RecordBatchFileReader 0121 /// If the file is embedded within some larger file or memory region, you can 0122 /// pass the absolute memory offset to the end of the file (which contains the 0123 /// metadata footer). The metadata must have been written with memory offsets 0124 /// relative to the start of the containing file 0125 /// 0126 /// \param[in] file the data source 0127 /// \param[in] footer_offset the position of the end of the Arrow file 0128 /// \param[in] options options for IPC reading 0129 /// \return the returned reader 0130 static Result<std::shared_ptr<RecordBatchFileReader>> Open( 0131 io::RandomAccessFile* file, int64_t footer_offset, 0132 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0133 0134 /// \brief Version of Open that retains ownership of file 0135 /// 0136 /// \param[in] file the data source 0137 /// \param[in] options options for IPC reading 0138 /// \return the returned reader 0139 static Result<std::shared_ptr<RecordBatchFileReader>> Open( 0140 const std::shared_ptr<io::RandomAccessFile>& file, 0141 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0142 0143 /// \brief Version of Open that retains ownership of file 0144 /// 0145 /// \param[in] file the data source 0146 /// \param[in] footer_offset the position of the end of the Arrow file 0147 /// \param[in] options options for IPC reading 0148 /// \return the returned reader 0149 static Result<std::shared_ptr<RecordBatchFileReader>> Open( 0150 const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset, 0151 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0152 0153 /// \brief Open a file asynchronously (owns the file). 0154 static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync( 0155 const std::shared_ptr<io::RandomAccessFile>& file, 0156 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0157 0158 /// \brief Open a file asynchronously (borrows the file). 0159 static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync( 0160 io::RandomAccessFile* file, 0161 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0162 0163 /// \brief Open a file asynchronously (owns the file). 0164 static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync( 0165 const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset, 0166 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0167 0168 /// \brief Open a file asynchronously (borrows the file). 0169 static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync( 0170 io::RandomAccessFile* file, int64_t footer_offset, 0171 const IpcReadOptions& options = IpcReadOptions::Defaults()); 0172 0173 /// \brief The schema read from the file 0174 virtual std::shared_ptr<Schema> schema() const = 0; 0175 0176 /// \brief Returns the number of record batches in the file 0177 virtual int num_record_batches() const = 0; 0178 0179 /// \brief Return the metadata version from the file metadata 0180 virtual MetadataVersion version() const = 0; 0181 0182 /// \brief Return the contents of the custom_metadata field from the file's 0183 /// Footer 0184 virtual std::shared_ptr<const KeyValueMetadata> metadata() const = 0; 0185 0186 /// \brief Read a particular record batch from the file. Does not copy memory 0187 /// if the input source supports zero-copy. 0188 /// 0189 /// \param[in] i the index of the record batch to return 0190 /// \return the read batch 0191 virtual Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) = 0; 0192 0193 /// \brief Read a particular record batch along with its custom metadata from the file. 0194 /// Does not copy memory if the input source supports zero-copy. 0195 /// 0196 /// \param[in] i the index of the record batch to return 0197 /// \return a struct containing the read batch and its custom metadata 0198 virtual Result<RecordBatchWithMetadata> ReadRecordBatchWithCustomMetadata(int i) = 0; 0199 0200 /// \brief Return current read statistics 0201 virtual ReadStats stats() const = 0; 0202 0203 /// \brief Computes the total number of rows in the file. 0204 virtual Result<int64_t> CountRows() = 0; 0205 0206 /// \brief Begin loading metadata for the desired batches into memory. 0207 /// 0208 /// This method will also begin loading all dictionaries messages into memory. 0209 /// 0210 /// For a regular file this will immediately begin disk I/O in the background on a 0211 /// thread on the IOContext's thread pool. If the file is memory mapped this will 0212 /// ensure the memory needed for the metadata is paged from disk into memory 0213 /// 0214 /// \param indices Indices of the batches to prefetch 0215 /// If empty then all batches will be prefetched. 0216 virtual Status PreBufferMetadata(const std::vector<int>& indices) = 0; 0217 0218 /// \brief Get a reentrant generator of record batches. 0219 /// 0220 /// \param[in] coalesce If true, enable I/O coalescing. 0221 /// \param[in] io_context The IOContext to use (controls which thread pool 0222 /// is used for I/O). 0223 /// \param[in] cache_options Options for coalescing (if enabled). 0224 /// \param[in] executor Optionally, an executor to use for decoding record 0225 /// batches. This is generally only a benefit for very wide and/or 0226 /// compressed batches. 0227 virtual Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator( 0228 const bool coalesce = false, 0229 const io::IOContext& io_context = io::default_io_context(), 0230 const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(), 0231 arrow::internal::Executor* executor = NULLPTR) = 0; 0232 0233 /// \brief Collect all batches as a vector of record batches 0234 Result<RecordBatchVector> ToRecordBatches(); 0235 0236 /// \brief Collect all batches and concatenate as arrow::Table 0237 Result<std::shared_ptr<Table>> ToTable(); 0238 }; 0239 0240 /// \brief A general listener class to receive events. 0241 /// 0242 /// You must implement callback methods for interested events. 0243 /// 0244 /// This API is EXPERIMENTAL. 0245 /// 0246 /// \since 0.17.0 0247 class ARROW_EXPORT Listener { 0248 public: 0249 virtual ~Listener() = default; 0250 0251 /// \brief Called when end-of-stream is received. 0252 /// 0253 /// The default implementation just returns arrow::Status::OK(). 0254 /// 0255 /// \return Status 0256 /// 0257 /// \see StreamDecoder 0258 virtual Status OnEOS(); 0259 0260 /// \brief Called when a record batch is decoded and 0261 /// OnRecordBatchWithMetadataDecoded() isn't overridden. 0262 /// 0263 /// The default implementation just returns 0264 /// arrow::Status::NotImplemented(). 0265 /// 0266 /// \param[in] record_batch a record batch decoded 0267 /// \return Status 0268 /// 0269 /// \see StreamDecoder 0270 virtual Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch); 0271 0272 /// \brief Called when a record batch with custom metadata is decoded. 0273 /// 0274 /// The default implementation just calls OnRecordBatchDecoded() 0275 /// without custom metadata. 0276 /// 0277 /// \param[in] record_batch_with_metadata a record batch with custom 0278 /// metadata decoded 0279 /// \return Status 0280 /// 0281 /// \see StreamDecoder 0282 /// 0283 /// \since 13.0.0 0284 virtual Status OnRecordBatchWithMetadataDecoded( 0285 RecordBatchWithMetadata record_batch_with_metadata); 0286 0287 /// \brief Called when a schema is decoded. 0288 /// 0289 /// The default implementation just returns arrow::Status::OK(). 0290 /// 0291 /// \param[in] schema a schema decoded 0292 /// \return Status 0293 /// 0294 /// \see StreamDecoder 0295 virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema); 0296 0297 /// \brief Called when a schema is decoded. 0298 /// 0299 /// The default implementation just calls OnSchemaDecoded(schema) 0300 /// (without filtered_schema) to keep backward compatibility. 0301 /// 0302 /// \param[in] schema a schema decoded 0303 /// \param[in] filtered_schema a filtered schema that only has read fields 0304 /// \return Status 0305 /// 0306 /// \see StreamDecoder 0307 /// 0308 /// \since 13.0.0 0309 virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema, 0310 std::shared_ptr<Schema> filtered_schema); 0311 }; 0312 0313 /// \brief Collect schema and record batches decoded by StreamDecoder. 0314 /// 0315 /// This API is EXPERIMENTAL. 0316 /// 0317 /// \since 0.17.0 0318 class ARROW_EXPORT CollectListener : public Listener { 0319 public: 0320 CollectListener() : schema_(), filtered_schema_(), record_batches_(), metadatas_() {} 0321 virtual ~CollectListener() = default; 0322 0323 Status OnSchemaDecoded(std::shared_ptr<Schema> schema, 0324 std::shared_ptr<Schema> filtered_schema) override { 0325 schema_ = std::move(schema); 0326 filtered_schema_ = std::move(filtered_schema); 0327 return Status::OK(); 0328 } 0329 0330 Status OnRecordBatchWithMetadataDecoded( 0331 RecordBatchWithMetadata record_batch_with_metadata) override { 0332 record_batches_.push_back(std::move(record_batch_with_metadata.batch)); 0333 metadatas_.push_back(std::move(record_batch_with_metadata.custom_metadata)); 0334 return Status::OK(); 0335 } 0336 0337 /// \return the decoded schema 0338 std::shared_ptr<Schema> schema() const { return schema_; } 0339 0340 /// \return the filtered schema 0341 std::shared_ptr<Schema> filtered_schema() const { return filtered_schema_; } 0342 0343 /// \return the all decoded record batches 0344 const std::vector<std::shared_ptr<RecordBatch>>& record_batches() const { 0345 return record_batches_; 0346 } 0347 0348 /// \return the all decoded metadatas 0349 const std::vector<std::shared_ptr<KeyValueMetadata>>& metadatas() const { 0350 return metadatas_; 0351 } 0352 0353 /// \return the number of collected record batches 0354 int64_t num_record_batches() const { return record_batches_.size(); } 0355 0356 /// \return the last decoded record batch and remove it from 0357 /// record_batches 0358 std::shared_ptr<RecordBatch> PopRecordBatch() { 0359 auto record_batch_with_metadata = PopRecordBatchWithMetadata(); 0360 return std::move(record_batch_with_metadata.batch); 0361 } 0362 0363 /// \return the last decoded record batch with custom metadata and 0364 /// remove it from record_batches 0365 RecordBatchWithMetadata PopRecordBatchWithMetadata() { 0366 RecordBatchWithMetadata record_batch_with_metadata; 0367 if (record_batches_.empty()) { 0368 return record_batch_with_metadata; 0369 } 0370 record_batch_with_metadata.batch = std::move(record_batches_.back()); 0371 record_batch_with_metadata.custom_metadata = std::move(metadatas_.back()); 0372 record_batches_.pop_back(); 0373 metadatas_.pop_back(); 0374 return record_batch_with_metadata; 0375 } 0376 0377 private: 0378 std::shared_ptr<Schema> schema_; 0379 std::shared_ptr<Schema> filtered_schema_; 0380 std::vector<std::shared_ptr<RecordBatch>> record_batches_; 0381 std::vector<std::shared_ptr<KeyValueMetadata>> metadatas_; 0382 }; 0383 0384 /// \brief Push style stream decoder that receives data from user. 0385 /// 0386 /// This class decodes the Apache Arrow IPC streaming format data. 0387 /// 0388 /// This API is EXPERIMENTAL. 0389 /// 0390 /// \see https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format 0391 /// 0392 /// \since 0.17.0 0393 class ARROW_EXPORT StreamDecoder { 0394 public: 0395 /// \brief Construct a stream decoder. 0396 /// 0397 /// \param[in] listener a Listener that must implement 0398 /// Listener::OnRecordBatchDecoded() to receive decoded record batches 0399 /// \param[in] options any IPC reading options (optional) 0400 StreamDecoder(std::shared_ptr<Listener> listener, 0401 IpcReadOptions options = IpcReadOptions::Defaults()); 0402 0403 virtual ~StreamDecoder(); 0404 0405 /// \brief Feed data to the decoder as a raw data. 0406 /// 0407 /// If the decoder can read one or more record batches by the data, 0408 /// the decoder calls listener->OnRecordBatchDecoded() with a 0409 /// decoded record batch multiple times. 0410 /// 0411 /// \param[in] data a raw data to be processed. This data isn't 0412 /// copied. The passed memory must be kept alive through record 0413 /// batch processing. 0414 /// \param[in] size raw data size. 0415 /// \return Status 0416 Status Consume(const uint8_t* data, int64_t size); 0417 0418 /// \brief Feed data to the decoder as a Buffer. 0419 /// 0420 /// If the decoder can read one or more record batches by the 0421 /// Buffer, the decoder calls listener->RecordBatchReceived() with a 0422 /// decoded record batch multiple times. 0423 /// 0424 /// \param[in] buffer a Buffer to be processed. 0425 /// \return Status 0426 Status Consume(std::shared_ptr<Buffer> buffer); 0427 0428 /// \brief Reset the internal status. 0429 /// 0430 /// You can reuse this decoder for new stream after calling 0431 /// this. 0432 /// 0433 /// \return Status 0434 Status Reset(); 0435 0436 /// \return the shared schema of the record batches in the stream 0437 std::shared_ptr<Schema> schema() const; 0438 0439 /// \brief Return the number of bytes needed to advance the state of 0440 /// the decoder. 0441 /// 0442 /// This method is provided for users who want to optimize performance. 0443 /// Normal users don't need to use this method. 0444 /// 0445 /// Here is an example usage for normal users: 0446 /// 0447 /// ~~~{.cpp} 0448 /// decoder.Consume(buffer1); 0449 /// decoder.Consume(buffer2); 0450 /// decoder.Consume(buffer3); 0451 /// ~~~ 0452 /// 0453 /// Decoder has internal buffer. If consumed data isn't enough to 0454 /// advance the state of the decoder, consumed data is buffered to 0455 /// the internal buffer. It causes performance overhead. 0456 /// 0457 /// If you pass next_required_size() size data to each Consume() 0458 /// call, the decoder doesn't use its internal buffer. It improves 0459 /// performance. 0460 /// 0461 /// Here is an example usage to avoid using internal buffer: 0462 /// 0463 /// ~~~{.cpp} 0464 /// buffer1 = get_data(decoder.next_required_size()); 0465 /// decoder.Consume(buffer1); 0466 /// buffer2 = get_data(decoder.next_required_size()); 0467 /// decoder.Consume(buffer2); 0468 /// ~~~ 0469 /// 0470 /// Users can use this method to avoid creating small chunks. Record 0471 /// batch data must be contiguous data. If users pass small chunks 0472 /// to the decoder, the decoder needs concatenate small chunks 0473 /// internally. It causes performance overhead. 0474 /// 0475 /// Here is an example usage to reduce small chunks: 0476 /// 0477 /// ~~~{.cpp} 0478 /// buffer = AllocateResizableBuffer(); 0479 /// while ((small_chunk = get_data(&small_chunk_size))) { 0480 /// auto current_buffer_size = buffer->size(); 0481 /// buffer->Resize(current_buffer_size + small_chunk_size); 0482 /// memcpy(buffer->mutable_data() + current_buffer_size, 0483 /// small_chunk, 0484 /// small_chunk_size); 0485 /// if (buffer->size() < decoder.next_required_size()) { 0486 /// continue; 0487 /// } 0488 /// std::shared_ptr<arrow::Buffer> chunk(buffer.release()); 0489 /// decoder.Consume(chunk); 0490 /// buffer = AllocateResizableBuffer(); 0491 /// } 0492 /// if (buffer->size() > 0) { 0493 /// std::shared_ptr<arrow::Buffer> chunk(buffer.release()); 0494 /// decoder.Consume(chunk); 0495 /// } 0496 /// ~~~ 0497 /// 0498 /// \return the number of bytes needed to advance the state of the 0499 /// decoder 0500 int64_t next_required_size() const; 0501 0502 /// \brief Return current read statistics 0503 ReadStats stats() const; 0504 0505 private: 0506 class StreamDecoderImpl; 0507 std::unique_ptr<StreamDecoderImpl> impl_; 0508 0509 ARROW_DISALLOW_COPY_AND_ASSIGN(StreamDecoder); 0510 }; 0511 0512 // Generic read functions; does not copy data if the input supports zero copy reads 0513 0514 /// \brief Read Schema from stream serialized as a single IPC message 0515 /// and populate any dictionary-encoded fields into a DictionaryMemo 0516 /// 0517 /// \param[in] stream an InputStream 0518 /// \param[in] dictionary_memo for recording dictionary-encoded fields 0519 /// \return the output Schema 0520 /// 0521 /// If record batches follow the schema, it is better to use 0522 /// RecordBatchStreamReader 0523 ARROW_EXPORT 0524 Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream, 0525 DictionaryMemo* dictionary_memo); 0526 0527 /// \brief Read Schema from encapsulated Message 0528 /// 0529 /// \param[in] message the message containing the Schema IPC metadata 0530 /// \param[in] dictionary_memo DictionaryMemo for recording dictionary-encoded 0531 /// fields. Can be nullptr if you are sure there are no 0532 /// dictionary-encoded fields 0533 /// \return the resulting Schema 0534 ARROW_EXPORT 0535 Result<std::shared_ptr<Schema>> ReadSchema(const Message& message, 0536 DictionaryMemo* dictionary_memo); 0537 0538 /// Read record batch as encapsulated IPC message with metadata size prefix and 0539 /// header 0540 /// 0541 /// \param[in] schema the record batch schema 0542 /// \param[in] dictionary_memo DictionaryMemo which has any 0543 /// dictionaries. Can be nullptr if you are sure there are no 0544 /// dictionary-encoded fields 0545 /// \param[in] options IPC options for reading 0546 /// \param[in] stream the file where the batch is located 0547 /// \return the read record batch 0548 ARROW_EXPORT 0549 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch( 0550 const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo, 0551 const IpcReadOptions& options, io::InputStream* stream); 0552 0553 /// \brief Read record batch from message 0554 /// 0555 /// \param[in] message a Message containing the record batch metadata 0556 /// \param[in] schema the record batch schema 0557 /// \param[in] dictionary_memo DictionaryMemo which has any 0558 /// dictionaries. Can be nullptr if you are sure there are no 0559 /// dictionary-encoded fields 0560 /// \param[in] options IPC options for reading 0561 /// \return the read record batch 0562 ARROW_EXPORT 0563 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch( 0564 const Message& message, const std::shared_ptr<Schema>& schema, 0565 const DictionaryMemo* dictionary_memo, const IpcReadOptions& options); 0566 0567 /// Read record batch from file given metadata and schema 0568 /// 0569 /// \param[in] metadata a Message containing the record batch metadata 0570 /// \param[in] schema the record batch schema 0571 /// \param[in] dictionary_memo DictionaryMemo which has any 0572 /// dictionaries. Can be nullptr if you are sure there are no 0573 /// dictionary-encoded fields 0574 /// \param[in] file a random access file 0575 /// \param[in] options options for deserialization 0576 /// \return the read record batch 0577 ARROW_EXPORT 0578 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch( 0579 const Buffer& metadata, const std::shared_ptr<Schema>& schema, 0580 const DictionaryMemo* dictionary_memo, const IpcReadOptions& options, 0581 io::RandomAccessFile* file); 0582 0583 /// \brief Read arrow::Tensor as encapsulated IPC message in file 0584 /// 0585 /// \param[in] file an InputStream pointed at the start of the message 0586 /// \return the read tensor 0587 ARROW_EXPORT 0588 Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file); 0589 0590 /// \brief EXPERIMENTAL: Read arrow::Tensor from IPC message 0591 /// 0592 /// \param[in] message a Message containing the tensor metadata and body 0593 /// \return the read tensor 0594 ARROW_EXPORT 0595 Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message); 0596 0597 /// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file 0598 /// 0599 /// \param[in] file an InputStream pointed at the start of the message 0600 /// \return the read sparse tensor 0601 ARROW_EXPORT 0602 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file); 0603 0604 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message 0605 /// 0606 /// \param[in] message a Message containing the tensor metadata and body 0607 /// \return the read sparse tensor 0608 ARROW_EXPORT 0609 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message); 0610 0611 namespace internal { 0612 0613 // These internal APIs may change without warning or deprecation 0614 0615 /// \brief EXPERIMENTAL: Read arrow::SparseTensorFormat::type from a metadata 0616 /// \param[in] metadata a Buffer containing the sparse tensor metadata 0617 /// \return the count of the body buffers 0618 ARROW_EXPORT 0619 Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata); 0620 0621 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from an IpcPayload 0622 /// \param[in] payload a IpcPayload contains a serialized SparseTensor 0623 /// \return the read sparse tensor 0624 ARROW_EXPORT 0625 Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload); 0626 0627 // For fuzzing targets 0628 ARROW_EXPORT 0629 Status FuzzIpcStream(const uint8_t* data, int64_t size); 0630 ARROW_EXPORT 0631 Status FuzzIpcTensorStream(const uint8_t* data, int64_t size); 0632 ARROW_EXPORT 0633 Status FuzzIpcFile(const uint8_t* data, int64_t size); 0634 0635 } // namespace internal 0636 0637 } // namespace ipc 0638 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |