Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:00

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 // Read Arrow files and streams
0019 
0020 #pragma once
0021 
0022 #include <cstddef>
0023 #include <cstdint>
0024 #include <memory>
0025 #include <utility>
0026 #include <vector>
0027 
0028 #include "arrow/io/caching.h"
0029 #include "arrow/io/type_fwd.h"
0030 #include "arrow/ipc/message.h"
0031 #include "arrow/ipc/options.h"
0032 #include "arrow/record_batch.h"
0033 #include "arrow/result.h"
0034 #include "arrow/type_fwd.h"
0035 #include "arrow/util/async_generator.h"
0036 #include "arrow/util/macros.h"
0037 #include "arrow/util/visibility.h"
0038 
0039 namespace arrow {
0040 namespace ipc {
0041 
0042 class DictionaryMemo;
0043 struct IpcPayload;
0044 
0045 using RecordBatchReader = ::arrow::RecordBatchReader;
0046 
/// \brief Read statistics counters reported by the IPC readers and decoder
struct ReadStats {
  /// Count of IPC messages read.
  int64_t num_messages{0};
  /// Count of record batches read.
  int64_t num_record_batches{0};
  /// Count of dictionary batches read.
  ///
  /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries
  int64_t num_dictionary_batches{0};

  /// Count of dictionary deltas read.
  int64_t num_dictionary_deltas{0};
  /// Count of replaced dictionaries, i.e. dictionary batches that replaced
  /// an existing dictionary with an unrelated new dictionary.
  int64_t num_replaced_dictionaries{0};
};
0063 
0064 /// \brief Synchronous batch stream reader that reads from io::InputStream
0065 ///
0066 /// This class reads the schema (plus any dictionaries) as the first messages
0067 /// in the stream, followed by record batches. For more granular zero-copy
0068 /// reads see the ReadRecordBatch functions
0069 class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
0070  public:
0071   /// Create batch reader from generic MessageReader.
0072   /// This will take ownership of the given MessageReader.
0073   ///
0074   /// \param[in] message_reader a MessageReader implementation
0075   /// \param[in] options any IPC reading options (optional)
0076   /// \return the created batch reader
0077   static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
0078       std::unique_ptr<MessageReader> message_reader,
0079       const IpcReadOptions& options = IpcReadOptions::Defaults());
0080 
0081   /// \brief Record batch stream reader from InputStream
0082   ///
0083   /// \param[in] stream an input stream instance. Must stay alive throughout
0084   /// lifetime of stream reader
0085   /// \param[in] options any IPC reading options (optional)
0086   /// \return the created batch reader
0087   static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
0088       io::InputStream* stream,
0089       const IpcReadOptions& options = IpcReadOptions::Defaults());
0090 
0091   /// \brief Open stream and retain ownership of stream object
0092   /// \param[in] stream the input stream
0093   /// \param[in] options any IPC reading options (optional)
0094   /// \return the created batch reader
0095   static Result<std::shared_ptr<RecordBatchStreamReader>> Open(
0096       const std::shared_ptr<io::InputStream>& stream,
0097       const IpcReadOptions& options = IpcReadOptions::Defaults());
0098 
0099   /// \brief Return current read statistics
0100   virtual ReadStats stats() const = 0;
0101 };
0102 
0103 /// \brief Reads the record batch file format
0104 class ARROW_EXPORT RecordBatchFileReader
0105     : public std::enable_shared_from_this<RecordBatchFileReader> {
0106  public:
0107   virtual ~RecordBatchFileReader() = default;
0108 
0109   /// \brief Open a RecordBatchFileReader
0110   ///
0111   /// Open a file-like object that is assumed to be self-contained; i.e., the
0112   /// end of the file interface is the end of the Arrow file. Note that there
0113   /// can be any amount of data preceding the Arrow-formatted data, because we
0114   /// need only locate the end of the Arrow file stream to discover the metadata
0115   /// and then proceed to read the data into memory.
0116   static Result<std::shared_ptr<RecordBatchFileReader>> Open(
0117       io::RandomAccessFile* file,
0118       const IpcReadOptions& options = IpcReadOptions::Defaults());
0119 
0120   /// \brief Open a RecordBatchFileReader
0121   /// If the file is embedded within some larger file or memory region, you can
0122   /// pass the absolute memory offset to the end of the file (which contains the
0123   /// metadata footer). The metadata must have been written with memory offsets
0124   /// relative to the start of the containing file
0125   ///
0126   /// \param[in] file the data source
0127   /// \param[in] footer_offset the position of the end of the Arrow file
0128   /// \param[in] options options for IPC reading
0129   /// \return the returned reader
0130   static Result<std::shared_ptr<RecordBatchFileReader>> Open(
0131       io::RandomAccessFile* file, int64_t footer_offset,
0132       const IpcReadOptions& options = IpcReadOptions::Defaults());
0133 
0134   /// \brief Version of Open that retains ownership of file
0135   ///
0136   /// \param[in] file the data source
0137   /// \param[in] options options for IPC reading
0138   /// \return the returned reader
0139   static Result<std::shared_ptr<RecordBatchFileReader>> Open(
0140       const std::shared_ptr<io::RandomAccessFile>& file,
0141       const IpcReadOptions& options = IpcReadOptions::Defaults());
0142 
0143   /// \brief Version of Open that retains ownership of file
0144   ///
0145   /// \param[in] file the data source
0146   /// \param[in] footer_offset the position of the end of the Arrow file
0147   /// \param[in] options options for IPC reading
0148   /// \return the returned reader
0149   static Result<std::shared_ptr<RecordBatchFileReader>> Open(
0150       const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
0151       const IpcReadOptions& options = IpcReadOptions::Defaults());
0152 
0153   /// \brief Open a file asynchronously (owns the file).
0154   static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
0155       const std::shared_ptr<io::RandomAccessFile>& file,
0156       const IpcReadOptions& options = IpcReadOptions::Defaults());
0157 
0158   /// \brief Open a file asynchronously (borrows the file).
0159   static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
0160       io::RandomAccessFile* file,
0161       const IpcReadOptions& options = IpcReadOptions::Defaults());
0162 
0163   /// \brief Open a file asynchronously (owns the file).
0164   static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
0165       const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
0166       const IpcReadOptions& options = IpcReadOptions::Defaults());
0167 
0168   /// \brief Open a file asynchronously (borrows the file).
0169   static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
0170       io::RandomAccessFile* file, int64_t footer_offset,
0171       const IpcReadOptions& options = IpcReadOptions::Defaults());
0172 
0173   /// \brief The schema read from the file
0174   virtual std::shared_ptr<Schema> schema() const = 0;
0175 
0176   /// \brief Returns the number of record batches in the file
0177   virtual int num_record_batches() const = 0;
0178 
0179   /// \brief Return the metadata version from the file metadata
0180   virtual MetadataVersion version() const = 0;
0181 
0182   /// \brief Return the contents of the custom_metadata field from the file's
0183   /// Footer
0184   virtual std::shared_ptr<const KeyValueMetadata> metadata() const = 0;
0185 
0186   /// \brief Read a particular record batch from the file. Does not copy memory
0187   /// if the input source supports zero-copy.
0188   ///
0189   /// \param[in] i the index of the record batch to return
0190   /// \return the read batch
0191   virtual Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(int i) = 0;
0192 
0193   /// \brief Read a particular record batch along with its custom metadata from the file.
0194   /// Does not copy memory if the input source supports zero-copy.
0195   ///
0196   /// \param[in] i the index of the record batch to return
0197   /// \return a struct containing the read batch and its custom metadata
0198   virtual Result<RecordBatchWithMetadata> ReadRecordBatchWithCustomMetadata(int i) = 0;
0199 
0200   /// \brief Return current read statistics
0201   virtual ReadStats stats() const = 0;
0202 
0203   /// \brief Computes the total number of rows in the file.
0204   virtual Result<int64_t> CountRows() = 0;
0205 
0206   /// \brief Begin loading metadata for the desired batches into memory.
0207   ///
0208   /// This method will also begin loading all dictionaries messages into memory.
0209   ///
0210   /// For a regular file this will immediately begin disk I/O in the background on a
0211   /// thread on the IOContext's thread pool.  If the file is memory mapped this will
0212   /// ensure the memory needed for the metadata is paged from disk into memory
0213   ///
0214   /// \param indices Indices of the batches to prefetch
0215   ///                If empty then all batches will be prefetched.
0216   virtual Status PreBufferMetadata(const std::vector<int>& indices) = 0;
0217 
0218   /// \brief Get a reentrant generator of record batches.
0219   ///
0220   /// \param[in] coalesce If true, enable I/O coalescing.
0221   /// \param[in] io_context The IOContext to use (controls which thread pool
0222   ///     is used for I/O).
0223   /// \param[in] cache_options Options for coalescing (if enabled).
0224   /// \param[in] executor Optionally, an executor to use for decoding record
0225   ///     batches. This is generally only a benefit for very wide and/or
0226   ///     compressed batches.
0227   virtual Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
0228       const bool coalesce = false,
0229       const io::IOContext& io_context = io::default_io_context(),
0230       const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(),
0231       arrow::internal::Executor* executor = NULLPTR) = 0;
0232 
0233   /// \brief Collect all batches as a vector of record batches
0234   Result<RecordBatchVector> ToRecordBatches();
0235 
0236   /// \brief Collect all batches and concatenate as arrow::Table
0237   Result<std::shared_ptr<Table>> ToTable();
0238 };
0239 
0240 /// \brief A general listener class to receive events.
0241 ///
0242 /// You must implement callback methods for interested events.
0243 ///
0244 /// This API is EXPERIMENTAL.
0245 ///
0246 /// \since 0.17.0
0247 class ARROW_EXPORT Listener {
0248  public:
0249   virtual ~Listener() = default;
0250 
0251   /// \brief Called when end-of-stream is received.
0252   ///
0253   /// The default implementation just returns arrow::Status::OK().
0254   ///
0255   /// \return Status
0256   ///
0257   /// \see StreamDecoder
0258   virtual Status OnEOS();
0259 
0260   /// \brief Called when a record batch is decoded and
0261   /// OnRecordBatchWithMetadataDecoded() isn't overridden.
0262   ///
0263   /// The default implementation just returns
0264   /// arrow::Status::NotImplemented().
0265   ///
0266   /// \param[in] record_batch a record batch decoded
0267   /// \return Status
0268   ///
0269   /// \see StreamDecoder
0270   virtual Status OnRecordBatchDecoded(std::shared_ptr<RecordBatch> record_batch);
0271 
0272   /// \brief Called when a record batch with custom metadata is decoded.
0273   ///
0274   /// The default implementation just calls OnRecordBatchDecoded()
0275   /// without custom metadata.
0276   ///
0277   /// \param[in] record_batch_with_metadata a record batch with custom
0278   /// metadata decoded
0279   /// \return Status
0280   ///
0281   /// \see StreamDecoder
0282   ///
0283   /// \since 13.0.0
0284   virtual Status OnRecordBatchWithMetadataDecoded(
0285       RecordBatchWithMetadata record_batch_with_metadata);
0286 
0287   /// \brief Called when a schema is decoded.
0288   ///
0289   /// The default implementation just returns arrow::Status::OK().
0290   ///
0291   /// \param[in] schema a schema decoded
0292   /// \return Status
0293   ///
0294   /// \see StreamDecoder
0295   virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema);
0296 
0297   /// \brief Called when a schema is decoded.
0298   ///
0299   /// The default implementation just calls OnSchemaDecoded(schema)
0300   /// (without filtered_schema) to keep backward compatibility.
0301   ///
0302   /// \param[in] schema a schema decoded
0303   /// \param[in] filtered_schema a filtered schema that only has read fields
0304   /// \return Status
0305   ///
0306   /// \see StreamDecoder
0307   ///
0308   /// \since 13.0.0
0309   virtual Status OnSchemaDecoded(std::shared_ptr<Schema> schema,
0310                                  std::shared_ptr<Schema> filtered_schema);
0311 };
0312 
0313 /// \brief Collect schema and record batches decoded by StreamDecoder.
0314 ///
0315 /// This API is EXPERIMENTAL.
0316 ///
0317 /// \since 0.17.0
0318 class ARROW_EXPORT CollectListener : public Listener {
0319  public:
0320   CollectListener() : schema_(), filtered_schema_(), record_batches_(), metadatas_() {}
0321   virtual ~CollectListener() = default;
0322 
0323   Status OnSchemaDecoded(std::shared_ptr<Schema> schema,
0324                          std::shared_ptr<Schema> filtered_schema) override {
0325     schema_ = std::move(schema);
0326     filtered_schema_ = std::move(filtered_schema);
0327     return Status::OK();
0328   }
0329 
0330   Status OnRecordBatchWithMetadataDecoded(
0331       RecordBatchWithMetadata record_batch_with_metadata) override {
0332     record_batches_.push_back(std::move(record_batch_with_metadata.batch));
0333     metadatas_.push_back(std::move(record_batch_with_metadata.custom_metadata));
0334     return Status::OK();
0335   }
0336 
0337   /// \return the decoded schema
0338   std::shared_ptr<Schema> schema() const { return schema_; }
0339 
0340   /// \return the filtered schema
0341   std::shared_ptr<Schema> filtered_schema() const { return filtered_schema_; }
0342 
0343   /// \return the all decoded record batches
0344   const std::vector<std::shared_ptr<RecordBatch>>& record_batches() const {
0345     return record_batches_;
0346   }
0347 
0348   /// \return the all decoded metadatas
0349   const std::vector<std::shared_ptr<KeyValueMetadata>>& metadatas() const {
0350     return metadatas_;
0351   }
0352 
0353   /// \return the number of collected record batches
0354   int64_t num_record_batches() const { return record_batches_.size(); }
0355 
0356   /// \return the last decoded record batch and remove it from
0357   /// record_batches
0358   std::shared_ptr<RecordBatch> PopRecordBatch() {
0359     auto record_batch_with_metadata = PopRecordBatchWithMetadata();
0360     return std::move(record_batch_with_metadata.batch);
0361   }
0362 
0363   /// \return the last decoded record batch with custom metadata and
0364   /// remove it from record_batches
0365   RecordBatchWithMetadata PopRecordBatchWithMetadata() {
0366     RecordBatchWithMetadata record_batch_with_metadata;
0367     if (record_batches_.empty()) {
0368       return record_batch_with_metadata;
0369     }
0370     record_batch_with_metadata.batch = std::move(record_batches_.back());
0371     record_batch_with_metadata.custom_metadata = std::move(metadatas_.back());
0372     record_batches_.pop_back();
0373     metadatas_.pop_back();
0374     return record_batch_with_metadata;
0375   }
0376 
0377  private:
0378   std::shared_ptr<Schema> schema_;
0379   std::shared_ptr<Schema> filtered_schema_;
0380   std::vector<std::shared_ptr<RecordBatch>> record_batches_;
0381   std::vector<std::shared_ptr<KeyValueMetadata>> metadatas_;
0382 };
0383 
0384 /// \brief Push style stream decoder that receives data from user.
0385 ///
0386 /// This class decodes the Apache Arrow IPC streaming format data.
0387 ///
0388 /// This API is EXPERIMENTAL.
0389 ///
0390 /// \see https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
0391 ///
0392 /// \since 0.17.0
0393 class ARROW_EXPORT StreamDecoder {
0394  public:
0395   /// \brief Construct a stream decoder.
0396   ///
0397   /// \param[in] listener a Listener that must implement
0398   /// Listener::OnRecordBatchDecoded() to receive decoded record batches
0399   /// \param[in] options any IPC reading options (optional)
0400   StreamDecoder(std::shared_ptr<Listener> listener,
0401                 IpcReadOptions options = IpcReadOptions::Defaults());
0402 
0403   virtual ~StreamDecoder();
0404 
0405   /// \brief Feed data to the decoder as a raw data.
0406   ///
0407   /// If the decoder can read one or more record batches by the data,
0408   /// the decoder calls listener->OnRecordBatchDecoded() with a
0409   /// decoded record batch multiple times.
0410   ///
0411   /// \param[in] data a raw data to be processed. This data isn't
0412   /// copied. The passed memory must be kept alive through record
0413   /// batch processing.
0414   /// \param[in] size raw data size.
0415   /// \return Status
0416   Status Consume(const uint8_t* data, int64_t size);
0417 
0418   /// \brief Feed data to the decoder as a Buffer.
0419   ///
0420   /// If the decoder can read one or more record batches by the
0421   /// Buffer, the decoder calls listener->RecordBatchReceived() with a
0422   /// decoded record batch multiple times.
0423   ///
0424   /// \param[in] buffer a Buffer to be processed.
0425   /// \return Status
0426   Status Consume(std::shared_ptr<Buffer> buffer);
0427 
0428   /// \brief Reset the internal status.
0429   ///
0430   /// You can reuse this decoder for new stream after calling
0431   /// this.
0432   ///
0433   /// \return Status
0434   Status Reset();
0435 
0436   /// \return the shared schema of the record batches in the stream
0437   std::shared_ptr<Schema> schema() const;
0438 
0439   /// \brief Return the number of bytes needed to advance the state of
0440   /// the decoder.
0441   ///
0442   /// This method is provided for users who want to optimize performance.
0443   /// Normal users don't need to use this method.
0444   ///
0445   /// Here is an example usage for normal users:
0446   ///
0447   /// ~~~{.cpp}
0448   /// decoder.Consume(buffer1);
0449   /// decoder.Consume(buffer2);
0450   /// decoder.Consume(buffer3);
0451   /// ~~~
0452   ///
0453   /// Decoder has internal buffer. If consumed data isn't enough to
0454   /// advance the state of the decoder, consumed data is buffered to
0455   /// the internal buffer. It causes performance overhead.
0456   ///
0457   /// If you pass next_required_size() size data to each Consume()
0458   /// call, the decoder doesn't use its internal buffer. It improves
0459   /// performance.
0460   ///
0461   /// Here is an example usage to avoid using internal buffer:
0462   ///
0463   /// ~~~{.cpp}
0464   /// buffer1 = get_data(decoder.next_required_size());
0465   /// decoder.Consume(buffer1);
0466   /// buffer2 = get_data(decoder.next_required_size());
0467   /// decoder.Consume(buffer2);
0468   /// ~~~
0469   ///
0470   /// Users can use this method to avoid creating small chunks. Record
0471   /// batch data must be contiguous data. If users pass small chunks
0472   /// to the decoder, the decoder needs concatenate small chunks
0473   /// internally. It causes performance overhead.
0474   ///
0475   /// Here is an example usage to reduce small chunks:
0476   ///
0477   /// ~~~{.cpp}
0478   /// buffer = AllocateResizableBuffer();
0479   /// while ((small_chunk = get_data(&small_chunk_size))) {
0480   ///   auto current_buffer_size = buffer->size();
0481   ///   buffer->Resize(current_buffer_size + small_chunk_size);
0482   ///   memcpy(buffer->mutable_data() + current_buffer_size,
0483   ///          small_chunk,
0484   ///          small_chunk_size);
0485   ///   if (buffer->size() < decoder.next_required_size()) {
0486   ///     continue;
0487   ///   }
0488   ///   std::shared_ptr<arrow::Buffer> chunk(buffer.release());
0489   ///   decoder.Consume(chunk);
0490   ///   buffer = AllocateResizableBuffer();
0491   /// }
0492   /// if (buffer->size() > 0) {
0493   ///   std::shared_ptr<arrow::Buffer> chunk(buffer.release());
0494   ///   decoder.Consume(chunk);
0495   /// }
0496   /// ~~~
0497   ///
0498   /// \return the number of bytes needed to advance the state of the
0499   /// decoder
0500   int64_t next_required_size() const;
0501 
0502   /// \brief Return current read statistics
0503   ReadStats stats() const;
0504 
0505  private:
0506   class StreamDecoderImpl;
0507   std::unique_ptr<StreamDecoderImpl> impl_;
0508 
0509   ARROW_DISALLOW_COPY_AND_ASSIGN(StreamDecoder);
0510 };
0511 
0512 // Generic read functions; does not copy data if the input supports zero copy reads
0513 
0514 /// \brief Read Schema from stream serialized as a single IPC message
0515 /// and populate any dictionary-encoded fields into a DictionaryMemo
0516 ///
0517 /// \param[in] stream an InputStream
0518 /// \param[in] dictionary_memo for recording dictionary-encoded fields
0519 /// \return the output Schema
0520 ///
0521 /// If record batches follow the schema, it is better to use
0522 /// RecordBatchStreamReader
0523 ARROW_EXPORT
0524 Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
0525                                            DictionaryMemo* dictionary_memo);
0526 
0527 /// \brief Read Schema from encapsulated Message
0528 ///
0529 /// \param[in] message the message containing the Schema IPC metadata
0530 /// \param[in] dictionary_memo DictionaryMemo for recording dictionary-encoded
0531 /// fields. Can be nullptr if you are sure there are no
0532 /// dictionary-encoded fields
0533 /// \return the resulting Schema
0534 ARROW_EXPORT
0535 Result<std::shared_ptr<Schema>> ReadSchema(const Message& message,
0536                                            DictionaryMemo* dictionary_memo);
0537 
0538 /// Read record batch as encapsulated IPC message with metadata size prefix and
0539 /// header
0540 ///
0541 /// \param[in] schema the record batch schema
0542 /// \param[in] dictionary_memo DictionaryMemo which has any
0543 /// dictionaries. Can be nullptr if you are sure there are no
0544 /// dictionary-encoded fields
0545 /// \param[in] options IPC options for reading
0546 /// \param[in] stream the file where the batch is located
0547 /// \return the read record batch
0548 ARROW_EXPORT
0549 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
0550     const std::shared_ptr<Schema>& schema, const DictionaryMemo* dictionary_memo,
0551     const IpcReadOptions& options, io::InputStream* stream);
0552 
0553 /// \brief Read record batch from message
0554 ///
0555 /// \param[in] message a Message containing the record batch metadata
0556 /// \param[in] schema the record batch schema
0557 /// \param[in] dictionary_memo DictionaryMemo which has any
0558 /// dictionaries. Can be nullptr if you are sure there are no
0559 /// dictionary-encoded fields
0560 /// \param[in] options IPC options for reading
0561 /// \return the read record batch
0562 ARROW_EXPORT
0563 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
0564     const Message& message, const std::shared_ptr<Schema>& schema,
0565     const DictionaryMemo* dictionary_memo, const IpcReadOptions& options);
0566 
0567 /// Read record batch from file given metadata and schema
0568 ///
0569 /// \param[in] metadata a Message containing the record batch metadata
0570 /// \param[in] schema the record batch schema
0571 /// \param[in] dictionary_memo DictionaryMemo which has any
0572 /// dictionaries. Can be nullptr if you are sure there are no
0573 /// dictionary-encoded fields
0574 /// \param[in] file a random access file
0575 /// \param[in] options options for deserialization
0576 /// \return the read record batch
0577 ARROW_EXPORT
0578 Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
0579     const Buffer& metadata, const std::shared_ptr<Schema>& schema,
0580     const DictionaryMemo* dictionary_memo, const IpcReadOptions& options,
0581     io::RandomAccessFile* file);
0582 
0583 /// \brief Read arrow::Tensor as encapsulated IPC message in file
0584 ///
0585 /// \param[in] file an InputStream pointed at the start of the message
0586 /// \return the read tensor
0587 ARROW_EXPORT
0588 Result<std::shared_ptr<Tensor>> ReadTensor(io::InputStream* file);
0589 
0590 /// \brief EXPERIMENTAL: Read arrow::Tensor from IPC message
0591 ///
0592 /// \param[in] message a Message containing the tensor metadata and body
0593 /// \return the read tensor
0594 ARROW_EXPORT
0595 Result<std::shared_ptr<Tensor>> ReadTensor(const Message& message);
0596 
0597 /// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file
0598 ///
0599 /// \param[in] file an InputStream pointed at the start of the message
0600 /// \return the read sparse tensor
0601 ARROW_EXPORT
0602 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(io::InputStream* file);
0603 
0604 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
0605 ///
0606 /// \param[in] message a Message containing the tensor metadata and body
0607 /// \return the read sparse tensor
0608 ARROW_EXPORT
0609 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message);
0610 
0611 namespace internal {
0612 
0613 // These internal APIs may change without warning or deprecation
0614 
0615 /// \brief EXPERIMENTAL: Read arrow::SparseTensorFormat::type from a metadata
0616 /// \param[in] metadata a Buffer containing the sparse tensor metadata
0617 /// \return the count of the body buffers
0618 ARROW_EXPORT
0619 Result<size_t> ReadSparseTensorBodyBufferCount(const Buffer& metadata);
0620 
0621 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from an IpcPayload
0622 /// \param[in] payload a IpcPayload contains a serialized SparseTensor
0623 /// \return the read sparse tensor
0624 ARROW_EXPORT
0625 Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload& payload);
0626 
0627 // For fuzzing targets
0628 ARROW_EXPORT
0629 Status FuzzIpcStream(const uint8_t* data, int64_t size);
0630 ARROW_EXPORT
0631 Status FuzzIpcTensorStream(const uint8_t* data, int64_t size);
0632 ARROW_EXPORT
0633 Status FuzzIpcFile(const uint8_t* data, int64_t size);
0634 
0635 }  // namespace internal
0636 
0637 }  // namespace ipc
0638 }  // namespace arrow