Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:55

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <functional>
0021 #include <memory>
0022 #include <string>
0023 
0024 #include "arrow/c/abi.h"
0025 #include "arrow/device.h"
0026 #include "arrow/result.h"
0027 #include "arrow/status.h"
0028 #include "arrow/type_fwd.h"
0029 #include "arrow/util/async_generator_fwd.h"
0030 #include "arrow/util/macros.h"
0031 #include "arrow/util/visibility.h"
0032 
0033 namespace arrow {
0034 
0035 /// \defgroup c-data-interface Functions for working with the C data interface.
0036 ///
0037 /// @{
0038 
0039 /// \brief Export C++ DataType using the C data interface format.
0040 ///
0041 /// The root type is considered to have empty name and metadata.
0042 /// If you want the root type to have a name and/or metadata, pass
0043 /// a Field instead.
0044 ///
0045 /// \param[in] type DataType object to export
0046 /// \param[out] out C struct where to export the datatype
0047 ARROW_EXPORT
0048 Status ExportType(const DataType& type, struct ArrowSchema* out);
0049 
0050 /// \brief Export C++ Field using the C data interface format.
0051 ///
0052 /// \param[in] field Field object to export
0053 /// \param[out] out C struct where to export the field
0054 ARROW_EXPORT
0055 Status ExportField(const Field& field, struct ArrowSchema* out);
0056 
0057 /// \brief Export C++ Schema using the C data interface format.
0058 ///
0059 /// \param[in] schema Schema object to export
0060 /// \param[out] out C struct where to export the schema
0061 ARROW_EXPORT
0062 Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
0063 
0064 /// \brief Export C++ Array using the C data interface format.
0065 ///
0066 /// The resulting ArrowArray struct keeps the array data and buffers alive
0067 /// until its release callback is called by the consumer.
0068 ///
0069 /// \param[in] array Array object to export
0070 /// \param[out] out C struct where to export the array
0071 /// \param[out] out_schema optional C struct where to export the array type
0072 ARROW_EXPORT
0073 Status ExportArray(const Array& array, struct ArrowArray* out,
0074                    struct ArrowSchema* out_schema = NULLPTR);
0075 
0076 /// \brief Export C++ RecordBatch using the C data interface format.
0077 ///
0078 /// The record batch is exported as if it were a struct array.
0079 /// The resulting ArrowArray struct keeps the record batch data and buffers alive
0080 /// until its release callback is called by the consumer.
0081 ///
0082 /// \param[in] batch Record batch to export
0083 /// \param[out] out C struct where to export the record batch
0084 /// \param[out] out_schema optional C struct where to export the record batch schema
0085 ARROW_EXPORT
0086 Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
0087                          struct ArrowSchema* out_schema = NULLPTR);
0088 
0089 /// \brief Import C++ DataType from the C data interface.
0090 ///
0091 /// The given ArrowSchema struct is released (as per the C data interface
0092 /// specification), even if this function fails.
0093 ///
0094 /// \param[in,out] schema C data interface struct representing the data type
0095 /// \return Imported type object
0096 ARROW_EXPORT
0097 Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
0098 
0099 /// \brief Import C++ Field from the C data interface.
0100 ///
0101 /// The given ArrowSchema struct is released (as per the C data interface
0102 /// specification), even if this function fails.
0103 ///
0104 /// \param[in,out] schema C data interface struct representing the field
0105 /// \return Imported field object
0106 ARROW_EXPORT
0107 Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
0108 
0109 /// \brief Import C++ Schema from the C data interface.
0110 ///
0111 /// The given ArrowSchema struct is released (as per the C data interface
0112 /// specification), even if this function fails.
0113 ///
0114 /// \param[in,out] schema C data interface struct representing the schema
0115 /// \return Imported schema object
0116 ARROW_EXPORT
0117 Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
0118 
0119 /// \brief Import C++ array from the C data interface.
0120 ///
0121 /// The ArrowArray struct has its contents moved (as per the C data interface
0122 /// specification) to a private object held alive by the resulting array.
0123 ///
0124 /// \param[in,out] array C data interface struct holding the array data
0125 /// \param[in] type type of the imported array
0126 /// \return Imported array object
0127 ARROW_EXPORT
0128 Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
0129                                            std::shared_ptr<DataType> type);
0130 
0131 /// \brief Import C++ array and its type from the C data interface.
0132 ///
0133 /// The ArrowArray struct has its contents moved (as per the C data interface
0134 /// specification) to a private object held alive by the resulting array.
0135 /// The ArrowSchema struct is released, even if this function fails.
0136 ///
0137 /// \param[in,out] array C data interface struct holding the array data
0138 /// \param[in,out] type C data interface struct holding the array type
0139 /// \return Imported array object
0140 ARROW_EXPORT
0141 Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
0142                                            struct ArrowSchema* type);
0143 
0144 /// \brief Import C++ record batch from the C data interface.
0145 ///
0146 /// The ArrowArray struct has its contents moved (as per the C data interface
0147 /// specification) to a private object held alive by the resulting record batch.
0148 ///
0149 /// \param[in,out] array C data interface struct holding the record batch data
0150 /// \param[in] schema schema of the imported record batch
0151 /// \return Imported record batch object
0152 ARROW_EXPORT
0153 Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
0154                                                        std::shared_ptr<Schema> schema);
0155 
0156 /// \brief Import C++ record batch and its schema from the C data interface.
0157 ///
0158 /// The type represented by the ArrowSchema struct must be a struct type array.
0159 /// The ArrowArray struct has its contents moved (as per the C data interface
0160 /// specification) to a private object held alive by the resulting record batch.
0161 /// The ArrowSchema struct is released, even if this function fails.
0162 ///
0163 /// \param[in,out] array C data interface struct holding the record batch data
0164 /// \param[in,out] schema C data interface struct holding the record batch schema
0165 /// \return Imported record batch object
0166 ARROW_EXPORT
0167 Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
0168                                                        struct ArrowSchema* schema);
0169 
0170 /// @}
0171 
0172 /// \defgroup c-data-device-interface Functions for working with the C data device
0173 /// interface.
0174 ///
0175 /// @{
0176 
0177 /// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray.
0178 ///
0179 /// The resulting ArrowDeviceArray struct keeps the array data and buffers alive
0180 /// until its release callback is called by the consumer. All buffers in
0181 /// the provided array MUST have the same device_type, otherwise an error
0182 /// will be returned.
0183 ///
0184 /// If sync is non-null, get_event will be called on it in order to
0185 /// potentially provide an event for consumers to synchronize on.
0186 ///
0187 /// \param[in] array Array object to export
0188 /// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
0189 /// \param[out] out C struct to export the array to
0190 /// \param[out] out_schema optional C struct to export the array type to
0191 ARROW_EXPORT
0192 Status ExportDeviceArray(const Array& array, std::shared_ptr<Device::SyncEvent> sync,
0193                          struct ArrowDeviceArray* out,
0194                          struct ArrowSchema* out_schema = NULLPTR);
0195 
0196 /// \brief EXPERIMENTAL: Export C++ RecordBatch as an ArrowDeviceArray.
0197 ///
0198 /// The record batch is exported as if it were a struct array.
0199 /// The resulting ArrowDeviceArray struct keeps the record batch data and buffers alive
0200 /// until its release callback is called by the consumer.
0201 ///
0202 /// All buffers of all columns in the record batch must have the same device_type
0203 /// otherwise an error will be returned. If columns are on different devices,
0204 /// they should be exported using different ArrowDeviceArray instances.
0205 ///
0206 /// If sync is non-null, get_event will be called on it in order to
0207 /// potentially provide an event for consumers to synchronize on.
0208 ///
0209 /// \param[in] batch Record batch to export
0210 /// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
0211 /// \param[out] out C struct where to export the record batch
0212 /// \param[out] out_schema optional C struct where to export the record batch schema
0213 ARROW_EXPORT
0214 Status ExportDeviceRecordBatch(const RecordBatch& batch,
0215                                std::shared_ptr<Device::SyncEvent> sync,
0216                                struct ArrowDeviceArray* out,
0217                                struct ArrowSchema* out_schema = NULLPTR);
0218 
0219 using DeviceMemoryMapper =  // callback: (device type, device id) -> MemoryManager or error
0220     std::function<Result<std::shared_ptr<MemoryManager>>(ArrowDeviceType, int64_t)>;
0221 
0222 ARROW_EXPORT  // default for the `mapper` parameter of the device import functions below
0223 Result<std::shared_ptr<MemoryManager>> DefaultDeviceMemoryMapper(
0224     ArrowDeviceType device_type, int64_t device_id);
0225 
0226 /// \brief EXPERIMENTAL: Import C++ device array from the C data interface.
0227 ///
0228 /// The ArrowArray struct has its contents moved (as per the C data interface
0229 /// specification) to a private object held alive by the resulting array. The
0230 /// buffers of the Array are located on the device indicated by the device_type.
0231 ///
0232 /// \param[in,out] array C data interface struct holding the array data
0233 /// \param[in] type type of the imported array
0234 /// \param[in] mapper A function to map device + id to memory manager. If not
0235 /// specified, defaults to map "cpu" to the built-in default memory manager.
0236 /// \return Imported array object
0237 ARROW_EXPORT
0238 Result<std::shared_ptr<Array>> ImportDeviceArray(
0239     struct ArrowDeviceArray* array, std::shared_ptr<DataType> type,
0240     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0241 
0242 /// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface.
0243 ///
0244 /// The ArrowArray struct has its contents moved (as per the C data interface
0245 /// specification) to a private object held alive by the resulting array.
0246 /// The ArrowSchema struct is released, even if this function fails. The
0247 /// buffers of the Array are located on the device indicated by the device_type.
0248 ///
0249 /// \param[in,out] array C data interface struct holding the array data
0250 /// \param[in,out] type C data interface struct holding the array type
0251 /// \param[in] mapper A function to map device + id to memory manager. If not
0252 /// specified, defaults to map "cpu" to the built-in default memory manager.
0253 /// \return Imported array object
0254 ARROW_EXPORT
0255 Result<std::shared_ptr<Array>> ImportDeviceArray(
0256     struct ArrowDeviceArray* array, struct ArrowSchema* type,
0257     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0258 
0259 /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data
0260 /// interface.
0261 ///
0262 /// The ArrowArray struct has its contents moved (as per the C data interface
0263 /// specification) to a private object held alive by the resulting record batch.
0264 /// The buffers of all columns of the record batch are located on the device
0265 /// indicated by the device type.
0266 ///
0267 /// \param[in,out] array C data interface struct holding the record batch data
0268 /// \param[in] schema schema of the imported record batch
0269 /// \param[in] mapper A function to map device + id to memory manager. If not
0270 /// specified, defaults to map "cpu" to the built-in default memory manager.
0271 /// \return Imported record batch object
0272 ARROW_EXPORT
0273 Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
0274     struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema,
0275     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0276 
0277 /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema
0278 /// from the C data interface.
0279 ///
0280 /// The type represented by the ArrowSchema struct must be a struct type array.
0281 /// The ArrowArray struct has its contents moved (as per the C data interface
0282 /// specification) to a private object held alive by the resulting record batch.
0283 /// The ArrowSchema struct is released, even if this function fails. The buffers
0284 /// of all columns of the record batch are located on the device indicated by the
0285 /// device type.
0286 ///
0287 /// \param[in,out] array C data interface struct holding the record batch data
0288 /// \param[in,out] schema C data interface struct holding the record batch schema
0289 /// \param[in] mapper A function to map device + id to memory manager. If not
0290 /// specified, defaults to map "cpu" to the built-in default memory manager.
0291 /// \return Imported record batch object
0292 ARROW_EXPORT
0293 Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
0294     struct ArrowDeviceArray* array, struct ArrowSchema* schema,
0295     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0296 
0297 /// @}
0298 
0299 /// \defgroup c-stream-interface Functions for working with the C stream interface.
0300 ///
0301 /// @{
0302 
0303 /// \brief Export C++ RecordBatchReader using the C stream interface.
0304 ///
0305 /// The resulting ArrowArrayStream struct keeps the record batch reader alive
0306 /// until its release callback is called by the consumer.
0307 ///
0308 /// \param[in] reader RecordBatchReader object to export
0309 /// \param[out] out C struct where to export the stream
0310 ARROW_EXPORT
0311 Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
0312                                struct ArrowArrayStream* out);
0313 
0314 /// \brief Export C++ ChunkedArray using the C stream interface.
0315 ///
0316 /// The resulting ArrowArrayStream struct keeps the chunked array data and buffers alive
0317 /// until its release callback is called by the consumer.
0318 ///
0319 /// \param[in] chunked_array ChunkedArray object to export
0320 /// \param[out] out C struct where to export the stream
0321 ARROW_EXPORT
0322 Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
0323                           struct ArrowArrayStream* out);
0324 
0325 /// \brief Export C++ RecordBatchReader using the C device stream interface
0326 ///
0327 /// The resulting ArrowDeviceArrayStream struct keeps the record batch reader
0328 /// alive until its release callback is called by the consumer. The device
0329 /// type is determined by calling device_type() on the RecordBatchReader.
0330 ///
0331 /// \param[in] reader RecordBatchReader object to export
0332 /// \param[out] out C struct to export the stream to
0333 ARROW_EXPORT
0334 Status ExportDeviceRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
0335                                      struct ArrowDeviceArrayStream* out);
0336 
0337 /// \brief Export C++ ChunkedArray using the C device stream interface.
0338 ///
0339 /// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers
0340 /// alive until its release callback is called by the consumer.
0341 ///
0342 /// \param[in] chunked_array ChunkedArray object to export
0343 /// \param[in] device_type the device type the data is located on
0344 /// \param[out] out C struct to export the stream to
0345 ARROW_EXPORT
0346 Status ExportDeviceChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
0347                                 DeviceAllocationType device_type,
0348                                 struct ArrowDeviceArrayStream* out);
0349 
0350 /// \brief Import C++ RecordBatchReader from the C stream interface.
0351 ///
0352 /// The ArrowArrayStream struct has its contents moved to a private object
0353 /// held alive by the resulting record batch reader.
0354 ///
0355 /// \param[in,out] stream C stream interface struct
0356 /// \return Imported RecordBatchReader object
0357 ARROW_EXPORT
0358 Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
0359     struct ArrowArrayStream* stream);
0360 
0361 /// \brief Import C++ ChunkedArray from the C stream interface
0362 ///
0363 /// The ArrowArrayStream struct has its contents moved to a private object,
0364 /// is consumed in its entirety, and released before returning all chunks
0365 /// as a ChunkedArray.
0366 ///
0367 /// \param[in,out] stream C stream interface struct
0368 /// \return Imported ChunkedArray object
0369 ARROW_EXPORT
0370 Result<std::shared_ptr<ChunkedArray>> ImportChunkedArray(struct ArrowArrayStream* stream);
0371 
0372 /// \brief Import C++ RecordBatchReader from the C device stream interface
0373 ///
0374 /// The ArrowDeviceArrayStream struct has its contents moved to a private object
0375 /// held alive by the resulting record batch reader.
0376 ///
0377 /// \note If there was a required sync event, sync events are accessible by individual
0378 /// buffers of columns. We are not yet bubbling the sync events from the buffers up to
0379 /// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future
0380 /// update.
0381 ///
0382 /// \param[in,out] stream C device stream interface struct
0383 /// \param[in] mapper mapping from device type and ID to memory manager
0384 /// \return Imported RecordBatchReader object
0385 ARROW_EXPORT
0386 Result<std::shared_ptr<RecordBatchReader>> ImportDeviceRecordBatchReader(
0387     struct ArrowDeviceArrayStream* stream,
0388     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0389 
0390 /// \brief Import C++ ChunkedArray from the C device stream interface
0391 ///
0392 /// The ArrowDeviceArrayStream struct has its contents moved to a private object,
0393 /// is consumed in its entirety, and released before returning all chunks as a
0394 /// ChunkedArray.
0395 ///
0396 /// \note Any chunks that require synchronization for their device memory will have
0397 /// the SyncEvent objects available by checking the individual buffers of each chunk.
0398 /// These SyncEvents should be checked before accessing the data in those buffers.
0399 ///
0400 /// \param[in,out] stream C device stream interface struct
0401 /// \param[in] mapper mapping from device type and ID to memory manager
0402 /// \return Imported ChunkedArray object
0403 ARROW_EXPORT
0404 Result<std::shared_ptr<ChunkedArray>> ImportDeviceChunkedArray(
0405     struct ArrowDeviceArrayStream* stream,
0406     const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
0407 
0408 /// @}
0409 
0410 /// \defgroup c-async-stream-interface Functions for working with the async C data
0411 /// interface.
0412 ///
0413 /// @{
0414 
0415 /// \brief EXPERIMENTAL: AsyncErrorDetail is a StatusDetail that carries the error code,
0416 /// message and raw metadata string reported by an asynchronous operation.
0417 class AsyncErrorDetail : public StatusDetail {
0418  public:
0419   AsyncErrorDetail(int code, std::string message, std::string metadata)  // strings are sinks
0420       : code_(code), message_(std::move(message)), metadata_(std::move(metadata)) {}
0421   const char* type_id() const override { return "AsyncErrorDetail"; }  // fixed identifier
0422   // ToString returns only the stored error message (neither code nor metadata is included)
0423   std::string ToString() const override { return message_; }
0424   // code() returns the stored error code, intended to be errno-compatible
0425   int code() const { return code_; }
0426   // ErrorMetadataString() returns the metadata string exactly as stored, likely in a
0427   // key-value format similar to ArrowSchema metadata
0428   const std::string& ErrorMetadataString() const { return metadata_; }
0429   std::shared_ptr<KeyValueMetadata> ErrorMetadata() const;  // defined out of line; presumably decodes metadata_ - TODO confirm
0430 
0431  private:
0432   int code_{0};  // in-class default; the only constructor always overwrites it
0433   std::string message_;
0434   std::string metadata_;
0435 };
0436 
0437 struct AsyncRecordBatchGenerator {  // value type resolved by CreateAsyncDeviceStreamHandler's Future
0438   std::shared_ptr<Schema> schema;  ///< presumably the schema of the generated batches - TODO confirm
0439   DeviceAllocationType device_type;  ///< NOTE(review): no default initializer; set explicitly before reading
0440   AsyncGenerator<RecordBatchWithMetadata> generator;  ///< asynchronous source of RecordBatchWithMetadata items
0441 };
0442 
0443 namespace internal {
0444 class Executor;  // forward declaration only; this header uses it solely as internal::Executor*
0445 }
0446 
0447 /// \brief EXPERIMENTAL: Create an AsyncRecordBatchGenerator and populate a corresponding
0448 /// handler to pass to a producer
0449 ///
0450 /// The ArrowAsyncDeviceStreamHandler struct is intended to have its callbacks populated
0451 /// and then be passed to a producer to call the appropriate callbacks when data is ready.
0452 /// This inverts the traditional flow of control, and so we construct a corresponding
0453 /// AsyncRecordBatchGenerator to provide an interface for the consumer to retrieve data as
0454 /// it is pushed to the handler.
0455 ///
0456 /// \param[in,out] handler C struct to be populated
0457 /// \param[in] executor the executor to use for waiting and populating record batches
0458 /// \param[in] queue_size initial number of record batches to request for queueing
0459 /// \param[in] mapper mapping from device type and ID to memory manager
0460 /// \return Future that resolves to either an error or AsyncRecordBatchGenerator once a
0461 /// schema is available or an error is received.
0462 ARROW_EXPORT
0463 Future<AsyncRecordBatchGenerator> CreateAsyncDeviceStreamHandler(
0464     struct ArrowAsyncDeviceStreamHandler* handler, internal::Executor* executor,
0465     uint64_t queue_size = 5, DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper);
0466 
0467 /// \brief EXPERIMENTAL: Export an AsyncGenerator of record batches using a provided
0468 /// handler
0469 ///
0470 /// This function calls the callbacks on the consumer-provided async handler as record
0471 /// batches become available from the AsyncGenerator which is provided. It will first call
0472 /// on_schema using the provided schema, and then serially visit each record batch from
0473 /// the generator, calling the on_next_task callback. If an error occurs, on_error will be
0474 /// called appropriately.
0475 ///
0476 /// \param[in] schema the schema of the stream being exported
0477 /// \param[in] generator a generator that asynchronously produces record batches
0478 /// \param[in] device_type the device type that the record batches will be located on
0479 /// \param[in] handler the handler whose callbacks to utilize as data is available
0480 /// \return Future that will resolve once the generator is exhausted or an error occurs
0481 ARROW_EXPORT
0482 Future<> ExportAsyncRecordBatchReader(
0483     std::shared_ptr<Schema> schema,
0484     AsyncGenerator<std::shared_ptr<RecordBatch>> generator,
0485     DeviceAllocationType device_type, struct ArrowAsyncDeviceStreamHandler* handler);
0486 
0487 /// @}
0488 
0489 }  // namespace arrow