![]() |
|
|||
File indexing completed on 2025-08-28 08:26:55
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <functional> 0021 #include <memory> 0022 #include <string> 0023 0024 #include "arrow/c/abi.h" 0025 #include "arrow/device.h" 0026 #include "arrow/result.h" 0027 #include "arrow/status.h" 0028 #include "arrow/type_fwd.h" 0029 #include "arrow/util/async_generator_fwd.h" 0030 #include "arrow/util/macros.h" 0031 #include "arrow/util/visibility.h" 0032 0033 namespace arrow { 0034 0035 /// \defgroup c-data-interface Functions for working with the C data interface. 0036 /// 0037 /// @{ 0038 0039 /// \brief Export C++ DataType using the C data interface format. 0040 /// 0041 /// The root type is considered to have empty name and metadata. 0042 /// If you want the root type to have a name and/or metadata, pass 0043 /// a Field instead. 0044 /// 0045 /// \param[in] type DataType object to export 0046 /// \param[out] out C struct where to export the datatype 0047 ARROW_EXPORT 0048 Status ExportType(const DataType& type, struct ArrowSchema* out); 0049 0050 /// \brief Export C++ Field using the C data interface format. 0051 /// 0052 /// \param[in] field Field object to export 0053 /// \param[out] out C struct where to export the field 0054 ARROW_EXPORT 0055 Status ExportField(const Field& field, struct ArrowSchema* out); 0056 0057 /// \brief Export C++ Schema using the C data interface format. 0058 /// 0059 /// \param[in] schema Schema object to export 0060 /// \param[out] out C struct where to export the field 0061 ARROW_EXPORT 0062 Status ExportSchema(const Schema& schema, struct ArrowSchema* out); 0063 0064 /// \brief Export C++ Array using the C data interface format. 0065 /// 0066 /// The resulting ArrowArray struct keeps the array data and buffers alive 0067 /// until its release callback is called by the consumer. 0068 /// 0069 /// \param[in] array Array object to export 0070 /// \param[out] out C struct where to export the array 0071 /// \param[out] out_schema optional C struct where to export the array type 0072 ARROW_EXPORT 0073 Status ExportArray(const Array& array, struct ArrowArray* out, 0074 struct ArrowSchema* out_schema = NULLPTR); 0075 0076 /// \brief Export C++ RecordBatch using the C data interface format. 0077 /// 0078 /// The record batch is exported as if it were a struct array. 0079 /// The resulting ArrowArray struct keeps the record batch data and buffers alive 0080 /// until its release callback is called by the consumer. 0081 /// 0082 /// \param[in] batch Record batch to export 0083 /// \param[out] out C struct where to export the record batch 0084 /// \param[out] out_schema optional C struct where to export the record batch schema 0085 ARROW_EXPORT 0086 Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out, 0087 struct ArrowSchema* out_schema = NULLPTR); 0088 0089 /// \brief Import C++ DataType from the C data interface. 0090 /// 0091 /// The given ArrowSchema struct is released (as per the C data interface 0092 /// specification), even if this function fails. 0093 /// 0094 /// \param[in,out] schema C data interface struct representing the data type 0095 /// \return Imported type object 0096 ARROW_EXPORT 0097 Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema); 0098 0099 /// \brief Import C++ Field from the C data interface. 0100 /// 0101 /// The given ArrowSchema struct is released (as per the C data interface 0102 /// specification), even if this function fails. 0103 /// 0104 /// \param[in,out] schema C data interface struct representing the field 0105 /// \return Imported field object 0106 ARROW_EXPORT 0107 Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema); 0108 0109 /// \brief Import C++ Schema from the C data interface. 0110 /// 0111 /// The given ArrowSchema struct is released (as per the C data interface 0112 /// specification), even if this function fails. 0113 /// 0114 /// \param[in,out] schema C data interface struct representing the field 0115 /// \return Imported field object 0116 ARROW_EXPORT 0117 Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema); 0118 0119 /// \brief Import C++ array from the C data interface. 0120 /// 0121 /// The ArrowArray struct has its contents moved (as per the C data interface 0122 /// specification) to a private object held alive by the resulting array. 0123 /// 0124 /// \param[in,out] array C data interface struct holding the array data 0125 /// \param[in] type type of the imported array 0126 /// \return Imported array object 0127 ARROW_EXPORT 0128 Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array, 0129 std::shared_ptr<DataType> type); 0130 0131 /// \brief Import C++ array and its type from the C data interface. 0132 /// 0133 /// The ArrowArray struct has its contents moved (as per the C data interface 0134 /// specification) to a private object held alive by the resulting array. 0135 /// The ArrowSchema struct is released, even if this function fails. 0136 /// 0137 /// \param[in,out] array C data interface struct holding the array data 0138 /// \param[in,out] type C data interface struct holding the array type 0139 /// \return Imported array object 0140 ARROW_EXPORT 0141 Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array, 0142 struct ArrowSchema* type); 0143 0144 /// \brief Import C++ record batch from the C data interface. 0145 /// 0146 /// The ArrowArray struct has its contents moved (as per the C data interface 0147 /// specification) to a private object held alive by the resulting record batch. 0148 /// 0149 /// \param[in,out] array C data interface struct holding the record batch data 0150 /// \param[in] schema schema of the imported record batch 0151 /// \return Imported record batch object 0152 ARROW_EXPORT 0153 Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array, 0154 std::shared_ptr<Schema> schema); 0155 0156 /// \brief Import C++ record batch and its schema from the C data interface. 0157 /// 0158 /// The type represented by the ArrowSchema struct must be a struct type array. 0159 /// The ArrowArray struct has its contents moved (as per the C data interface 0160 /// specification) to a private object held alive by the resulting record batch. 0161 /// The ArrowSchema struct is released, even if this function fails. 0162 /// 0163 /// \param[in,out] array C data interface struct holding the record batch data 0164 /// \param[in,out] schema C data interface struct holding the record batch schema 0165 /// \return Imported record batch object 0166 ARROW_EXPORT 0167 Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array, 0168 struct ArrowSchema* schema); 0169 0170 /// @} 0171 0172 /// \defgroup c-data-device-interface Functions for working with the C data device 0173 /// interface. 0174 /// 0175 /// @{ 0176 0177 /// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray. 0178 /// 0179 /// The resulting ArrowDeviceArray struct keeps the array data and buffers alive 0180 /// until its release callback is called by the consumer. All buffers in 0181 /// the provided array MUST have the same device_type, otherwise an error 0182 /// will be returned. 0183 /// 0184 /// If sync is non-null, get_event will be called on it in order to 0185 /// potentially provide an event for consumers to synchronize on. 0186 /// 0187 /// \param[in] array Array object to export 0188 /// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null 0189 /// \param[out] out C struct to export the array to 0190 /// \param[out] out_schema optional C struct to export the array type to 0191 ARROW_EXPORT 0192 Status ExportDeviceArray(const Array& array, std::shared_ptr<Device::SyncEvent> sync, 0193 struct ArrowDeviceArray* out, 0194 struct ArrowSchema* out_schema = NULLPTR); 0195 0196 /// \brief EXPERIMENTAL: Export C++ RecordBatch as an ArrowDeviceArray. 0197 /// 0198 /// The record batch is exported as if it were a struct array. 0199 /// The resulting ArrowDeviceArray struct keeps the record batch data and buffers alive 0200 /// until its release callback is called by the consumer. 0201 /// 0202 /// All buffers of all columns in the record batch must have the same device_type 0203 /// otherwise an error will be returned. If columns are on different devices, 0204 /// they should be exported using different ArrowDeviceArray instances. 0205 /// 0206 /// If sync is non-null, get_event will be called on it in order to 0207 /// potentially provide an event for consumers to synchronize on. 0208 /// 0209 /// \param[in] batch Record batch to export 0210 /// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null 0211 /// \param[out] out C struct where to export the record batch 0212 /// \param[out] out_schema optional C struct where to export the record batch schema 0213 ARROW_EXPORT 0214 Status ExportDeviceRecordBatch(const RecordBatch& batch, 0215 std::shared_ptr<Device::SyncEvent> sync, 0216 struct ArrowDeviceArray* out, 0217 struct ArrowSchema* out_schema = NULLPTR); 0218 0219 using DeviceMemoryMapper = 0220 std::function<Result<std::shared_ptr<MemoryManager>>(ArrowDeviceType, int64_t)>; 0221 0222 ARROW_EXPORT 0223 Result<std::shared_ptr<MemoryManager>> DefaultDeviceMemoryMapper( 0224 ArrowDeviceType device_type, int64_t device_id); 0225 0226 /// \brief EXPERIMENTAL: Import C++ device array from the C data interface. 0227 /// 0228 /// The ArrowArray struct has its contents moved (as per the C data interface 0229 /// specification) to a private object held alive by the resulting array. The 0230 /// buffers of the Array are located on the device indicated by the device_type. 0231 /// 0232 /// \param[in,out] array C data interface struct holding the array data 0233 /// \param[in] type type of the imported array 0234 /// \param[in] mapper A function to map device + id to memory manager. If not 0235 /// specified, defaults to map "cpu" to the built-in default memory manager. 0236 /// \return Imported array object 0237 ARROW_EXPORT 0238 Result<std::shared_ptr<Array>> ImportDeviceArray( 0239 struct ArrowDeviceArray* array, std::shared_ptr<DataType> type, 0240 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0241 0242 /// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface. 0243 /// 0244 /// The ArrowArray struct has its contents moved (as per the C data interface 0245 /// specification) to a private object held alive by the resulting array. 0246 /// The ArrowSchema struct is released, even if this function fails. The 0247 /// buffers of the Array are located on the device indicated by the device_type. 0248 /// 0249 /// \param[in,out] array C data interface struct holding the array data 0250 /// \param[in,out] type C data interface struct holding the array type 0251 /// \param[in] mapper A function to map device + id to memory manager. If not 0252 /// specified, defaults to map "cpu" to the built-in default memory manager. 0253 /// \return Imported array object 0254 ARROW_EXPORT 0255 Result<std::shared_ptr<Array>> ImportDeviceArray( 0256 struct ArrowDeviceArray* array, struct ArrowSchema* type, 0257 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0258 0259 /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data 0260 /// interface. 0261 /// 0262 /// The ArrowArray struct has its contents moved (as per the C data interface 0263 /// specification) to a private object held alive by the resulting record batch. 0264 /// The buffers of all columns of the record batch are located on the device 0265 /// indicated by the device type. 0266 /// 0267 /// \param[in,out] array C data interface struct holding the record batch data 0268 /// \param[in] schema schema of the imported record batch 0269 /// \param[in] mapper A function to map device + id to memory manager. If not 0270 /// specified, defaults to map "cpu" to the built-in default memory manager. 0271 /// \return Imported record batch object 0272 ARROW_EXPORT 0273 Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch( 0274 struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema, 0275 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0276 0277 /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema 0278 /// from the C data interface. 0279 /// 0280 /// The type represented by the ArrowSchema struct must be a struct type array. 0281 /// The ArrowArray struct has its contents moved (as per the C data interface 0282 /// specification) to a private object held alive by the resulting record batch. 0283 /// The ArrowSchema struct is released, even if this function fails. The buffers 0284 /// of all columns of the record batch are located on the device indicated by the 0285 /// device type. 0286 /// 0287 /// \param[in,out] array C data interface struct holding the record batch data 0288 /// \param[in,out] schema C data interface struct holding the record batch schema 0289 /// \param[in] mapper A function to map device + id to memory manager. If not 0290 /// specified, defaults to map "cpu" to the built-in default memory manager. 0291 /// \return Imported record batch object 0292 ARROW_EXPORT 0293 Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch( 0294 struct ArrowDeviceArray* array, struct ArrowSchema* schema, 0295 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0296 0297 /// @} 0298 0299 /// \defgroup c-stream-interface Functions for working with the C data interface. 0300 /// 0301 /// @{ 0302 0303 /// \brief Export C++ RecordBatchReader using the C stream interface. 0304 /// 0305 /// The resulting ArrowArrayStream struct keeps the record batch reader alive 0306 /// until its release callback is called by the consumer. 0307 /// 0308 /// \param[in] reader RecordBatchReader object to export 0309 /// \param[out] out C struct where to export the stream 0310 ARROW_EXPORT 0311 Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader, 0312 struct ArrowArrayStream* out); 0313 0314 /// \brief Export C++ ChunkedArray using the C data interface format. 0315 /// 0316 /// The resulting ArrowArrayStream struct keeps the chunked array data and buffers alive 0317 /// until its release callback is called by the consumer. 0318 /// 0319 /// \param[in] chunked_array ChunkedArray object to export 0320 /// \param[out] out C struct where to export the stream 0321 ARROW_EXPORT 0322 Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array, 0323 struct ArrowArrayStream* out); 0324 0325 /// \brief Export C++ RecordBatchReader using the C device stream interface 0326 /// 0327 /// The resulting ArrowDeviceArrayStream struct keeps the record batch reader 0328 /// alive until its release callback is called by the consumer. The device 0329 /// type is determined by calling device_type() on the RecordBatchReader. 0330 /// 0331 /// \param[in] reader RecordBatchReader object to export 0332 /// \param[out] out C struct to export the stream to 0333 ARROW_EXPORT 0334 Status ExportDeviceRecordBatchReader(std::shared_ptr<RecordBatchReader> reader, 0335 struct ArrowDeviceArrayStream* out); 0336 0337 /// \brief Export C++ ChunkedArray using the C device data interface format. 0338 /// 0339 /// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers 0340 /// alive until its release callback is called by the consumer. 0341 /// 0342 /// \param[in] chunked_array ChunkedArray object to export 0343 /// \param[in] device_type the device type the data is located on 0344 /// \param[out] out C struct to export the stream to 0345 ARROW_EXPORT 0346 Status ExportDeviceChunkedArray(std::shared_ptr<ChunkedArray> chunked_array, 0347 DeviceAllocationType device_type, 0348 struct ArrowDeviceArrayStream* out); 0349 0350 /// \brief Import C++ RecordBatchReader from the C stream interface. 0351 /// 0352 /// The ArrowArrayStream struct has its contents moved to a private object 0353 /// held alive by the resulting record batch reader. 0354 /// 0355 /// \param[in,out] stream C stream interface struct 0356 /// \return Imported RecordBatchReader object 0357 ARROW_EXPORT 0358 Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader( 0359 struct ArrowArrayStream* stream); 0360 0361 /// \brief Import C++ ChunkedArray from the C stream interface 0362 /// 0363 /// The ArrowArrayStream struct has its contents moved to a private object, 0364 /// is consumed in its entirity, and released before returning all chunks 0365 /// as a ChunkedArray. 0366 /// 0367 /// \param[in,out] stream C stream interface struct 0368 /// \return Imported ChunkedArray object 0369 ARROW_EXPORT 0370 Result<std::shared_ptr<ChunkedArray>> ImportChunkedArray(struct ArrowArrayStream* stream); 0371 0372 /// \brief Import C++ RecordBatchReader from the C device stream interface 0373 /// 0374 /// The ArrowDeviceArrayStream struct has its contents moved to a private object 0375 /// held alive by the resulting record batch reader. 0376 /// 0377 /// \note If there was a required sync event, sync events are accessible by individual 0378 /// buffers of columns. We are not yet bubbling the sync events from the buffers up to 0379 /// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future 0380 /// update. 0381 /// 0382 /// \param[in,out] stream C device stream interface struct 0383 /// \param[in] mapper mapping from device type and ID to memory manager 0384 /// \return Imported RecordBatchReader object 0385 ARROW_EXPORT 0386 Result<std::shared_ptr<RecordBatchReader>> ImportDeviceRecordBatchReader( 0387 struct ArrowDeviceArrayStream* stream, 0388 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0389 0390 /// \brief Import C++ ChunkedArray from the C device stream interface 0391 /// 0392 /// The ArrowDeviceArrayStream struct has its contents moved to a private object, 0393 /// is consumed in its entirety, and released before returning all chunks as a 0394 /// ChunkedArray. 0395 /// 0396 /// \note Any chunks that require synchronization for their device memory will have 0397 /// the SyncEvent objects available by checking the individual buffers of each chunk. 0398 /// These SyncEvents should be checked before accessing the data in those buffers. 0399 /// 0400 /// \param[in,out] stream C device stream interface struct 0401 /// \param[in] mapper mapping from device type and ID to memory manager 0402 /// \return Imported ChunkedArray object 0403 ARROW_EXPORT 0404 Result<std::shared_ptr<ChunkedArray>> ImportDeviceChunkedArray( 0405 struct ArrowDeviceArrayStream* stream, 0406 const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); 0407 0408 /// @} 0409 0410 /// \defgroup c-async-stream-interface Functions for working with the async C data 0411 /// interface. 0412 /// 0413 /// @{ 0414 0415 /// \brief EXPERIMENTAL: AsyncErrorDetail is a StatusDetail that contains an error code 0416 /// and message from an asynchronous operation. 0417 class AsyncErrorDetail : public StatusDetail { 0418 public: 0419 AsyncErrorDetail(int code, std::string message, std::string metadata) 0420 : code_(code), message_(std::move(message)), metadata_(std::move(metadata)) {} 0421 const char* type_id() const override { return "AsyncErrorDetail"; } 0422 // ToString just returns the error message that was returned with the error 0423 std::string ToString() const override { return message_; } 0424 // code is an errno-compatible error code 0425 int code() const { return code_; } 0426 // returns any metadata that was returned with the error, likely in a 0427 // key-value format similar to ArrowSchema metadata 0428 const std::string& ErrorMetadataString() const { return metadata_; } 0429 std::shared_ptr<KeyValueMetadata> ErrorMetadata() const; 0430 0431 private: 0432 int code_{0}; 0433 std::string message_; 0434 std::string metadata_; 0435 }; 0436 0437 struct AsyncRecordBatchGenerator { 0438 std::shared_ptr<Schema> schema; 0439 DeviceAllocationType device_type; 0440 AsyncGenerator<RecordBatchWithMetadata> generator; 0441 }; 0442 0443 namespace internal { 0444 class Executor; 0445 } 0446 0447 /// \brief EXPERIMENTAL: Create an AsyncRecordBatchReader and populate a corresponding 0448 /// handler to pass to a producer 0449 /// 0450 /// The ArrowAsyncDeviceStreamHandler struct is intended to have its callbacks populated 0451 /// and then be passed to a producer to call the appropriate callbacks when data is ready. 0452 /// This inverts the traditional flow of control, and so we construct a corresponding 0453 /// AsyncRecordBatchGenerator to provide an interface for the consumer to retrieve data as 0454 /// it is pushed to the handler. 0455 /// 0456 /// \param[in,out] handler C struct to be populated 0457 /// \param[in] executor the executor to use for waiting and populating record batches 0458 /// \param[in] queue_size initial number of record batches to request for queueing 0459 /// \param[in] mapper mapping from device type and ID to memory manager 0460 /// \return Future that resolves to either an error or AsyncRecordBatchGenerator once a 0461 /// schema is available or an error is received. 0462 ARROW_EXPORT 0463 Future<AsyncRecordBatchGenerator> CreateAsyncDeviceStreamHandler( 0464 struct ArrowAsyncDeviceStreamHandler* handler, internal::Executor* executor, 0465 uint64_t queue_size = 5, DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper); 0466 0467 /// \brief EXPERIMENTAL: Export an AsyncGenerator of record batches using a provided 0468 /// handler 0469 /// 0470 /// This function calls the callbacks on the consumer-provided async handler as record 0471 /// batches become available from the AsyncGenerator which is provided. It will first call 0472 /// on_schema using the provided schema, and then serially visit each record batch from 0473 /// the generator, calling the on_next_task callback. If an error occurs, on_error will be 0474 /// called appropriately. 0475 /// 0476 /// \param[in] schema the schema of the stream being exported 0477 /// \param[in] generator a generator that asynchronously produces record batches 0478 /// \param[in] device_type the device type that the record batches will be located on 0479 /// \param[in] handler the handler whose callbacks to utilize as data is available 0480 /// \return Future that will resolve once the generator is exhausted or an error occurs 0481 ARROW_EXPORT 0482 Future<> ExportAsyncRecordBatchReader( 0483 std::shared_ptr<Schema> schema, 0484 AsyncGenerator<std::shared_ptr<RecordBatch>> generator, 0485 DeviceAllocationType device_type, struct ArrowAsyncDeviceStreamHandler* handler); 0486 0487 /// @} 0488 0489 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |