Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:55

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 /// \file abi.h Arrow C Data Interface
0019 ///
0020 /// The Arrow C Data interface defines a very small, stable set
0021 /// of C definitions which can be easily copied into any project's
0022 /// source code and vendored to be used for columnar data interchange
0023 /// in the Arrow format. For non-C/C++ languages and runtimes,
0024 /// it should be almost as easy to translate the C definitions into
0025 /// the corresponding C FFI declarations.
0026 ///
0027 /// Applications and libraries can therefore work with Arrow memory
0028 /// without necessarily using the Arrow libraries or reinventing
0029 /// the wheel. Developers can choose between tight integration
0030 /// with the Arrow software project or minimal integration with
0031 /// the Arrow format only.
0032 
0033 #pragma once
0034 
0035 #include <stdint.h>
0036 
0037 // Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html
0038 
0039 #ifdef __cplusplus
0040 extern "C" {
0041 #endif
0042 
0043 #ifndef ARROW_C_DATA_INTERFACE
0044 #  define ARROW_C_DATA_INTERFACE
0045 
0046 #  define ARROW_FLAG_DICTIONARY_ORDERED 1  // bit 0 (1 << 0)
0047 #  define ARROW_FLAG_NULLABLE 2  // bit 1 (1 << 1)
0048 #  define ARROW_FLAG_MAP_KEYS_SORTED 4  // bit 2 (1 << 2); NOTE(review): distinct bits, presumably OR-ed into ArrowSchema.flags -- confirm against the spec
0049 
0050 struct ArrowSchema {
0051   // Array type description (field semantics: see the CDataInterface spec URL above)
0052   const char* format;
0053   const char* name;
0054   const char* metadata;  // encoding: see CDataInterface.html#c.ArrowSchema.metadata
0055   int64_t flags;  // NOTE(review): presumably a combination of ARROW_FLAG_* values -- confirm
0056   int64_t n_children;  // NOTE(review): presumably the number of entries in `children` -- confirm
0057   struct ArrowSchema** children;
0058   struct ArrowSchema* dictionary;
0059 
0060   // Release callback; takes a pointer to an ArrowSchema and returns nothing
0061   void (*release)(struct ArrowSchema*);
0062   // Opaque producer-specific data
0063   void* private_data;
0064 };
0065 
0066 struct ArrowArray {
0067   // Array data description (field semantics: see the CDataInterface spec URL above)
0068   int64_t length;
0069   int64_t null_count;
0070   int64_t offset;
0071   int64_t n_buffers;  // NOTE(review): presumably the number of entries in `buffers` -- confirm
0072   int64_t n_children;  // NOTE(review): presumably the number of entries in `children` -- confirm
0073   const void** buffers;
0074   struct ArrowArray** children;
0075   struct ArrowArray* dictionary;
0076 
0077   // Release callback; takes a pointer to an ArrowArray and returns nothing
0078   void (*release)(struct ArrowArray*);
0079   // Opaque producer-specific data
0080   void* private_data;
0081 };
0082 
0083 #  define ARROW_STATISTICS_KEY_AVERAGE_BYTE_WIDTH_EXACT "ARROW:average_byte_width:exact"  // statistics key strings; each statistic has an ":exact" and an ":approximate" form
0084 #  define ARROW_STATISTICS_KEY_AVERAGE_BYTE_WIDTH_APPROXIMATE \
0085     "ARROW:average_byte_width:approximate"
0086 #  define ARROW_STATISTICS_KEY_DISTINCT_COUNT_EXACT "ARROW:distinct_count:exact"
0087 #  define ARROW_STATISTICS_KEY_DISTINCT_COUNT_APPROXIMATE \
0088     "ARROW:distinct_count:approximate"
0089 #  define ARROW_STATISTICS_KEY_MAX_BYTE_WIDTH_EXACT "ARROW:max_byte_width:exact"
0090 #  define ARROW_STATISTICS_KEY_MAX_BYTE_WIDTH_APPROXIMATE \
0091     "ARROW:max_byte_width:approximate"
0092 #  define ARROW_STATISTICS_KEY_MAX_VALUE_EXACT "ARROW:max_value:exact"
0093 #  define ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE "ARROW:max_value:approximate"
0094 #  define ARROW_STATISTICS_KEY_MIN_VALUE_EXACT "ARROW:min_value:exact"
0095 #  define ARROW_STATISTICS_KEY_MIN_VALUE_APPROXIMATE "ARROW:min_value:approximate"
0096 #  define ARROW_STATISTICS_KEY_NULL_COUNT_EXACT "ARROW:null_count:exact"
0097 #  define ARROW_STATISTICS_KEY_NULL_COUNT_APPROXIMATE "ARROW:null_count:approximate"
0098 #  define ARROW_STATISTICS_KEY_ROW_COUNT_EXACT "ARROW:row_count:exact"
0099 #  define ARROW_STATISTICS_KEY_ROW_COUNT_APPROXIMATE "ARROW:row_count:approximate"
0100 
0101 #endif  // ARROW_C_DATA_INTERFACE
0102 
0103 #ifndef ARROW_C_DEVICE_DATA_INTERFACE
0104 #  define ARROW_C_DEVICE_DATA_INTERFACE
0105 
0106 // Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html
0107 
0108 // DeviceType for the allocated memory (a 32-bit signed integer; see ARROW_DEVICE_* below)
0109 typedef int32_t ArrowDeviceType;
0110 
0111 // CPU device, same as using ArrowArray directly
0112 #  define ARROW_DEVICE_CPU 1
0113 // CUDA GPU Device
0114 #  define ARROW_DEVICE_CUDA 2
0115 // Pinned CUDA CPU memory by cudaMallocHost
0116 #  define ARROW_DEVICE_CUDA_HOST 3
0117 // OpenCL Device
0118 #  define ARROW_DEVICE_OPENCL 4
0119 // Vulkan buffer for next-gen graphics
0120 #  define ARROW_DEVICE_VULKAN 7  // note: values 5 and 6 are not defined in this header
0121 // Metal for Apple GPU
0122 #  define ARROW_DEVICE_METAL 8
0123 // Verilog simulator buffer
0124 #  define ARROW_DEVICE_VPI 9
0125 // ROCm GPUs for AMD GPUs
0126 #  define ARROW_DEVICE_ROCM 10
0127 // Pinned ROCm CPU memory allocated by hipMallocHost
0128 #  define ARROW_DEVICE_ROCM_HOST 11
0129 // Reserved for extension
0130 #  define ARROW_DEVICE_EXT_DEV 12
0131 // CUDA managed/unified memory allocated by cudaMallocManaged
0132 #  define ARROW_DEVICE_CUDA_MANAGED 13
0133 // Unified shared memory allocated on a oneAPI non-partitioned device
0134 #  define ARROW_DEVICE_ONEAPI 14
0135 // GPU support for next-gen WebGPU standard
0136 #  define ARROW_DEVICE_WEBGPU 15
0137 // Qualcomm Hexagon DSP
0138 #  define ARROW_DEVICE_HEXAGON 16
0139 
0140 struct ArrowDeviceArray {
0141   // The allocated array.
0142   //
0143   // The buffers in the array (along with the buffers of any
0144   // children) are what is allocated on the device.
0145   struct ArrowArray array;
0146   // The device id to identify a specific device
0147   int64_t device_id;
0148   // The type of device which can access this memory.
0149   ArrowDeviceType device_type;  // NOTE(review): presumably one of the ARROW_DEVICE_* values -- confirm
0150   // An event-like object to synchronize on if needed.
0151   void* sync_event;
0152   // Reserved bytes for future expansion (3 x int64_t = 24 bytes).
0153   int64_t reserved[3];
0154 };
0155 
0156 #endif  // ARROW_C_DEVICE_DATA_INTERFACE
0157 
0158 #ifndef ARROW_C_STREAM_INTERFACE
0159 #  define ARROW_C_STREAM_INTERFACE
0160 
0161 struct ArrowArrayStream {
0162   // Callback to get the stream type, written to `out` as an ArrowSchema
0163   // (will be the same for all arrays in the stream).
0164   //
0165   // Return value: 0 if successful, an `errno`-compatible error code otherwise.
0166   //
0167   // If successful, the ArrowSchema must be released independently from the stream.
0168   int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
0169 
0170   // Callback to get the next array, written to `out`
0171   // (if no error and the array is released, the stream has ended).
0172   //
0173   // Return value: 0 if successful, an `errno`-compatible error code otherwise.
0174   //
0175   // If successful, the ArrowArray must be released independently from the stream.
0176   int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
0177 
0178   // Callback to get optional detailed error information.
0179   // This must only be called if the last stream operation failed
0180   // with a non-0 return code.
0181   //
0182   // Return value: pointer to a null-terminated character array describing
0183   // the last error, or NULL if no description is available.
0184   //
0185   // The returned pointer is only valid until the next operation on this stream
0186   // (including release).
0187   const char* (*get_last_error)(struct ArrowArrayStream*);
0188 
0189   // Release callback: release the stream's own resources.
0190   // Note that arrays returned by `get_next` must be individually released.
0191   void (*release)(struct ArrowArrayStream*);
0192 
0193   // Opaque producer-specific data
0194   void* private_data;
0195 };
0196 
0197 #endif  // ARROW_C_STREAM_INTERFACE
0198 
0199 #ifndef ARROW_C_DEVICE_STREAM_INTERFACE
0200 #  define ARROW_C_DEVICE_STREAM_INTERFACE
0201 
0202 // Equivalent to ArrowArrayStream, but for ArrowDeviceArrays.
0203 //
0204 // This stream is intended to provide a stream of data on a single
0205 // device; if a producer wants data to be produced on multiple devices,
0206 // then multiple streams should be provided, one per device.
0207 struct ArrowDeviceArrayStream {
0208   // The device that this stream produces data on.
0209   ArrowDeviceType device_type;
0210 
0211   // Callback to get the stream schema
0212   // (will be the same for all arrays in the stream).
0213   //
0214   // Return value: 0 if successful, an `errno`-compatible error code otherwise.
0215   //
0216   // If successful, the ArrowSchema must be released independently from the stream.
0217   // The schema should be accessible via CPU memory.
0218   int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out);
0219 
0220   // Callback to get the next array
0221   // (if no error and the array is released, the stream has ended).
0222   //
0223   // Return value: 0 if successful, an `errno`-compatible error code otherwise.
0224   //
0225   // If successful, the ArrowDeviceArray must be released independently from the stream.
0226   int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out);
0227 
0228   // Callback to get optional detailed error information.
0229   // This must only be called if the last stream operation failed
0230   // with a non-0 return code.
0231   //
0232   // Return value: pointer to a null-terminated character array describing
0233   // the last error, or NULL if no description is available.
0234   //
0235   // The returned pointer is only valid until the next operation on this stream
0236   // (including release).
0237   const char* (*get_last_error)(struct ArrowDeviceArrayStream* self);
0238 
0239   // Release callback: release the stream's own resources.
0240   // Note that arrays returned by `get_next` must be individually released.
0241   void (*release)(struct ArrowDeviceArrayStream* self);
0242 
0243   // Opaque producer-specific data
0244   void* private_data;
0245 };
0246 
0247 #endif  // ARROW_C_DEVICE_STREAM_INTERFACE
0248 
0249 #ifndef ARROW_C_ASYNC_STREAM_INTERFACE
0250 #  define ARROW_C_ASYNC_STREAM_INTERFACE
0251 
0252 // EXPERIMENTAL: ArrowAsyncTask represents available data from a producer that was passed
0253 // to an invocation of `on_next_task` on the ArrowAsyncDeviceStreamHandler.
0254 //
0255 // The reason for this Task approach instead of the Async interface returning
0256 // the Array directly is to allow for more complex thread handling and reducing
0257 // context switching and data transfers between CPU cores (e.g. from one L1/L2
0258 // cache to another) if desired.
0259 //
0260 // For example, the `on_next_task` callback can be called when data is ready, while
0261 // the producer puts potential "decoding" logic in the `ArrowAsyncTask` object. This
0262 // allows for the producer to manage the I/O on one thread which calls `on_next_task`
0263 // and the consumer can determine when the decoding (producer logic in the `extract_data`
0264 // callback of the task) occurs and on which thread, to avoid a CPU core transfer
0265 // (data staying in the L2 cache).
0266 struct ArrowAsyncTask {
0267   // This callback should populate the ArrowDeviceArray associated with this task.
0268   // The order of ArrowAsyncTasks provided by the producer enables a consumer to
0269   // ensure the order of data to process.
0270   //
0271   // This function is expected to be synchronous, but should not perform any blocking
0272   // I/O. Ideally it should be as cheap as possible so as to not tie up the consumer
0273   // thread unnecessarily.
0274   //
0275   // Returns: 0 if successful, errno-compatible error otherwise.
0276   //
0277   // If a non-0 value is returned then it should be followed by a call to `on_error`
0278   // on the appropriate ArrowAsyncDeviceStreamHandler. This is because it's highly
0279   // likely that whatever is calling this function may be entirely disconnected from
0280   // the current control flow. Indicating an error here with a non-zero return allows
0281   // the current flow to be aware of the error occurring, while still allowing any
0282   // logging or error handling to be centralized in the `on_error` callback of
0283   // the original Async handler.
0284   //
0285   // Rather than a release callback, any required cleanup should be performed as part
0286   // of the invocation of `extract_data`. Ownership of the Array is passed to the consumer
0287   // calling this, and so it must be released separately.
0288   //
0289   // It is only valid to call this method exactly once.
0290   int (*extract_data)(struct ArrowAsyncTask* self, struct ArrowDeviceArray* out);
0291 
0292   // Opaque task-specific data
0293   void* private_data;
0294 };
0295 
0296 // EXPERIMENTAL: ArrowAsyncProducer represents a 1-to-1 relationship between an async
0297 // producer and consumer. This object allows the consumer to perform backpressure and flow
0298 // control on the asynchronous stream processing. This object must be owned by the
0299 // producer who creates it, and thus that producer is responsible for cleaning it up.
0300 struct ArrowAsyncProducer {
0301   // The device type that this stream produces data on.
0302   ArrowDeviceType device_type;
0303 
0304   // A consumer must call this function to start receiving on_next_task calls.
0305   //
0306   // It *must* be valid to call this synchronously from within `on_next_task` or
0307   // `on_schema`, but this function *must not* immediately call `on_next_task` so as
0308   // to avoid recursion and reentrant callbacks.
0309   //
0310   // After cancel has been called, additional calls to this function must be NOPs,
0311   // but allowed. While not cancelled, calling this function must register the
0312   // given number of additional arrays/batches to be produced with the producer.
0313   // The producer should only call `on_next_task` at most the registered number
0314   // of arrays before propagating backpressure.
0315   //
0316   // Any error encountered by calling request must be propagated by calling the `on_error`
0317   // callback of the ArrowAsyncDeviceStreamHandler.
0318   //
0319   // While not cancelled, any subsequent calls to `on_next_task`, `on_error` or
0320   // `release` should be scheduled by the producer to be called later.
0321   //
0322   // It is invalid for a consumer to call this with a value of n <= 0; producers should
0323   // error if given such a value.
0324   void (*request)(struct ArrowAsyncProducer* self, int64_t n);
0325 
0326   // This cancel callback signals a producer that it must eventually stop making calls
0327   // to on_next_task. It must be idempotent and thread-safe. After calling cancel once,
0328   // subsequent calls must be NOPs. This must not call any consumer-side handlers other
0329   // than `on_error`.
0330   //
0331   // It is not required that calling cancel affect the producer immediately, only that it
0332   // must eventually stop calling on_next_task and subsequently call release on the
0333   // async handler. As such, a consumer must be prepared to receive one or more calls to
0334   // `on_next_task` even after calling cancel if there are still requested arrays pending.
0335   //
0336   // Successful cancellation should *not* result in the producer calling `on_error`; it
0337   // should finish out any remaining tasks and eventually call `release`.
0338   //
0339   // Any error encountered during handling a call to cancel must be reported via the
0340   // on_error callback on the async stream handler.
0341   void (*cancel)(struct ArrowAsyncProducer* self);
0342 
0343   // Any additional metadata tied to a specific stream of data. This must either be NULL
0344   // or a valid pointer to metadata which is encoded in the same way schema metadata
0345   // would be. Non-null metadata must be valid for the lifetime of this object. As an
0346   // example, a producer could use this to provide the total number of rows and/or batches
0347   // in the stream if known.
0348   const char* additional_metadata;
0349 
0350   // Opaque producer-specific data
0351   void* private_data;
0352 };
0353 
0354 // EXPERIMENTAL: Similar to ArrowDeviceArrayStream, except designed for an asynchronous
0355 // style of interaction. While ArrowDeviceArrayStream provides producer-defined
0356 // callbacks, this is intended to be created by the consumer instead.
0357 // The consumer passes this handler to the producer, which in turn uses the
0358 // callbacks to inform the consumer of events in the stream.
0359 struct ArrowAsyncDeviceStreamHandler {
0360   // Handler for receiving a schema. The passed in stream_schema must be
0361   // released or moved by the handler (producer is giving ownership of the schema to
0362   // the handler, but not ownership of the top level object itself).
0363   //
0364   // With the exception of an error occurring (on_error), this must be the first
0365   // callback function which is called by a producer and must only be called exactly
0366   // once. As such, the producer should provide a valid ArrowAsyncProducer instance
0367   // so the consumer can control the flow. See the documentation on ArrowAsyncProducer
0368   // for how it works. The ArrowAsyncProducer is owned by the producer who calls this
0369   // function and thus the producer is responsible for cleaning it up when calling
0370   // the release callback of this handler.
0371   //
0372   // If there is any additional metadata tied to this stream, it will be provided as
0373   // a non-null value for the `additional_metadata` field of the ArrowAsyncProducer
0374   // which will be valid at least until the release callback is called.
0375   //
0376   // Return value: 0 if successful, `errno`-compatible error otherwise.
0377   //
0378   // A producer that receives a non-zero return here should stop producing and eventually
0379   // call release instead.
0380   int (*on_schema)(struct ArrowAsyncDeviceStreamHandler* self,
0381                    struct ArrowSchema* stream_schema);
0382 
0383   // Handler for receiving data. This is called when data is available providing an
0384   // ArrowAsyncTask struct to signify it. The producer indicates the end of the stream
0385   // by passing NULL as the value for the task rather than a valid pointer to a task.
0386   // The task object is only valid for the lifetime of this function call; if a consumer
0387   // wants to utilize it after this function returns, it must copy or move the contents
0388   // of it to a new ArrowAsyncTask object.
0389   //
0390   // The `request` callback of a provided ArrowAsyncProducer must be called in order
0391   // to start receiving calls to this handler.
0392   //
0393   // The metadata argument can be null or can be used by a producer
0394   // to pass arbitrary extra information to the consumer (such as total number
0395   // of rows, context info, or otherwise). The data should be passed using the same
0396   // encoding as the metadata within the ArrowSchema struct itself (defined in
0397   // the spec at
0398   // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata)
0399   //
0400   // If metadata is non-null then it only needs to exist for the lifetime of this call;
0401   // a consumer who wants it to live after that must copy it to ensure lifetime.
0402   //
0403   // A producer *must not* call this concurrently from multiple different threads.
0404   //
0405   // A consumer must be prepared to receive one or more calls to this callback even
0406   // after calling cancel on the corresponding ArrowAsyncProducer, as cancel does not
0407   // guarantee it happens immediately.
0408   //
0409   // Return value: 0 if successful, `errno`-compatible error otherwise.
0410   //
0411   // If the consumer returns a non-zero return from this method, that indicates to the
0412   // producer that it should stop propagating data as an error occurred. After receiving
0413   // such a return, the only interaction with this object is for the producer to call
0414   // the `release` callback.
0415   int (*on_next_task)(struct ArrowAsyncDeviceStreamHandler* self,
0416                       struct ArrowAsyncTask* task, const char* metadata);
0417 
0418   // Handler for encountering an error. The producer should call release after
0419   // this returns to clean up any resources. The `code` passed in can be any error
0420   // code that a producer wants, but should be errno-compatible for consistency.
0421   //
0422   // If the message or metadata are non-null, they will only last as long as this
0423   // function call. The consumer would need to perform a copy of the data if it is
0424   // necessary for them to live past the lifetime of this call.
0425   //
0426   // Error metadata should be encoded as with metadata in ArrowSchema, defined in
0427   // the spec at
0428   // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata
0429   //
0430   // It is valid for this to be called by a producer with or without a preceding call
0431   // to ArrowAsyncProducer.request.
0432   //
0433   // This callback must not call any methods of an ArrowAsyncProducer object.
0434   void (*on_error)(struct ArrowAsyncDeviceStreamHandler* self, int code,
0435                    const char* message, const char* metadata);
0436 
0437   // Release callback to release any resources for the handler. Should always be
0438   // called by a producer when it is done utilizing a handler. No callbacks should
0439   // be called after this is called.
0440   //
0441   // It is valid for the release callback to be called by a producer with or without
0442   // a preceding call to ArrowAsyncProducer.request.
0443   //
0444   // The release callback must not call any methods of an ArrowAsyncProducer object.
0445   void (*release)(struct ArrowAsyncDeviceStreamHandler* self);
0446 
0447   // MUST be populated by the producer BEFORE calling any callbacks other than release.
0448   // This provides the connection between a handler and its producer, and must exist until
0449   // the release callback is called.
0450   struct ArrowAsyncProducer* producer;
0451 
0452   // Opaque handler-specific data
0453   void* private_data;
0454 };
0455 
0456 #endif  // ARROW_C_ASYNC_STREAM_INTERFACE
0457 
0458 #ifdef __cplusplus
0459 }
0460 #endif