Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:19

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 /// User-defined extension types.
0019 /// \since 0.13.0
0020 
0021 #pragma once
0022 
0023 #include <memory>
0024 #include <string>
0025 
0026 #include "arrow/array/array_base.h"
0027 #include "arrow/array/data.h"
0028 #include "arrow/result.h"
0029 #include "arrow/status.h"
0030 #include "arrow/type.h"
0031 #include "arrow/type_fwd.h"
0032 #include "arrow/util/checked_cast.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/visibility.h"
0035 
0036 namespace arrow {
0037 
0038 /// \brief The base class for custom / user-defined types.
     ///
     /// An ExtensionType is a DataType constructed with the Type::EXTENSION id
     /// that holds a storage DataType. Concrete extension types derive from this
     /// class and must implement extension_name(), ExtensionEquals(), MakeArray(),
     /// Deserialize() and Serialize(), all of which are pure virtual here.
0039 class ARROW_EXPORT ExtensionType : public DataType {
0040  public:
       /// \brief Type id constant for this class; always Type::EXTENSION
0041   static constexpr Type::type type_id = Type::EXTENSION;
0042 
       /// \brief Generic type name of the class; always the literal "extension"
0043   static constexpr const char* type_name() { return "extension"; }
0044 
0045   /// \brief The type of array used to represent this extension type's data
       /// \return a reference to the stored storage type (no copy is made)
0046   const std::shared_ptr<DataType>& storage_type() const { return storage_type_; }
0047 
0048   /// \brief Return the type category of the storage type
       /// \return the result of id() called on the storage type
0049   Type::type storage_id() const override { return storage_type_->id(); }
0050 
       /// \brief Layout of this type; defined out of line
       // NOTE(review): presumably mirrors the storage type's layout -- confirm in
       // the implementation file.
0051   DataTypeLayout layout() const override;
0052 
       /// \brief String representation of this type; defined out of line
       /// \param[in] show_metadata defaults to false; its effect is determined by
       /// the out-of-line definition
0053   std::string ToString(bool show_metadata = false) const override;
0054 
       /// \brief Generic name of the type; always "extension"
       ///
       /// The name identifying a particular extension type is returned by
       /// extension_name() instead.
0055   std::string name() const override { return "extension"; }
0056 
       /// \brief Byte width, forwarded to the storage type
0057   int32_t byte_width() const override { return storage_type_->byte_width(); }
       /// \brief Bit width, forwarded to the storage type
0058   int bit_width() const override { return storage_type_->bit_width(); }
0059 
0060   /// \brief Unique name of extension type used to identify type for
0061   /// serialization
0062   /// \return the string name of the extension
0063   virtual std::string extension_name() const = 0;
0064 
0065   /// \brief Determine if two instances of the same extension types are
0066   /// equal. Invoked from ExtensionType::Equals
0067   /// \param[in] other the type to compare this type with
0068   /// \return bool true if type instances are equal
0069   virtual bool ExtensionEquals(const ExtensionType& other) const = 0;
0070 
0071   /// \brief Wrap built-in Array type in a user-defined ExtensionArray instance
0072   /// \param[in] data the physical storage for the extension type
       /// \return the array produced by the concrete extension type
0073   virtual std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const = 0;
0074 
0075   /// \brief Create an instance of the ExtensionType given the actual storage
0076   /// type and the serialized representation
0077   /// \param[in] storage_type the physical storage type of the extension
0078   /// \param[in] serialized_data the serialized representation produced by
0079   /// Serialize
       /// \return a Result holding the reconstructed type, or an error Status
0080   virtual Result<std::shared_ptr<DataType>> Deserialize(
0081       std::shared_ptr<DataType> storage_type,
0082       const std::string& serialized_data) const = 0;
0083 
0084   /// \brief Create a serialized representation of the extension type's
0085   /// metadata. The storage type will be handled automatically in IPC code
0086   /// paths
0087   /// \return the serialized representation
0088   virtual std::string Serialize() const = 0;
0089 
0090   /// \brief Wrap the given storage array as an extension array
       /// \param[in] ext_type the type to wrap with, passed as a DataType pointer
       /// (presumably must point to an ExtensionType -- confirm in the definition)
       /// \param[in] storage the storage array to wrap
0091   static std::shared_ptr<Array> WrapArray(const std::shared_ptr<DataType>& ext_type,
0092                                           const std::shared_ptr<Array>& storage);
0093 
0094   /// \brief Wrap the given chunked storage array as a chunked extension array
       /// \param[in] ext_type the type to wrap with, passed as a DataType pointer
       /// (presumably must point to an ExtensionType -- confirm in the definition)
       /// \param[in] storage the chunked storage array to wrap
0095   static std::shared_ptr<ChunkedArray> WrapArray(
0096       const std::shared_ptr<DataType>& ext_type,
0097       const std::shared_ptr<ChunkedArray>& storage);
0098 
0099  protected:
       /// \brief Construct the base with Type::EXTENSION as the DataType id
       ///
       /// Protected, so only derived classes can construct it. The argument is
       /// taken by value and moved into storage_type_.
       /// \param[in] storage_type the storage type backing this extension type
0100   explicit ExtensionType(std::shared_ptr<DataType> storage_type)
0101       : DataType(Type::EXTENSION), storage_type_(std::move(storage_type)) {}
0102 
       // Storage type returned by storage_type() and consulted by storage_id(),
       // byte_width() and bit_width().
0103   std::shared_ptr<DataType> storage_type_;
0104 };
0105 
0106 /// \brief Base array class for user-defined extension types
     ///
     /// Holds the storage Array in storage_ in addition to what the Array base
     /// class provides.
0107 class ARROW_EXPORT ExtensionArray : public Array {
0108  public:
       /// \brief The type class associated with this array class
0109   using TypeClass = ExtensionType;
0110   /// \brief Construct an ExtensionArray from an ArrayData.
0111   ///
0112   /// The ArrayData must have the right ExtensionType.
       /// \param[in] data the array data to construct from
0113   explicit ExtensionArray(const std::shared_ptr<ArrayData>& data);
0114 
0115   /// \brief Construct an ExtensionArray from a type and the underlying storage.
       /// \param[in] type the type of the array, passed as a DataType pointer
       /// (presumably must point to an ExtensionType -- confirm in the definition)
       /// \param[in] storage the array providing the underlying storage
0116   ExtensionArray(const std::shared_ptr<DataType>& type,
0117                  const std::shared_ptr<Array>& storage);
0118 
       /// \brief Return this array's type as an ExtensionType
       ///
       /// The pointer is obtained from data_->type.get() and converted with
       /// internal::checked_cast; it is a raw, non-owning pointer.
0119   const ExtensionType* extension_type() const {
0120     return internal::checked_cast<const ExtensionType*>(data_->type.get());
0121   }
0122 
0123   /// \brief The physical storage for the extension array
       /// \return a reference to the stored storage array (no copy is made)
0124   const std::shared_ptr<Array>& storage() const { return storage_; }
0125 
0126  protected:
       // Defined out of line. NOTE(review): presumably also initializes storage_
       // from the given data -- confirm in the implementation file.
0127   void SetData(const std::shared_ptr<ArrayData>& data);
       // Storage array returned by storage().
0128   std::shared_ptr<Array> storage_;
0129 };
0130 
0131 class ARROW_EXPORT ExtensionTypeRegistry {
     // Abstract interface of an extension type registry. RegisterType(),
     // UnregisterType() and GetType() are pure virtual; no concrete
     // implementation is declared in this header.
0132  public:
0133   /// \brief Provide access to the global registry to allow code to control for
0134   /// race conditions in registry teardown when some types need to be
0135   /// unregistered and destroyed first
       /// \return a shared pointer to the global registry
0136   static std::shared_ptr<ExtensionTypeRegistry> GetGlobalRegistry();
0137 
       // Virtual destructor so that implementations can be destroyed through a
       // pointer to this interface.
0138   virtual ~ExtensionTypeRegistry() = default;
0139 
       /// \brief Add an extension type to this registry
       /// \param[in] type an instance of the extension type
       /// \return Status
       // NOTE(review): presumably the per-registry counterpart of the free
       // function RegisterExtensionType() -- confirm in the implementation.
0140   virtual Status RegisterType(std::shared_ptr<ExtensionType> type) = 0;
       /// \brief Remove an extension type from this registry
       /// \param[in] type_name the name of the extension type to remove
       /// \return Status
       // NOTE(review): presumably the per-registry counterpart of the free
       // function UnregisterExtensionType() -- confirm in the implementation.
0141   virtual Status UnregisterType(const std::string& type_name) = 0;
       /// \brief Look up an extension type in this registry
       /// \param[in] type_name the name of the extension type to look up
       /// \return the extension type found
       // NOTE(review): presumably returns nullptr when the name is unknown, as
       // documented for GetExtensionType() -- confirm in the implementation.
0142   virtual std::shared_ptr<ExtensionType> GetType(const std::string& type_name) = 0;
0143 };
0144 
0145 /// \brief Register an extension type globally. The name returned by the type's
0146 /// extension_name() method should be unique. This method is thread-safe
0147 /// \param[in] type an instance of the extension type
0148 /// \return Status indicating whether the registration succeeded
     // NOTE(review): the exact error returned for a non-unique name is not
     // visible in this header -- confirm in the implementation.
0149 ARROW_EXPORT
0150 Status RegisterExtensionType(std::shared_ptr<ExtensionType> type);
0151 
0152 /// \brief Delete an extension type from the global registry. This method is
0153 /// thread-safe
0154 /// \param[in] type_name the unique name of a registered extension type, i.e.
     /// the name under which the type was registered
0155 /// \return Status error if the type name is unknown
0156 ARROW_EXPORT
0157 Status UnregisterExtensionType(const std::string& type_name);
0158 
0159 /// \brief Retrieve an extension type from the global registry. Returns nullptr
0160 /// if not found. This method is thread-safe
     /// \param[in] type_name the name of the extension type to look up
0161 /// \return the globally-registered extension type, or nullptr if not found
0162 ARROW_EXPORT
0163 std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name);
0164 
0165 ARROW_EXPORT extern const char kExtensionTypeKeyName[];
     // Declared here, defined elsewhere; the string value is not visible in this
     // header. NOTE(review): presumably the metadata key under which an extension
     // type's name is stored -- confirm against the definition.
0166 ARROW_EXPORT extern const char kExtensionMetadataKeyName[];
     // Declared here, defined elsewhere; the string value is not visible in this
     // header. NOTE(review): presumably the metadata key under which the output
     // of ExtensionType::Serialize() is stored -- confirm against the definition.
0167 
0168 }  // namespace arrow