File indexing completed on 2025-08-27 08:47:21
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <vector>
0024
0025 #include "arrow/buffer.h"
0026 #include "arrow/compare.h"
0027 #include "arrow/result.h"
0028 #include "arrow/status.h"
0029 #include "arrow/type.h"
0030 #include "arrow/type_traits.h"
0031 #include "arrow/util/macros.h"
0032 #include "arrow/util/visibility.h"
0033
0034 namespace arrow {
0035
0036 static inline bool is_tensor_supported(Type::type type_id) {
0037 switch (type_id) {
0038 case Type::UINT8:
0039 case Type::INT8:
0040 case Type::UINT16:
0041 case Type::INT16:
0042 case Type::UINT32:
0043 case Type::INT32:
0044 case Type::UINT64:
0045 case Type::INT64:
0046 case Type::HALF_FLOAT:
0047 case Type::FLOAT:
0048 case Type::DOUBLE:
0049 return true;
0050 default:
0051 break;
0052 }
0053 return false;
0054 }
0055
0056 namespace internal {
0057
0058 ARROW_EXPORT
0059 Status ComputeRowMajorStrides(const FixedWidthType& type,
0060 const std::vector<int64_t>& shape,
0061 std::vector<int64_t>* strides);
0062
0063 ARROW_EXPORT
0064 Status ComputeColumnMajorStrides(const FixedWidthType& type,
0065 const std::vector<int64_t>& shape,
0066 std::vector<int64_t>* strides);
0067
0068 ARROW_EXPORT
0069 bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
0070 const std::vector<int64_t>& shape,
0071 const std::vector<int64_t>& strides);
0072
0073 ARROW_EXPORT
0074 Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
0075 const std::shared_ptr<Buffer>& data,
0076 const std::vector<int64_t>& shape,
0077 const std::vector<int64_t>& strides,
0078 const std::vector<std::string>& dim_names);
0079
0080 ARROW_EXPORT
0081 Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
0082 MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
0083
0084 }
0085
0086 class ARROW_EXPORT Tensor {
0087 public:
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099 static inline Result<std::shared_ptr<Tensor>> Make(
0100 const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
0101 const std::vector<int64_t>& shape, const std::vector<int64_t>& strides = {},
0102 const std::vector<std::string>& dim_names = {}) {
0103 ARROW_RETURN_NOT_OK(
0104 internal::ValidateTensorParameters(type, data, shape, strides, dim_names));
0105 return std::make_shared<Tensor>(type, data, shape, strides, dim_names);
0106 }
0107
0108 virtual ~Tensor() = default;
0109
0110
0111 Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
0112 const std::vector<int64_t>& shape);
0113
0114
0115 Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
0116 const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);
0117
0118
0119 Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
0120 const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
0121 const std::vector<std::string>& dim_names);
0122
0123 std::shared_ptr<DataType> type() const { return type_; }
0124 std::shared_ptr<Buffer> data() const { return data_; }
0125
0126 const uint8_t* raw_data() const { return data_->data(); }
0127 uint8_t* raw_mutable_data() { return data_->mutable_data(); }
0128
0129 const std::vector<int64_t>& shape() const { return shape_; }
0130 const std::vector<int64_t>& strides() const { return strides_; }
0131
0132 int ndim() const { return static_cast<int>(shape_.size()); }
0133
0134 const std::vector<std::string>& dim_names() const { return dim_names_; }
0135 const std::string& dim_name(int i) const;
0136
0137
0138 int64_t size() const;
0139
0140
0141 bool is_mutable() const { return data_->is_mutable(); }
0142
0143
0144 bool is_contiguous() const;
0145
0146
0147 bool is_row_major() const;
0148
0149
0150 bool is_column_major() const;
0151
0152 Type::type type_id() const;
0153
0154 bool Equals(const Tensor& other, const EqualOptions& = EqualOptions::Defaults()) const;
0155
0156
0157 Result<int64_t> CountNonZero() const;
0158
0159
0160 static int64_t CalculateValueOffset(const std::vector<int64_t>& strides,
0161 const std::vector<int64_t>& index) {
0162 const int64_t n = static_cast<int64_t>(index.size());
0163 int64_t offset = 0;
0164 for (int64_t i = 0; i < n; ++i) {
0165 offset += index[i] * strides[i];
0166 }
0167 return offset;
0168 }
0169
0170 int64_t CalculateValueOffset(const std::vector<int64_t>& index) const {
0171 return Tensor::CalculateValueOffset(strides_, index);
0172 }
0173
0174
0175 template <typename ValueType>
0176 const typename ValueType::c_type& Value(const std::vector<int64_t>& index) const {
0177 using c_type = typename ValueType::c_type;
0178 const int64_t offset = CalculateValueOffset(index);
0179 const c_type* ptr = reinterpret_cast<const c_type*>(raw_data() + offset);
0180 return *ptr;
0181 }
0182
0183 Status Validate() const {
0184 return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_);
0185 }
0186
0187 protected:
0188 Tensor() {}
0189
0190 std::shared_ptr<DataType> type_;
0191 std::shared_ptr<Buffer> data_;
0192 std::vector<int64_t> shape_;
0193 std::vector<int64_t> strides_;
0194
0195
0196 std::vector<std::string> dim_names_;
0197
0198 template <typename SparseIndexType>
0199 friend class SparseTensorImpl;
0200
0201 private:
0202 ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
0203 };
0204
0205 template <typename TYPE>
0206 class NumericTensor : public Tensor {
0207 public:
0208 using TypeClass = TYPE;
0209 using value_type = typename TypeClass::c_type;
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221 static Result<std::shared_ptr<NumericTensor<TYPE>>> Make(
0222 const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
0223 const std::vector<int64_t>& strides = {},
0224 const std::vector<std::string>& dim_names = {}) {
0225 ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters(
0226 TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names));
0227 return std::make_shared<NumericTensor<TYPE>>(data, shape, strides, dim_names);
0228 }
0229
0230
0231 NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
0232 const std::vector<int64_t>& strides,
0233 const std::vector<std::string>& dim_names)
0234 : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}
0235
0236
0237 NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape)
0238 : NumericTensor(data, shape, {}, {}) {}
0239
0240
0241 NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
0242 const std::vector<int64_t>& strides)
0243 : NumericTensor(data, shape, strides, {}) {}
0244
0245 const value_type& Value(const std::vector<int64_t>& index) const {
0246 return Tensor::Value<TypeClass>(index);
0247 }
0248 };
0249
0250 }