File indexing completed on 2025-01-18 10:10:52
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
0018 #define TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
0019
0020 #include <cstddef>
0021
0022
0023 #include "TMatrix.h"
0024 #include "TMVA/Config.h"
0025 #include "CpuBuffer.h"
0026 #include "CpuMatrix.h"
0027 #include <TMVA/RTensor.hxx>
0028
0029 namespace TMVA {
0030 namespace DNN {
0031
0032
0033
0034
0035
0036
0037
0038
/** TCpuTensor
 *
 * CPU tensor class used by the TMVA deep-learning (DNN) code. It derives from
 * TMVA::Experimental::RTensor, using a reference-counted TCpuBuffer as the
 * storage container, and adds the matrix-oriented interface
 * (GetMatrix, GetHSize/GetWSize, At, operator[], ...) used by the DNN
 * architecture layer.
 *
 * Both column-major (the default) and row-major memory layouts are supported.
 * Note the layout-dependent shape convention of the 3-D/4-D constructors:
 * column-major tensors are shaped {depth, hw, bsize} (batch dimension last),
 * row-major tensors are shaped {bsize, depth, hw} (batch dimension first).
 */
template <typename AFloat>
class TCpuTensor : public TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>> {

private:

public:
   // TCpuMatrix needs access to the tensor internals to build matrix views.
   friend class TCpuMatrix<AFloat>;

   using Shape_t = typename TMVA::Experimental::RTensor<AFloat>::Shape_t;
   using MemoryLayout = TMVA::Experimental::MemoryLayout;
   using Matrix_t = TCpuMatrix<AFloat>;
   using Scalar_t = AFloat;

   /// Default constructor: empty tensor (zero-sized buffer, shape {0}).
   TCpuTensor(): TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(0), {0})
   {}

   /// Construct an (uninitialized) matrix-like tensor of shape {n, m}.
   TCpuTensor(size_t n, size_t m, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(n * m), {n, m}, memlayout)
   {}

   /// Construct a 3-D tensor of bsize x depth x hw elements. The stored shape
   /// is {depth, hw, bsize} for column-major layout; for row-major layout it
   /// is reshaped in place to {bsize, depth, hw} so that the batch dimension
   /// is always the "first" one in the sense of GetFirstSize().
   TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * hw), {depth, hw, bsize}, memlayout)
   {
      if (memlayout == MemoryLayout::RowMajor)
         this->ReshapeInplace({bsize, depth, hw});
   }

   /// Construct a 4-D tensor of bsize x depth x height x width elements.
   /// Shape is {depth, height, width, bsize} for column-major layout and
   /// {bsize, depth, height, width} for row-major layout (see 3-D ctor).
   TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width,
              MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * height * width),
                                                                {depth, height, width, bsize}, memlayout)
   {
      if (memlayout == MemoryLayout::RowMajor)
         this->ReshapeInplace({bsize, depth, height, width});
   }

   /// Construct an (uninitialized) tensor with an arbitrary shape.
   TCpuTensor(Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)),
                                                                shape, memlayout)
   {}

   /// Construct a tensor from an external data array. The data are COPIED
   /// element by element into a newly allocated buffer; the tensor does not
   /// take ownership of (or keep referring to) `data`.
   TCpuTensor(AFloat *data, const Shape_t &shape,
              MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)), shape, memlayout)
   {
      auto& container = *(this->GetContainer());
      for (size_t i = 0; i < this->GetSize(); ++i) container[i] = data[i];
   }

   /// Construct a tensor view on an existing TCpuBuffer with the given shape.
   /// NOTE(review): this copy-constructs the TCpuBuffer handle — presumably a
   /// shallow, reference-counted copy sharing the underlying memory; confirm
   /// against TCpuBuffer. The buffer may be larger than the tensor needs
   /// (only `GetSize() <= buffer size` is asserted).
   TCpuTensor(const TCpuBuffer<AFloat>& buffer, Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(buffer), shape, memlayout) {
      R__ASSERT(this->GetSize() <= this->GetContainer()->GetSize());
   }

   /// Construct a tensor sharing the buffer of a TCpuMatrix. The initial
   /// shape is {nrows, ncols}; if `dim > 2`, singleton dimensions are added
   /// (at the end for column-major, at the front for row-major) so the tensor
   /// has `dim` dimensions while keeping the same data.
   TCpuTensor(const TCpuMatrix<AFloat> &matrix, size_t dim = 3, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
      : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(matrix.GetBuffer()),{matrix.GetNrows(), matrix.GetNcols()}, memlayout)
   {

      if (dim > 2) {
         Shape_t shape = this->GetShape();

         if (this->GetLayout() == MemoryLayout::ColumnMajor) {
            shape.insert(shape.end(),dim-2, 1);
         } else {
            shape.insert(shape.begin(), dim - 2, 1);
         }
         this->ReshapeInplace(shape);
      }
   }

   /// Conversion to a ROOT TMatrixT (always a deep copy). A 2-D tensor (or a
   /// 3-D tensor with first size 1) converts via GetMatrix(); anything else
   /// is flattened into a 1 x GetSize() row matrix.
   operator TMatrixT<AFloat>() const {

      if (this->GetShape().size() == 2 || (this->GetShape().size() == 3 && GetFirstSize() == 1)) {
         TCpuMatrix<AFloat> temp = GetMatrix();
         return temp;
      }

      return TMatrixT<AFloat>(1, this->GetSize(), this->GetData());
   }

   /// Raw pointer to the tensor data (relies on TCpuBuffer's implicit
   /// conversion to AFloat*).
   AFloat *GetRawDataPointer() { return *(this->GetContainer()); }
   const AFloat *GetRawDataPointer() const { return *(this->GetContainer()); }

   /// Access the underlying buffer ("device buffer" only by analogy with the
   /// GPU architectures — this is ordinary host memory).
   const TCpuBuffer<AFloat> & GetDeviceBuffer() const {return *(this->GetContainer());}
   TCpuBuffer<AFloat> & GetDeviceBuffer() {return *(this->GetContainer());}

   /// Total number of elements (alias for GetSize(), matrix-style naming).
   size_t GetNoElements() const { return this->GetSize(); }

   /// Size of the leading (batch) dimension: the last shape entry for
   /// column-major tensors, the first one for row-major tensors.
   size_t GetFirstSize() const
   {
      auto& shape = this->GetShape();
      return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.back() : shape.front();
   }

   /// Size of the channel/depth dimension (1 for 2-D tensors).
   size_t GetCSize() const
   {
      auto& shape = this->GetShape();
      if (shape.size() == 2) return 1;
      return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.front() : shape[1];
   }

   /// Size of the "height" dimension, depending on rank and layout.
   size_t GetHSize() const
   {
      auto& shape = this->GetShape();
      if (shape.size() == 2) return shape[0];
      if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[0] : shape[1] ;
      if (shape.size() >= 4) return shape[2] ;
      return 0;

   }
   /// Size of the "width" dimension, depending on rank and layout.
   size_t GetWSize() const
   {
      auto& shape = this->GetShape();
      if (shape.size() == 2) return shape[1];
      if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[1] : shape[2] ;
      if (shape.size() >= 4) return shape[3] ;
      return 0;

   }

   /// Matrix-like row/column counts derived from shape and strides: for a
   /// column-major 2-D tensor the last stride equals the number of rows; for
   /// a row-major one the first stride equals the number of columns.
   size_t GetNrows() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetStrides().back() : this->GetShape().front();}
   size_t GetNcols() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetShape().back() : this->GetStrides().front(); }

   /// Shorthand for GetMemoryLayout().
   MemoryLayout GetLayout() const { return this->GetMemoryLayout(); }

   /// Return the tensor as a TCpuMatrix of GetHSize() x GetWSize(), sharing
   /// the buffer. Only valid when at most two dimensions are non-trivial
   /// (asserted in debug builds; ndims is unused when NDEBUG is set).
   TCpuMatrix<AFloat> GetMatrix() const
   {
      [[maybe_unused]] size_t ndims = 0;
      auto& shape = this->GetShape();

      // count the non-singleton dimensions
      for (auto& shape_i : shape){
         if (shape_i != 1) {
            ndims++;
         }
      }
      assert(ndims <= 2 && shape.size() > 1);
      return TCpuMatrix<AFloat>(*(this->GetContainer()), GetHSize(), GetWSize());
   }

   /// Return a copy of this tensor reshaped to `shape` (the copy shares the
   /// buffer; only the shape/stride metadata differs).
   TCpuTensor<AFloat> Reshape(Shape_t shape) const
   {
      TCpuTensor<AFloat> x(*this);
      x.ReshapeInplace(shape);
      return x;
   }

   /// Return slice i along the batch dimension (first dimension for
   /// row-major, last for column-major) as a tensor of rank n-1 viewing a
   /// sub-buffer of this tensor — no data are copied.
   TCpuTensor<AFloat> At(size_t i)
   {
      auto &shape = this->GetShape();
      auto layout = this->GetMemoryLayout();
      Shape_t sliced_shape = (layout == MemoryLayout::RowMajor) ? Shape_t(shape.begin() + 1, shape.end())
                                                                : Shape_t(shape.begin(), shape.end() - 1);

      // the stride of the batch dimension is the number of elements per slice
      size_t buffsize = (layout == MemoryLayout::RowMajor) ? this->GetStrides().front() : this->GetStrides().back();
      size_t offset = i * buffsize;

      return TCpuTensor<AFloat>(this->GetContainer()->GetSubBuffer(offset, buffsize), sliced_shape, layout);
   }

   /// Const overload of At(); the returned slice still views (and can write
   /// through) the shared buffer.
   TCpuTensor<AFloat> At(size_t i) const { return (const_cast<TCpuTensor<AFloat> &>(*this)).At(i); }

   /// Return batch slice i as a matrix view (column-major tensors only).
   TCpuMatrix<AFloat> operator[](size_t i) const {
      assert(this->GetMemoryLayout() == MemoryLayout::ColumnMajor );
      return At(i).GetMatrix();
   }

   /// Set all elements to zero.
   void Zero()
   {
      AFloat *data = *(this->GetContainer());
      for (size_t i = 0; i < this->GetSize(); ++i)
         data[i] = 0;
   }

   /// Element access for 2-D tensors (layout-aware linear indexing).
   AFloat &operator()(size_t i, size_t j)
   {
      auto &shape = this->GetShape();
      assert(shape.size() == 2);
      return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (*(this->GetContainer()))[i * shape[1] + j]
                                                                 : (*(this->GetContainer()))[j * shape[0] + i];
   }

   /// Element access for 3-D tensors. For column-major layout, i selects the
   /// outer (batch) slice and (j, k) index within the slice's 2-D block.
   AFloat &operator()(size_t i, size_t j, size_t k)
   {
      auto &shape = this->GetShape();
      assert(shape.size() == 3);

      return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
                ? (*(this->GetContainer()))[i * shape[1] * shape[2] + j * shape[2] + k]
                : (*(this->GetContainer()))[i * shape[0] * shape[1] + k * shape[0] + j];
   }

   /// Read-only element access for 2-D tensors.
   AFloat operator()(size_t i, size_t j) const
   {
      auto &shape = this->GetShape();
      assert(shape.size() == 2);
      return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (this->GetData())[i * shape[1] + j]
                                                                 : (this->GetData())[j * shape[0] + i];
   }

   /// Read-only element access for 3-D tensors (see non-const overload).
   AFloat operator()(size_t i, size_t j, size_t k) const
   {
      auto &shape = this->GetShape();
      assert(shape.size() == 3);

      return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
                ? (this->GetData())[i * shape[1] * shape[2] + j * shape[2] + k]
                : (this->GetData())[i * shape[0] * shape[1] + k * shape[0] + j];
   }

   /// Apply f element-wise in place: x <- f(x) (parallelized, defined below).
   template <typename Function_t>
   void Map(Function_t & f);

   /// Fill this tensor with f applied element-wise to A: this <- f(A)
   /// (parallelized, defined below). Sizes must match.
   template <typename Function_t>
   void MapFrom(Function_t & f, const TCpuTensor<AFloat> &A);

   /// Reference count of the shared buffer (useful for debugging views).
   size_t GetBufferUseCount() const { return this->GetContainer()->GetUseCount(); }

   /// Print shape and all elements in linear (buffer) order.
   void Print(const char *name = "Tensor") const
   {
      PrintShape(name);

      for (size_t i = 0; i < this->GetSize(); i++)
         std::cout << (this->GetData())[i] << " ";
      std::cout << std::endl;
   }
   /// Print the tensor shape and memory layout.
   /// NOTE(review): assumes a non-empty shape (shape.back() on an empty
   /// shape would be undefined) — true for all constructors above.
   void PrintShape(const char *name = "Tensor") const
   {
      std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ? "RowMajor" : "ColMajor";
      std::cout << name << " shape : { ";
      auto &shape = this->GetShape();
      for (size_t i = 0; i < shape.size() - 1; ++i)
         std::cout << shape[i] << " , ";
      std::cout << shape.back() << " } "
                << " Layout : " << memlayout << std::endl;
   }
};
0321
0322
0323 template <typename AFloat>
0324 template <typename Function_t>
0325 inline void TCpuTensor<AFloat>::Map(Function_t &f)
0326 {
0327 AFloat *data = GetRawDataPointer();
0328 size_t nelements = GetNoElements();
0329 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
0330
0331 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
0332 size_t jMax = std::min(workerID + nsteps, nelements);
0333 for (size_t j = workerID; j < jMax; ++j) {
0334 data[j] = f(data[j]);
0335 }
0336 return 0;
0337 };
0338
0339 if (nsteps < nelements) {
0340 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
0341
0342
0343
0344
0345 } else {
0346 R__ASSERT(nelements == nsteps);
0347 ff(0);
0348 }
0349 }
0350
0351
0352 template <typename AFloat>
0353 template <typename Function_t>
0354 inline void TCpuTensor<AFloat>::MapFrom(Function_t &f, const TCpuTensor<AFloat> &A)
0355 {
0356 AFloat *dataB = GetRawDataPointer();
0357 const AFloat *dataA = A.GetRawDataPointer();
0358
0359 size_t nelements = GetNoElements();
0360 R__ASSERT(nelements == A.GetNoElements());
0361 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
0362
0363 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
0364 size_t jMax = std::min(workerID + nsteps, nelements);
0365 for (size_t j = workerID; j < jMax; ++j) {
0366 dataB[j] = f(dataA[j]);
0367 }
0368 return 0;
0369 };
0370 if (nsteps < nelements) {
0371 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
0372
0373
0374
0375 } else {
0376 R__ASSERT(nelements == nsteps);
0377 ff(0);
0378 }
0379 }
0380
0381
0382 }
0383 }
0384
0385 #endif