File indexing completed on 2025-01-18 10:10:52
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
0018 #define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
0019
0020 #ifdef R__USE_IMT
0021 #define DL_USE_MTE
0022 #endif
0023
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "TMatrix.h"
#include "TMVA/Config.h"
#include "CpuBuffer.h"
0030
0031
0032 #if defined(DEBUG_TMVA_TCPUMATRIX)
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
/*
 * Debug helper (only defined when DEBUG_TMVA_TCPUMATRIX is set): print a
 * matrix to std::cout.
 *
 *   mat  - any expression providing GetRawDataPointer(), GetNrows(),
 *          GetNcols() and operator()(row, col). It is evaluated exactly once
 *          and may be a compound expression such as `*ptr`.
 *   text - label streamed in front of the stringified `mat` expression.
 *
 * Output: a banner line with the label, the expression text and the
 * dimensions, followed by one comma-separated line per row.
 * If the raw data pointer is null, a message is printed and the process
 * exits with status 1.
 *
 * The expansion is a plain brace block (not do/while), so it can be used with
 * or without a trailing semicolon, exactly like the empty non-debug variant.
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text) \
   { \
      auto &&_mat = (mat); /* bind once: no repeated evaluation of `mat` */ \
      auto _dpointer = _mat.GetRawDataPointer(); \
      if (!_dpointer) { \
         std::cout << #mat << " is null pointer" << std::endl; \
         std::exit(1); \
      } \
      auto _nrows = _mat.GetNrows(); \
      auto _ncols = _mat.GetNcols(); \
      std::cout << "---------------------" << (text) << " " << #mat << "(" << _nrows << "," << _ncols << ")" \
                << "--------------------" << std::endl; \
      for (size_t _i = 0; _i < _nrows; _i++) { \
         for (size_t _j = 0; _j < _ncols; _j++) { \
            std::cout << _mat(_i, _j); \
            if (_j < _ncols - 1) \
               std::cout << ","; \
         } \
         std::cout << std::endl; \
      } \
   }
0065 #else
0066 #define TMVA_DNN_PrintTCpuMatrix(mat, text)
0067 #endif
0068
0069 namespace TMVA {
0070 namespace DNN {
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085 template <typename AFloat>
0086 class TCpuMatrix {
0087 private:
0088 static std::vector<AFloat> fOnes;
0089
0090 public:
0091 TCpuBuffer<AFloat> fBuffer;
0092
0093 private:
0094 size_t fNCols;
0095 size_t fNRows;
0096
0097 public:
0098
0099
0100
0101
0102
0103
0104 TCpuBuffer<AFloat>& GetBuffer() {return fBuffer;}
0105 const TCpuBuffer<AFloat>& GetBuffer() const {return fBuffer;}
0106
0107 TCpuBuffer<AFloat> &GetDeviceBuffer() { return fBuffer; }
0108 const TCpuBuffer<AFloat> &GetDeviceBuffer() const { return fBuffer; }
0109
0110 static const AFloat *GetOnePointer() { return fOnes.data(); }
0111
0112 static size_t GetOnePointerSize() { return fOnes.size(); }
0113
0114 static void InitializeOneVector(size_t n);
0115
0116 TCpuMatrix() : fNCols(0), fNRows(0) {}
0117
0118
0119 TCpuMatrix(size_t nRows, size_t nCols);
0120
0121
0122 TCpuMatrix(const TMatrixT<AFloat> &);
0123
0124
0125 TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);
0126
0127
0128 TCpuMatrix<AFloat> &operator=(const TMatrixT<AFloat> &);
0129
0130
0131 TCpuMatrix(const TCpuMatrix &) = default;
0132 TCpuMatrix(TCpuMatrix &&) = default;
0133 TCpuMatrix &operator=(const TCpuMatrix &) = default;
0134 TCpuMatrix &operator=(TCpuMatrix &&) = default;
0135 ~TCpuMatrix() = default;
0136
0137
0138
0139 void Zero();
0140
0141
0142
0143 operator TMatrixT<AFloat>() const;
0144
0145
0146
0147 template <typename Function_t>
0148 void Map(Function_t &f);
0149
0150
0151
0152 template <typename Function_t>
0153 void MapFrom(Function_t &f, const TCpuMatrix &A);
0154
0155 size_t GetNrows() const { return fNRows; }
0156 size_t GetNcols() const { return fNCols; }
0157 size_t GetNoElements() const { return fNRows * fNCols; }
0158 size_t GetSize() const { return fNRows * fNCols; }
0159
0160
0161 AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }
0162 AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }
0163
0164
0165
0166 AFloat *GetRawDataPointer() { return fBuffer; }
0167 const AFloat *GetRawDataPointer() const { return fBuffer; }
0168
0169 static Executor &GetThreadExecutor() { return TMVA::Config::Instance().GetThreadExecutor(); }
0170
0171
0172 static size_t GetNWorkItems(size_t nelements);
0173
0174
0175 void Print() const
0176 {
0177 TCpuMatrix cpuMatrix = *this;
0178 TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");
0179 }
0180
0181 private:
0182 void Initialize();
0183 };
0184
0185 template <typename AFloat>
0186 std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};
0187
0188
0189
0190 template <typename AFloat>
0191 size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
0192 {
0193
0194
0195
0196 const size_t minElements = 1000;
0197 const size_t nCpu = TMVA::Config::Instance().GetNCpu();
0198 if (nElements <= minElements)
0199 return nElements;
0200 if (nElements < nCpu * minElements) {
0201 size_t nt = nElements / minElements;
0202 return nElements / nt;
0203 }
0204 return nElements / nCpu;
0205
0206
0207 }
0208
0209
0210 template <typename AFloat>
0211 template <typename Function_t>
0212 inline void TCpuMatrix<AFloat>::Map(Function_t &f)
0213 {
0214 AFloat *data = GetRawDataPointer();
0215 size_t nelements = GetNoElements();
0216 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
0217
0218 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
0219 size_t jMax = std::min(workerID + nsteps, nelements);
0220 for (size_t j = workerID; j < jMax; ++j) {
0221 data[j] = f(data[j]);
0222 }
0223 return 0;
0224 };
0225
0226 if (nsteps < nelements) {
0227 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
0228
0229
0230
0231
0232 } else {
0233 R__ASSERT(nelements == nsteps);
0234 ff(0);
0235 }
0236 }
0237
0238
0239 template <typename AFloat>
0240 template <typename Function_t>
0241 inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
0242 {
0243 AFloat *dataB = GetRawDataPointer();
0244 const AFloat *dataA = A.GetRawDataPointer();
0245
0246 size_t nelements = GetNoElements();
0247 R__ASSERT(nelements == A.GetNoElements());
0248 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
0249
0250 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
0251 size_t jMax = std::min(workerID + nsteps, nelements);
0252 for (size_t j = workerID; j < jMax; ++j) {
0253 dataB[j] = f(dataA[j]);
0254 }
0255 return 0;
0256 };
0257 if (nsteps < nelements) {
0258 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
0259
0260
0261
0262 } else {
0263 R__ASSERT(nelements == nsteps);
0264 ff(0);
0265 }
0266 }
0267
0268 template <typename AFloat>
0269 void TCpuMatrix<AFloat>::Zero()
0270 {
0271 for (size_t j = 0; j < fNCols; j++) {
0272 for (size_t i = 0; i < fNRows; i++) {
0273 (*this)(i, j) = 0;
0274 }
0275 }
0276 }
0277
0278 }
0279 }
0280
0281 #endif