File indexing completed on 2025-01-18 10:10:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef TMVA_DNN_GENERALLAYER
0028 #define TMVA_DNN_GENERALLAYER
0029
0030 #include <sstream>
0031 #include <limits>
0032 #include <vector>
0033 #include <string>
0034
0035
0036 #include "TMVA/Tools.h"
0037 #include "TError.h" // for R__ASSERT
0038
0039 #include "TMVA/DNN/Functions.h"
0040
0041 namespace TMVA {
0042 namespace DNN {
0043
0044
0045
0046
0047
0048
0049
/** \class VGeneralLayer
 *  \brief Generic base class for all layers of a deep neural network.
 *
 *  Stores the layer geometry (input and output depth/height/width), the
 *  trainable parameters (weights and biases as one or more backend matrices),
 *  the matching gradient matrices, and the output / activation-gradient
 *  tensors used during forward and backward propagation. All linear-algebra
 *  types come from the Architecture_t backend, so the same layer code works
 *  with any architecture implementation.
 */
template <typename Architecture_t>
class VGeneralLayer {

   using Tensor_t = typename Architecture_t::Tensor_t; ///< backend tensor type
   using Matrix_t = typename Architecture_t::Matrix_t; ///< backend matrix type
   using Scalar_t = typename Architecture_t::Scalar_t; ///< backend scalar type

protected:
   size_t fBatchSize; ///< Batch size used for training and evaluation.

   size_t fInputDepth;  ///< The depth of the previous layer or input.
   size_t fInputHeight; ///< The height of the previous layer or input.
   size_t fInputWidth;  ///< The width of the previous layer or input.

   size_t fDepth;  ///< The depth of the layer.
   size_t fHeight; ///< The height of the layer.
   size_t fWidth;  ///< The width of the layer.

   bool fIsTraining; ///< Flag indicating whether the layer is in training mode.

   std::vector<Matrix_t> fWeights; ///< The weights associated to the layer.
   std::vector<Matrix_t> fBiases;  ///< The biases associated to the layer.

   std::vector<Matrix_t> fWeightGradients; ///< Gradients w.r.t. the weights of the layer.
   std::vector<Matrix_t> fBiasGradients;   ///< Gradients w.r.t. the biases of the layer.

   Tensor_t fOutput;              ///< Activations of this layer.
   Tensor_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EInitialization fInit; ///< The initialization method used for the weights.

public:
   /// Constructor: every weight slice shares one (rows, cols) shape, and every
   /// bias slice shares another.
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
                 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
                 size_t OutputNCols, EInitialization Init);

   /// Constructor: each weight/bias slice may have its own (rows, cols) shape,
   /// given per slice in the vectors.
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
                 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
                 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
                 EInitialization Init);

   /// "Copy" constructor from a pointer: replicates geometry, weights and
   /// biases of \p layer (gradient matrices are allocated but not copied).
   VGeneralLayer(VGeneralLayer<Architecture_t> *layer);

   /// Copy constructor.
   VGeneralLayer(const VGeneralLayer &);

   /// Virtual destructor: this is a polymorphic base class.
   virtual ~VGeneralLayer();

   /// Initializes the weights (according to fInit) and sets all gradients to zero.
   virtual void Initialize();

   /// Computes the activations of this layer from \p input.
   virtual void Forward(Tensor_t &input, bool applyDropout = false) = 0;

   /// Backpropagates: computes the gradients w.r.t. this layer's parameters and
   /// writes the gradients w.r.t. the previous layer's activations into
   /// \p gradients_backward.
   virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward ) = 0;

   /// Resets transient training state (e.g. in recurrent layers); no-op by default.
   virtual void ResetTraining() {}

   /// Updates weights and biases with the stored gradients, scaled by
   /// \p learningRate (one gradient-descent step).
   void Update(const Scalar_t learningRate);

   /// Updates the weights: W -= learningRate * weightGradients.
   void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /// Updates the biases: B -= learningRate * biasGradients.
   void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);

   /// Updates the stored weight gradients in place, scaled by -learningRate.
   void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /// Updates the stored bias gradients in place, scaled by -learningRate.
   void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);

   /// Deep-copies the given weights into this layer (sizes must match).
   void CopyWeights(const std::vector<Matrix_t> &otherWeights);

   /// Deep-copies the given biases into this layer (sizes must match).
   void CopyBiases(const std::vector<Matrix_t> &otherBiases);

   /// Copies weights, biases and extra parameters from a layer of a possibly
   /// different architecture (e.g. GPU -> CPU) via Architecture_t::CopyDiffArch.
   template <typename Arch>
   void CopyParameters(const VGeneralLayer<Arch> &layer);

   /// Prints a human-readable description of the layer.
   virtual void Print() const = 0;

   /// Writes the layer weights to the given XML node.
   virtual void AddWeightsXMLTo(void *parent) = 0;

   /// Reads the layer weights from the given XML node.
   virtual void ReadWeightsFromXML(void *parent) = 0;

   /// Sets the dropout probability; no-op for layers without dropout.
   virtual void SetDropoutProbability(Scalar_t ) {}

   size_t GetBatchSize() const { return fBatchSize; }
   size_t GetInputDepth() const { return fInputDepth; }
   size_t GetInputHeight() const { return fInputHeight; }
   size_t GetInputWidth() const { return fInputWidth; }
   size_t GetDepth() const { return fDepth; }
   size_t GetHeight() const { return fHeight; }
   size_t GetWidth() const { return fWidth; }
   bool IsTraining() const { return fIsTraining; }

   const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
   std::vector<Matrix_t> &GetWeights() { return fWeights; }

   const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
   Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }

   const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
   std::vector<Matrix_t> &GetBiases() { return fBiases; }

   const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
   Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }

   const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
   std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }

   const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
   Matrix_t &GetWeightGradientsAt(size_t i) { return fWeightGradients[i]; }

   const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
   std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }

   const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
   Matrix_t &GetBiasGradientsAt(size_t i) { return fBiasGradients[i]; }

   const Tensor_t &GetOutput() const { return fOutput; }
   Tensor_t &GetOutput() { return fOutput; }

   const Tensor_t &GetActivationGradients() const { return fActivationGradients; }
   Tensor_t &GetActivationGradients() { return fActivationGradients; }

   // NOTE(review): the non-const overloads return Matrix_t by value while the
   // const overloads return a const reference to the result of
   // fOutput.At(i).GetMatrix(). If GetMatrix() returns by value, the const
   // overloads bind a reference to a temporary (dangling) — verify that
   // Tensor_t::At(i).GetMatrix() yields a view whose lifetime outlives the call.
   Matrix_t GetOutputAt(size_t i) { return fOutput.At(i).GetMatrix(); }
   const Matrix_t &GetOutputAt(size_t i) const { return fOutput.At(i).GetMatrix(); }

   Matrix_t GetActivationGradientsAt(size_t i) { return fActivationGradients.At(i).GetMatrix(); }
   const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients.At(i).GetMatrix(); }

   /// Returns additional (non weight/bias) trainable parameters, e.g.
   /// batch-normalization running statistics; empty by default.
   virtual std::vector<Matrix_t> GetExtraLayerParameters() const { return std::vector<Matrix_t>(); }

   /// Sets the additional layer parameters; no-op by default.
   virtual void SetExtraLayerParameters(const std::vector<Matrix_t> & ) {}

   EInitialization GetInitialization() const { return fInit; }

   void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   void SetDepth(size_t depth) { fDepth = depth; }
   void SetHeight(size_t height) { fHeight = height; }
   void SetWidth(size_t width) { fWidth = width; }
   void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }

   /// Helpers for serializing parameters to/from XML (used by the
   /// AddWeightsXMLTo / ReadWeightsFromXML implementations of derived layers).
   void WriteTensorToXML( void * node, const char * name, const std::vector<Matrix_t> & tensor);
   void WriteMatrixToXML( void * node, const char * name, const Matrix_t & matrix);

   void ReadMatrixXML( void * node, const char * name, Matrix_t & matrix);

};
0233
0234
0235
0236
0237
0238 template <typename Architecture_t>
0239 VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
0240 size_t depth, size_t height, size_t width, size_t weightsNSlices,
0241 size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
0242 size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
0243 size_t outputNRows, size_t outputNCols, EInitialization init)
0244 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
0245 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0246 fOutput( outputNSlices, outputNRows, outputNCols ),
0247 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
0248 fInit(init)
0249 {
0250
0251 for (size_t i = 0; i < weightsNSlices; i++) {
0252 fWeights.emplace_back(weightsNRows, weightsNCols);
0253 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0254 }
0255
0256 for (size_t i = 0; i < biasesNSlices; i++) {
0257 fBiases.emplace_back(biasesNRows, biasesNCols);
0258 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0259 }
0260 }
0261
0262
0263 template <typename Architecture_t>
0264 VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
0265 size_t depth, size_t height, size_t width, size_t weightsNSlices,
0266 std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
0267 size_t biasesNSlices, std::vector<size_t> biasesNRows,
0268 std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
0269 size_t outputNCols, EInitialization init)
0270 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
0271 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0272 fOutput( outputNSlices, outputNRows, outputNCols ),
0273 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
0274 fInit(init)
0275 {
0276
0277 for (size_t i = 0; i < weightsNSlices; i++) {
0278 fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
0279 fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
0280 }
0281
0282 for (size_t i = 0; i < biasesNSlices; i++) {
0283 fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
0284 fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
0285 }
0286
0287
0288
0289
0290
0291 }
0292
0293
0294 template <typename Architecture_t>
0295 VGeneralLayer<Architecture_t>::VGeneralLayer(VGeneralLayer<Architecture_t> *layer)
0296 : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
0297 fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
0298 fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
0299 fBiasGradients(),
0300 fOutput( layer->GetOutput().GetShape() ),
0301 fActivationGradients( layer->GetActivationGradients().GetShape() ),
0302 fInit(layer->GetInitialization() )
0303 {
0304
0305 size_t weightsNSlices = (layer->GetWeights()).size();
0306 size_t weightsNRows = 0;
0307 size_t weightsNCols = 0;
0308
0309 for (size_t i = 0; i < weightsNSlices; i++) {
0310 weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
0311 weightsNCols = (layer->GetWeightsAt(i)).GetNcols();
0312
0313 fWeights.emplace_back(weightsNRows, weightsNCols);
0314 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0315
0316 Architecture_t::Copy(fWeights[i], layer->GetWeightsAt(i));
0317 }
0318
0319 size_t biasesNSlices = (layer->GetBiases()).size();
0320 size_t biasesNRows = 0;
0321 size_t biasesNCols = 0;
0322
0323 for (size_t i = 0; i < biasesNSlices; i++) {
0324 biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
0325 biasesNCols = (layer->GetBiasesAt(i)).GetNcols();
0326
0327 fBiases.emplace_back(biasesNRows, biasesNCols);
0328 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0329
0330 Architecture_t::Copy(fBiases[i], layer->GetBiasesAt(i));
0331 }
0332 }
0333
0334
0335 template <typename Architecture_t>
0336 VGeneralLayer<Architecture_t>::VGeneralLayer(const VGeneralLayer &layer)
0337 : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
0338 fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
0339 fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0340 fOutput( layer.GetOutput() ),
0341 fActivationGradients( layer.GetActivationGradients() ),
0342 fInit( layer.GetInitialization())
0343 {
0344
0345 size_t weightsNSlices = layer.fWeights.size();
0346 size_t weightsNRows = 0;
0347 size_t weightsNCols = 0;
0348
0349 for (size_t i = 0; i < weightsNSlices; i++) {
0350 weightsNRows = (layer.fWeights[i]).GetNrows();
0351 weightsNCols = (layer.fWeights[i]).GetNcols();
0352
0353 fWeights.emplace_back(weightsNRows, weightsNCols);
0354 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0355
0356 Architecture_t::Copy(fWeights[i], layer.fWeights[i]);
0357 }
0358
0359 size_t biasesNSlices = layer.fBiases.size();
0360 size_t biasesNRows = 0;
0361 size_t biasesNCols = 0;
0362
0363 for (size_t i = 0; i < biasesNSlices; i++) {
0364 biasesNRows = (layer.fBiases[i]).GetNrows();
0365 biasesNCols = (layer.fBiases[i]).GetNcols();
0366
0367 fBiases.emplace_back(biasesNRows, biasesNCols);
0368 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0369
0370 Architecture_t::Copy(fBiases[i], layer.fBiases[i]);
0371 }
0372
0373 size_t outputNSlices = layer.fOutput.size();
0374 size_t outputNRows = 0;
0375 size_t outputNCols = 0;
0376
0377 for (size_t i = 0; i < outputNSlices; i++) {
0378 outputNRows = (layer.fOutput[i]).GetNrows();
0379 outputNCols = (layer.fOutput[i]).GetNcols();
0380
0381 fOutput.emplace_back(outputNRows, outputNCols);
0382 fActivationGradients.emplace_back(outputNRows, outputNCols);
0383 }
0384 }
0385
0386
0387 template <typename Architecture_t>
0388 VGeneralLayer<Architecture_t>::~VGeneralLayer()
0389 {
0390
0391 }
0392
0393
0394 template <typename Architecture_t>
0395 auto VGeneralLayer<Architecture_t>::Initialize() -> void
0396 {
0397 for (size_t i = 0; i < fWeights.size(); i++) {
0398 initialize<Architecture_t>(fWeights[i], this->GetInitialization());
0399 initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
0400 }
0401
0402 for (size_t i = 0; i < fBiases.size(); i++) {
0403 initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
0404 initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
0405 }
0406 }
0407
0408
0409 template <typename Architecture_t>
0410 auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
0411 {
0412 this->UpdateWeights(fWeightGradients, learningRate);
0413 this->UpdateBiases(fBiasGradients, learningRate);
0414 }
0415
0416
0417 template <typename Architecture_t>
0418 auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
0419 const Scalar_t learningRate) -> void
0420 {
0421 for (size_t i = 0; i < fWeights.size(); i++) {
0422 Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
0423 }
0424 }
0425
0426
0427 template <typename Architecture_t>
0428 auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
0429 const Scalar_t learningRate) -> void
0430 {
0431 for (size_t i = 0; i < fBiases.size(); i++) {
0432 Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
0433 }
0434 }
0435
0436
0437 template <typename Architecture_t>
0438 auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
0439 const Scalar_t learningRate) -> void
0440 {
0441 for (size_t i = 0; i < fWeightGradients.size(); i++) {
0442 Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
0443 }
0444 }
0445
0446
0447 template <typename Architecture_t>
0448 auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
0449 const Scalar_t learningRate) -> void
0450 {
0451 for (size_t i = 0; i < fBiasGradients.size(); i++) {
0452 Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
0453 }
0454 }
0455
0456
0457 template <typename Architecture_t>
0458 auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
0459 {
0460
0461 for (size_t i = 0; i < fWeights.size(); i++) {
0462 Architecture_t::Copy(fWeights[i], otherWeights[i]);
0463 }
0464 }
0465
0466
0467 template <typename Architecture_t>
0468 auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
0469 {
0470 for (size_t i = 0; i < fBiases.size(); i++) {
0471 Architecture_t::Copy(fBiases[i], otherBiases[i]);
0472 }
0473 }
0474
0475
0476 template <typename Architecture_t>
0477 template <typename Arch>
0478 void VGeneralLayer<Architecture_t>::CopyParameters(const VGeneralLayer<Arch> &layer)
0479 {
0480
0481
0482 Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
0483 Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());
0484
0485
0486 auto params = layer.GetExtraLayerParameters();
0487 if (params.size() > 0) {
0488 auto paramsToCopy = GetExtraLayerParameters();
0489 Architecture_t::CopyDiffArch(paramsToCopy, params );
0490 SetExtraLayerParameters(paramsToCopy);
0491 }
0492 }
0493
0494
0495 template <typename Architecture_t>
0496 auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void * node, const char * name, const std::vector<Matrix_t> & tensor) -> void
0497 {
0498 auto xmlengine = gTools().xmlengine();
0499 void* matnode = xmlengine.NewChild(node, 0, name);
0500 if (tensor.size() == 0) return;
0501 xmlengine.NewAttr(matnode,0,"Depth", gTools().StringFromInt(tensor.size()) );
0502
0503 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(tensor[0].GetNrows()) );
0504 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(tensor[0].GetNcols()) );
0505 std::stringstream s;
0506 for (size_t i = 0; i < tensor.size(); ++i) {
0507 auto & mat = tensor[i];
0508 for (Int_t row = 0; row < mat.GetNrows(); row++) {
0509 for (Int_t col = 0; col < mat.GetNcols(); col++) {
0510
0511
0512 s << std::scientific << mat(row, col) << " ";
0513 }
0514 }
0515 }
0516 xmlengine.AddRawLine( matnode, s.str().c_str() );
0517 }
0518
0519
0520 template <typename Architecture_t>
0521 auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void * node, const char * name, const Matrix_t & matrix) -> void
0522 {
0523 auto xmlengine = gTools().xmlengine();
0524 void* matnode = xmlengine.NewChild(node, nullptr, name);
0525
0526 xmlengine.NewAttr(matnode,nullptr,"Rows", gTools().StringFromInt(matrix.GetNrows()) );
0527 xmlengine.NewAttr(matnode,nullptr,"Columns", gTools().StringFromInt(matrix.GetNcols()) );
0528 std::stringstream s;
0529 s.precision( std::numeric_limits<Scalar_t>::digits10 );
0530 size_t nrows = matrix.GetNrows();
0531 size_t ncols = matrix.GetNcols();
0532 for (size_t row = 0; row < nrows; row++) {
0533 for (size_t col = 0; col < ncols; col++) {
0534
0535 s << std::scientific << matrix(row,col) << " ";
0536 }
0537 }
0538
0539 xmlengine.AddRawLine( matnode, s.str().c_str() );
0540 }
0541
0542
//_________________________________________________________________________________________________
/// Reads a matrix written by WriteMatrixToXML from the child node <name> of
/// \p node into \p matrix. The destination matrix must already be allocated
/// with the shape stored in the XML attributes.
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void * node, const char * name, Matrix_t & matrix) -> void
{
   void *matrixXML = gTools().GetChild(node, name);
   size_t rows, cols;
   gTools().ReadAttr(matrixXML, "Rows", rows);
   gTools().ReadAttr(matrixXML, "Columns", cols);

   // shape stored in the file must match the caller-provided matrix
   R__ASSERT((size_t) matrix.GetNrows() == rows);
   R__ASSERT((size_t) matrix.GetNcols() == cols);

   // parse into a host-side ROOT matrix first
   TMatrixT<Scalar_t> tmatrix(rows, cols);

   const char * matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
   std::stringstream matrixStringStream(matrixString);

   for (size_t i = 0; i < rows; i++)
   {
      for (size_t j = 0; j < cols; j++)
      {
#ifndef R__HAS_TMVAGPU
         matrixStringStream >> tmatrix(i,j);
#else
         // on GPU builds, stream into a plain scalar first — presumably
         // tmatrix(i,j) is not directly streamable there (TODO confirm)
         Scalar_t value;
         matrixStringStream >> value;
         tmatrix(i,j) = value;
#endif

      }
   }

   // stage through an architecture matrix, then deep-copy into the destination
   Matrix_t tmp( tmatrix);
   Architecture_t::Copy(matrix, tmp);

}
0579
0580
//_________________________________________________________________________________________________
/// Debug helper: prints tensor \p A with the label \p name via the backend's
/// PrintTensor. The label is now taken by const reference instead of by value
/// to avoid copying the string on every call (backward compatible: callers and
/// the default argument are unaffected).
template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t & A, const std::string & name = "tensor") -> void
{
   Architecture::PrintTensor(A,name);
}
0586
0587 }
0588 }
0589
0590 #endif