Warning, file /include/root/TMVA/DNN/GeneralLayer.h was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef TMVA_DNN_GENERALLAYER
0028 #define TMVA_DNN_GENERALLAYER
0029
0030 #include <sstream>
0031 #include <limits>
0032 #include <vector>
0033 #include <string>
0034
0035
0036 #include "TMVA/Tools.h"
0037 #include "TError.h" // for R__ASSERT
0038
0039 #include "TMVA/DNN/Functions.h"
0040
0041 namespace TMVA {
0042 namespace DNN {
0043
0044
0045
0046
0047
0048
0049
0050 template <typename Architecture_t>
0051 class VGeneralLayer {
0052
0053 using Tensor_t = typename Architecture_t::Tensor_t;
0054 using Matrix_t = typename Architecture_t::Matrix_t;
0055 using Scalar_t = typename Architecture_t::Scalar_t;
0056
0057
0058 protected:
0059 size_t fBatchSize;
0060
0061 size_t fInputDepth;
0062 size_t fInputHeight;
0063 size_t fInputWidth;
0064
0065 size_t fDepth;
0066 size_t fHeight;
0067 size_t fWidth;
0068
0069 bool fIsTraining;
0070
0071 std::vector<Matrix_t> fWeights;
0072 std::vector<Matrix_t> fBiases;
0073
0074 std::vector<Matrix_t> fWeightGradients;
0075 std::vector<Matrix_t> fBiasGradients;
0076
0077 Tensor_t fOutput;
0078 Tensor_t fActivationGradients;
0079
0080 EInitialization fInit;
0081
0082 public:
0083
0084 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
0085 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
0086 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
0087 size_t OutputNCols, EInitialization Init);
0088
0089
0090 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
0091 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
0092 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
0093 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
0094 EInitialization Init);
0095
0096
0097 VGeneralLayer(VGeneralLayer<Architecture_t> *layer);
0098
0099
0100 VGeneralLayer(const VGeneralLayer &);
0101
0102
0103 virtual ~VGeneralLayer();
0104
0105
0106 virtual void Initialize();
0107
0108
0109
0110
0111 virtual void Forward(Tensor_t &input, bool applyDropout = false) = 0;
0112
0113
0114
0115 virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward ) = 0;
0116
0117
0118
0119
0120
0121 virtual void ResetTraining() {}
0122
0123
0124 void Update(const Scalar_t learningRate);
0125
0126
0127 void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
0128
0129
0130 void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
0131
0132
0133 void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
0134
0135
0136 void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
0137
0138
0139 void CopyWeights(const std::vector<Matrix_t> &otherWeights);
0140
0141
0142 void CopyBiases(const std::vector<Matrix_t> &otherBiases);
0143
0144
0145
0146
0147 template <typename Arch>
0148 void CopyParameters(const VGeneralLayer<Arch> &layer);
0149
0150
0151 virtual void Print() const = 0;
0152
0153
0154 virtual void AddWeightsXMLTo(void *parent) = 0;
0155
0156
0157 virtual void ReadWeightsFromXML(void *parent) = 0;
0158
0159
0160 virtual void SetDropoutProbability(Scalar_t ) {}
0161
0162
0163 size_t GetBatchSize() const { return fBatchSize; }
0164 size_t GetInputDepth() const { return fInputDepth; }
0165 size_t GetInputHeight() const { return fInputHeight; }
0166 size_t GetInputWidth() const { return fInputWidth; }
0167 size_t GetDepth() const { return fDepth; }
0168 size_t GetHeight() const { return fHeight; }
0169 size_t GetWidth() const { return fWidth; }
0170 bool IsTraining() const { return fIsTraining; }
0171
0172 const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
0173 std::vector<Matrix_t> &GetWeights() { return fWeights; }
0174
0175 const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
0176 Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }
0177
0178 const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
0179 std::vector<Matrix_t> &GetBiases() { return fBiases; }
0180
0181 const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
0182 Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }
0183
0184 const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
0185 std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }
0186
0187 const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
0188 Matrix_t &GetWeightGradientsAt(size_t i) { return fWeightGradients[i]; }
0189
0190 const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
0191 std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }
0192
0193 const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
0194 Matrix_t &GetBiasGradientsAt(size_t i) { return fBiasGradients[i]; }
0195
0196 const Tensor_t &GetOutput() const { return fOutput; }
0197 Tensor_t &GetOutput() { return fOutput; }
0198
0199 const Tensor_t &GetActivationGradients() const { return fActivationGradients; }
0200 Tensor_t &GetActivationGradients() { return fActivationGradients; }
0201
0202 Matrix_t GetOutputAt(size_t i) { return fOutput.At(i).GetMatrix(); }
0203 const Matrix_t &GetOutputAt(size_t i) const { return fOutput.At(i).GetMatrix(); }
0204
0205 Matrix_t GetActivationGradientsAt(size_t i) { return fActivationGradients.At(i).GetMatrix(); }
0206 const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients.At(i).GetMatrix(); }
0207
0208
0209
0210 virtual std::vector<Matrix_t> GetExtraLayerParameters() const { return std::vector<Matrix_t>(); }
0211
0212 virtual void SetExtraLayerParameters(const std::vector<Matrix_t> & ) {}
0213
0214 EInitialization GetInitialization() const { return fInit; }
0215
0216
0217 void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
0218 void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
0219 void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
0220 void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
0221 void SetDepth(size_t depth) { fDepth = depth; }
0222 void SetHeight(size_t height) { fHeight = height; }
0223 void SetWidth(size_t width) { fWidth = width; }
0224 void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }
0225
0226
0227 void WriteTensorToXML( void * node, const char * name, const std::vector<Matrix_t> & tensor);
0228 void WriteMatrixToXML( void * node, const char * name, const Matrix_t & matrix);
0229
0230 void ReadMatrixXML( void * node, const char * name, Matrix_t & matrix);
0231
0232 };
0233
0234
0235
0236
0237
0238 template <typename Architecture_t>
0239 VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
0240 size_t depth, size_t height, size_t width, size_t weightsNSlices,
0241 size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
0242 size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
0243 size_t outputNRows, size_t outputNCols, EInitialization init)
0244 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
0245 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0246 fOutput( outputNSlices, outputNRows, outputNCols ),
0247 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
0248 fInit(init)
0249 {
0250
0251 for (size_t i = 0; i < weightsNSlices; i++) {
0252 fWeights.emplace_back(weightsNRows, weightsNCols);
0253 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0254 }
0255
0256 for (size_t i = 0; i < biasesNSlices; i++) {
0257 fBiases.emplace_back(biasesNRows, biasesNCols);
0258 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0259 }
0260 }
0261
0262
0263 template <typename Architecture_t>
0264 VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
0265 size_t depth, size_t height, size_t width, size_t weightsNSlices,
0266 std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
0267 size_t biasesNSlices, std::vector<size_t> biasesNRows,
0268 std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
0269 size_t outputNCols, EInitialization init)
0270 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
0271 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0272 fOutput( outputNSlices, outputNRows, outputNCols ),
0273 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
0274 fInit(init)
0275 {
0276
0277 for (size_t i = 0; i < weightsNSlices; i++) {
0278 fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
0279 fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
0280 }
0281
0282 for (size_t i = 0; i < biasesNSlices; i++) {
0283 fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
0284 fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
0285 }
0286
0287
0288
0289
0290
0291 }
0292
0293
0294 template <typename Architecture_t>
0295 VGeneralLayer<Architecture_t>::VGeneralLayer(VGeneralLayer<Architecture_t> *layer)
0296 : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
0297 fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
0298 fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
0299 fBiasGradients(),
0300 fOutput( layer->GetOutput().GetShape() ),
0301 fActivationGradients( layer->GetActivationGradients().GetShape() ),
0302 fInit(layer->GetInitialization() )
0303 {
0304
0305 size_t weightsNSlices = (layer->GetWeights()).size();
0306 size_t weightsNRows = 0;
0307 size_t weightsNCols = 0;
0308
0309 for (size_t i = 0; i < weightsNSlices; i++) {
0310 weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
0311 weightsNCols = (layer->GetWeightsAt(i)).GetNcols();
0312
0313 fWeights.emplace_back(weightsNRows, weightsNCols);
0314 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0315
0316 Architecture_t::Copy(fWeights[i], layer->GetWeightsAt(i));
0317 }
0318
0319 size_t biasesNSlices = (layer->GetBiases()).size();
0320 size_t biasesNRows = 0;
0321 size_t biasesNCols = 0;
0322
0323 for (size_t i = 0; i < biasesNSlices; i++) {
0324 biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
0325 biasesNCols = (layer->GetBiasesAt(i)).GetNcols();
0326
0327 fBiases.emplace_back(biasesNRows, biasesNCols);
0328 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0329
0330 Architecture_t::Copy(fBiases[i], layer->GetBiasesAt(i));
0331 }
0332 }
0333
0334
0335 template <typename Architecture_t>
0336 VGeneralLayer<Architecture_t>::VGeneralLayer(const VGeneralLayer &layer)
0337 : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
0338 fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
0339 fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
0340 fOutput( layer.GetOutput() ),
0341 fActivationGradients( layer.GetActivationGradients() ),
0342 fInit( layer.GetInitialization())
0343 {
0344
0345 size_t weightsNSlices = layer.fWeights.size();
0346 size_t weightsNRows = 0;
0347 size_t weightsNCols = 0;
0348
0349 for (size_t i = 0; i < weightsNSlices; i++) {
0350 weightsNRows = (layer.fWeights[i]).GetNrows();
0351 weightsNCols = (layer.fWeights[i]).GetNcols();
0352
0353 fWeights.emplace_back(weightsNRows, weightsNCols);
0354 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
0355
0356 Architecture_t::Copy(fWeights[i], layer.fWeights[i]);
0357 }
0358
0359 size_t biasesNSlices = layer.fBiases.size();
0360 size_t biasesNRows = 0;
0361 size_t biasesNCols = 0;
0362
0363 for (size_t i = 0; i < biasesNSlices; i++) {
0364 biasesNRows = (layer.fBiases[i]).GetNrows();
0365 biasesNCols = (layer.fBiases[i]).GetNcols();
0366
0367 fBiases.emplace_back(biasesNRows, biasesNCols);
0368 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
0369
0370 Architecture_t::Copy(fBiases[i], layer.fBiases[i]);
0371 }
0372
0373 size_t outputNSlices = layer.fOutput.size();
0374 size_t outputNRows = 0;
0375 size_t outputNCols = 0;
0376
0377 for (size_t i = 0; i < outputNSlices; i++) {
0378 outputNRows = (layer.fOutput[i]).GetNrows();
0379 outputNCols = (layer.fOutput[i]).GetNcols();
0380
0381 fOutput.emplace_back(outputNRows, outputNCols);
0382 fActivationGradients.emplace_back(outputNRows, outputNCols);
0383 }
0384 }
0385
0386
0387 template <typename Architecture_t>
0388 VGeneralLayer<Architecture_t>::~VGeneralLayer()
0389 {
0390
0391 }
0392
0393
0394 template <typename Architecture_t>
0395 auto VGeneralLayer<Architecture_t>::Initialize() -> void
0396 {
0397 for (size_t i = 0; i < fWeights.size(); i++) {
0398 initialize<Architecture_t>(fWeights[i], this->GetInitialization());
0399 initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
0400 }
0401
0402 for (size_t i = 0; i < fBiases.size(); i++) {
0403 initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
0404 initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
0405 }
0406 }
0407
0408
0409 template <typename Architecture_t>
0410 auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
0411 {
0412 this->UpdateWeights(fWeightGradients, learningRate);
0413 this->UpdateBiases(fBiasGradients, learningRate);
0414 }
0415
0416
0417 template <typename Architecture_t>
0418 auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
0419 const Scalar_t learningRate) -> void
0420 {
0421 for (size_t i = 0; i < fWeights.size(); i++) {
0422 Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
0423 }
0424 }
0425
0426
0427 template <typename Architecture_t>
0428 auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
0429 const Scalar_t learningRate) -> void
0430 {
0431 for (size_t i = 0; i < fBiases.size(); i++) {
0432 Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
0433 }
0434 }
0435
0436
0437 template <typename Architecture_t>
0438 auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
0439 const Scalar_t learningRate) -> void
0440 {
0441 for (size_t i = 0; i < fWeightGradients.size(); i++) {
0442 Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
0443 }
0444 }
0445
0446
0447 template <typename Architecture_t>
0448 auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
0449 const Scalar_t learningRate) -> void
0450 {
0451 for (size_t i = 0; i < fBiasGradients.size(); i++) {
0452 Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
0453 }
0454 }
0455
0456
0457 template <typename Architecture_t>
0458 auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
0459 {
0460
0461 for (size_t i = 0; i < fWeights.size(); i++) {
0462 Architecture_t::Copy(fWeights[i], otherWeights[i]);
0463 }
0464 }
0465
0466
0467 template <typename Architecture_t>
0468 auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
0469 {
0470 for (size_t i = 0; i < fBiases.size(); i++) {
0471 Architecture_t::Copy(fBiases[i], otherBiases[i]);
0472 }
0473 }
0474
0475
0476 template <typename Architecture_t>
0477 template <typename Arch>
0478 void VGeneralLayer<Architecture_t>::CopyParameters(const VGeneralLayer<Arch> &layer)
0479 {
0480
0481
0482 Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
0483 Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());
0484
0485
0486 auto params = layer.GetExtraLayerParameters();
0487 if (params.size() > 0) {
0488 auto paramsToCopy = GetExtraLayerParameters();
0489 Architecture_t::CopyDiffArch(paramsToCopy, params );
0490 SetExtraLayerParameters(paramsToCopy);
0491 }
0492 }
0493
0494
0495 template <typename Architecture_t>
0496 auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void * node, const char * name, const std::vector<Matrix_t> & tensor) -> void
0497 {
0498 auto xmlengine = gTools().xmlengine();
0499 void* matnode = xmlengine.NewChild(node, 0, name);
0500 if (tensor.size() == 0) return;
0501 xmlengine.NewAttr(matnode,0,"Depth", gTools().StringFromInt(tensor.size()) );
0502
0503 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(tensor[0].GetNrows()) );
0504 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(tensor[0].GetNcols()) );
0505 std::stringstream s;
0506 for (size_t i = 0; i < tensor.size(); ++i) {
0507 auto & mat = tensor[i];
0508 for (Int_t row = 0; row < mat.GetNrows(); row++) {
0509 for (Int_t col = 0; col < mat.GetNcols(); col++) {
0510
0511
0512 s << std::scientific << mat(row, col) << " ";
0513 }
0514 }
0515 }
0516 xmlengine.AddRawLine( matnode, s.str().c_str() );
0517 }
0518
0519
0520 template <typename Architecture_t>
0521 auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void * node, const char * name, const Matrix_t & matrix) -> void
0522 {
0523 auto xmlengine = gTools().xmlengine();
0524 void* matnode = xmlengine.NewChild(node, nullptr, name);
0525
0526 xmlengine.NewAttr(matnode,nullptr,"Rows", gTools().StringFromInt(matrix.GetNrows()) );
0527 xmlengine.NewAttr(matnode,nullptr,"Columns", gTools().StringFromInt(matrix.GetNcols()) );
0528 std::stringstream s;
0529 s.precision(std::numeric_limits<Scalar_t>::max_digits10);
0530 size_t nrows = matrix.GetNrows();
0531 size_t ncols = matrix.GetNcols();
0532 for (size_t row = 0; row < nrows; row++) {
0533 for (size_t col = 0; col < ncols; col++) {
0534
0535 s << std::scientific << matrix(row,col) << " ";
0536 }
0537 }
0538
0539 xmlengine.AddRawLine( matnode, s.str().c_str() );
0540 }
0541
0542
0543 template <typename Architecture_t>
0544 auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void * node, const char * name, Matrix_t & matrix) -> void
0545 {
0546 void *matrixXML = gTools().GetChild(node, name);
0547 size_t rows, cols;
0548 gTools().ReadAttr(matrixXML, "Rows", rows);
0549 gTools().ReadAttr(matrixXML, "Columns", cols);
0550
0551 R__ASSERT((size_t) matrix.GetNrows() == rows);
0552 R__ASSERT((size_t) matrix.GetNcols() == cols);
0553
0554 TMatrixT<Scalar_t> tmatrix(rows, cols);
0555
0556 const char * matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
0557 std::stringstream matrixStringStream(matrixString);
0558
0559 for (size_t i = 0; i < rows; i++)
0560 {
0561 for (size_t j = 0; j < cols; j++)
0562 {
0563 #ifndef R__HAS_TMVAGPU
0564 matrixStringStream >> tmatrix(i,j);
0565 #else
0566 Scalar_t value;
0567 matrixStringStream >> value;
0568 tmatrix(i,j) = value;
0569 #endif
0570
0571 }
0572 }
0573
0574
0575 Matrix_t tmp( tmatrix);
0576 Architecture_t::Copy(matrix, tmp);
0577
0578 }
0579
0580
/*! Debugging helper: forwards the tensor and its label to
 *  Architecture::PrintTensor. The label defaults to "tensor". */
template <typename Architecture>
void debugTensor(const typename Architecture::Tensor_t & A, const std::string name = "tensor")
{
   Architecture::PrintTensor(A, name);
}
0586
0587 }
0588 }
0589
0590 #endif