#ifndef TMVA_DNN_ARCHITECTURES_CPU
#define TMVA_DNN_ARCHITECTURES_CPU

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/BatchNormLayer.h"
#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/CNN/MaxPoolLayer.h"
#include "TMVA/DNN/RNN/RNNLayer.h"

#include "TMVA/DNN/Architectures/Cpu/CpuBuffer.h"
#include "TMVA/DNN/Architectures/Cpu/CpuMatrix.h"
#include "TMVA/DNN/Architectures/Cpu/CpuTensor.h"

#include <vector>
#include <string>

class TRandom;

namespace TMVA
{
namespace DNN
{

struct DummyDescriptor {};
struct DummyFilterDescriptor {};
struct DummyConvolutionDescriptor {};
struct DummyDropoutDescriptor {};
struct DummyPoolingDescriptor {};
struct DummyConvolutionFwdAlgo {};
struct DummyConvolutionBwdDataAlgo {};
struct DummyConvolutionBwdFilterAlgo {};
struct DummyDataType {};

struct DummyEmptyDescriptor {};

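/** The TCpu architecture class.
 *
 * Low-level interface class for multi-threaded CPU architectures. It declares
 * the scalar, matrix, tensor and buffer types used by this backend together
 * with the low-level functions required for layer propagation. The descriptor,
 * workspace and algorithm typedefs map to the dummy types above, since the CPU
 * backend needs no cuDNN-style handles.
 */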
template<typename AReal = Float_t>
class TCpu
{
private:
   static TRandom *fgRandomGen;

public:
   using Scalar_t = AReal;
   using Tensor_t = TCpuTensor<AReal>;
   using Matrix_t = TCpuMatrix<AReal>;
   using HostBuffer_t = TCpuBuffer<AReal>;
   using DeviceBuffer_t = TCpuBuffer<AReal>;

   using ActivationDescriptor_t = DummyDescriptor;
   using ConvolutionDescriptor_t = DummyDescriptor;
   using FilterDescriptor_t = DummyDescriptor;
   using DropoutDescriptor_t = DummyDescriptor;
   using PoolingDescriptor_t = DummyDescriptor;
   using TensorDescriptor_t = DummyDescriptor;

   using AlgorithmForward_t = DummyConvolutionFwdAlgo;
   using AlgorithmBackward_t = DummyConvolutionBwdDataAlgo;
   using AlgorithmHelper_t = DummyConvolutionBwdFilterAlgo;
   using AlgorithmDataType_t = DummyDataType;
   using ReduceTensorDescriptor_t = DummyDataType;
   using RecurrentDescriptor_t = DummyDataType;

   using EmptyDescriptor_t = DummyDescriptor;

   using GenLayer_t = VGeneralLayer<TCpu<AReal>>;
   using BNormLayer_t = TBatchNormLayer<TCpu<AReal>>;
   using BNormDescriptors_t = TDNNGenDescriptors<BNormLayer_t>;

   using ConvLayer_t = CNN::TConvLayer<TCpu<AReal>>;
   using ConvDescriptors_t = CNN::TCNNDescriptors<ConvLayer_t>;
   using ConvWorkspace_t = CNN::TCNNWorkspace<ConvLayer_t>;
   using PoolingLayer_t = CNN::TMaxPoolLayer<TCpu<AReal>>;
   using PoolingDescriptors_t = CNN::TCNNDescriptors<PoolingLayer_t>;
   using PoolingWorkspace_t = CNN::TCNNWorkspace<PoolingLayer_t>;

   using RNNDescriptors_t = RNN::TRNNDescriptors<TCpu<AReal>>;
   using RNNWorkspace_t = RNN::TRNNWorkspace<TCpu<AReal>>;

   static TMVA::Experimental::MemoryLayout GetTensorLayout() { return TMVA::Experimental::MemoryLayout::ColumnMajor; }

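   // Tensor factory helpers. For a batch of n images with c channels of size
   // h x w, the 4-argument overload creates a column-major tensor of shape
   // {c, h*w, n}; the 3-argument overload is the time-major variant used by
   // recurrent layers. Illustrative usage (hypothetical sizes):
   //
   //    auto X = TCpu<Float_t>::CreateTensor(32, 3, 28, 28);   // shape {3, 784, 32}
   //    auto S = TCpu<Float_t>::CreateTensor(32, 10, 16);      // shape {10, 16, 32}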
   static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
      return Tensor_t({c, h * w, n}, GetTensorLayout());
   }
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
      return Tensor_t(buffer, {c, h * w, n}, GetTensorLayout());
   }
   static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
   {
      return Tensor_t({t, w, b}, GetTensorLayout());
   }
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
   {
      return Tensor_t(buffer, {t, w, b}, GetTensorLayout());
   }

   static void CreateWeightTensors(std::vector<Matrix_t> &newWeights, const std::vector<Matrix_t> &weights) {
      if (!newWeights.empty()) newWeights.clear();
      size_t n = weights.size();
      for (size_t i = 0; i < n; ++i)
         newWeights.emplace_back(weights[i].GetNrows(), weights[i].GetNcols());
   }

   static bool IsCudnn() { return false; }

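   // Descriptor and workspace management. The CPU backend does not use
   // cuDNN-style descriptors or workspaces, so these functions are no-ops
   // kept only to satisfy the common low-level architecture interface.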
   static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *) {}

   static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *) {}
   static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *) {}
   static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *) {}
   static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *) {}
   static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *) {}

   static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double = 0.0) {}

   static void ReleaseConvDescriptors(TDescriptors *&) {}
   static void ReleasePoolDescriptors(TDescriptors *&) {}
   static void ReleaseBNormDescriptors(TDescriptors *&) {}
   static void ReleaseRNNDescriptors(TDescriptors *&) {}

   static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&,
                                       const DNN::CNN::TConvParams &, ConvLayer_t *) {}
   static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&,
                                              const DNN::CNN::TConvParams &, PoolingLayer_t *) {}
   static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *) {}
   static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *) {}
   static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *) {}

   static void FreeConvWorkspace(TWorkspace *&) {}
   static void FreePoolDropoutWorkspace(TWorkspace *&) {}
   static void FreeRNNWorkspace(TWorkspace *&) {}

   static void ReleaseDescriptor(ActivationDescriptor_t &) {}

   static void InitializeRNNTensors(GenLayer_t *) {}
   static void InitializeLSTMTensors(GenLayer_t *) {}
   static void InitializeGRUTensors(GenLayer_t *) {}

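   // Forward propagation: MultiplyTranspose computes output = input * weights^T,
   // AddRowWise adds the bias vector to each row of the output.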
   static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights);

   static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) {
      Matrix_t output_matrix = output.GetMatrix();
      MultiplyTranspose(output_matrix, input.GetMatrix(), weights);
   }

   static void AddRowWise(Matrix_t &output, const Matrix_t &biases);

   static void AddRowWise(Tensor_t &output, const Matrix_t &biases) {
      Matrix_t output_matrix = output.GetMatrix();
      AddRowWise(output_matrix, biases);
   }

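   // Backward propagation through a fully connected layer: computes the
   // gradients w.r.t. the previous layer's activations, the weights and the
   // biases from the incoming activation gradients and the stored forward pass.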
   static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients,
                        const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights,
                        const Tensor_t &activationBackward);

   static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta = 1.0);

   static void Copy(Matrix_t &B, const Matrix_t &A);

   template<typename AMatrix_t>
   static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A);

   static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta = 1.0);

   static void Copy(Tensor_t &A, const Tensor_t &B);

   template<typename ATensor_t>
   static void CopyDiffArch(Tensor_t &A, const ATensor_t &B);

   template<typename AMatrix_t>
   static void CopyDiffArch(std::vector<Matrix_t> &A, const std::vector<AMatrix_t> &B);

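   // Activation functions and their derivatives, applied element-wise to a
   // tensor. ActivationFunctionForward/Backward dispatch on EActivationFunction;
   // the descriptor argument is unused on the CPU backend.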
   static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct,
                                         const ActivationDescriptor_t activationDescr,
                                         const double coef = 0.0, const Scalar_t alpha = 1,
                                         const Scalar_t beta = 0);

   static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y,
                                          const Tensor_t &dY, const Tensor_t &X,
                                          EActivationFunction activFunct,
                                          const ActivationDescriptor_t activationDescr,
                                          const Scalar_t alpha = 1,
                                          const Scalar_t beta = 0);

   static void IdentityDerivative(Tensor_t &B, const Tensor_t &A);

   static void Relu(Tensor_t &B);
   static void ReluDerivative(Tensor_t &B, const Tensor_t &A);

   static void Sigmoid(Tensor_t &B);
   static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A);

   static void Tanh(Tensor_t &B);
   static void TanhDerivative(Tensor_t &B, const Tensor_t &A);

   static void FastTanh(Tensor_t &B);
   static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A);

   static void SymmetricRelu(Tensor_t &B);
   static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A);

   static void SoftSign(Tensor_t &B);
   static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A);

   static void Gauss(Tensor_t &B);
   static void GaussDerivative(Tensor_t &B, const Tensor_t &A);

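   // Loss functions. Each evaluates the (event-weighted) loss of the predictions
   // in output against the targets in Y; the *Gradients variants write the
   // corresponding gradient w.r.t. the predictions into dY.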
   static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights);
   static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
                                         const Matrix_t &output, const Matrix_t &weights);

   static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights);
   static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
                                     const Matrix_t &output, const Matrix_t &weights);

   static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights);
   static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
                                            const Matrix_t &output, const Matrix_t &weights);

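   // Output functions, mapping the raw network output to predictions YHat
   // (element-wise sigmoid, or softmax over the outputs of each event).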
   static void Sigmoid(Matrix_t &YHat, const Matrix_t &);
   static void Softmax(Matrix_t &YHat, const Matrix_t &);

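   // Regularization: L1/L2 penalty terms for a weight matrix and the
   // corresponding contributions added to the weight gradients.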
   static Scalar_t L1Regularization(const Matrix_t &W);
   static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay);

   static Scalar_t L2Regularization(const Matrix_t &W);
   static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay);

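   // Weight initialization schemes and access to the backend random generator.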
   static void InitializeGauss(Matrix_t &A);
   static void InitializeUniform(Matrix_t &A);
   static void InitializeIdentity(Matrix_t &A);
   static void InitializeZero(Matrix_t &A);
   static void InitializeZero(Tensor_t &A);
   static void InitializeGlorotNormal(Matrix_t &A);
   static void InitializeGlorotUniform(Matrix_t &A);

   static TRandom &GetRandomGenerator();

   static void SetRandomSeed(size_t seed);

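   // Dropout: zeroes tensor entries with probability 1 - p and rescales the
   // surviving ones by 1 / p. The descriptor and workspace arguments are
   // ignored by the CPU backend.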
   static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p);

   static void DropoutForward(Matrix_t &A, Scalar_t p) {
      Tensor_t tA(A);
      DropoutForward(tA, static_cast<TDescriptors *>(nullptr), static_cast<TWorkspace *>(nullptr), p);
   }

   static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *) {}

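   // Batch normalization along the given axis: the training-mode forward pass
   // updates the running mean/variance with the given momentum, the inference
   // pass uses the stored running statistics, and the backward pass computes
   // the input, gamma and beta gradients.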
   static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
                                             Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
                                             Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
                                             Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);

   static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
                                              Tensor_t &y, const Matrix_t &runningMeans,
                                              const Matrix_t &runningVars, Scalar_t epsilon,
                                              const TensorDescriptor_t &);

   static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma,
                                      Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean,
                                      const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon,
                                      const TensorDescriptor_t &);

   static Tensor_t BatchNormLayerReshapeTensor(int axis, const Tensor_t &x);

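   // Convolutional layer propagation. calculateDimension gives the spatial
   // output size of a convolution; with the usual convention this is
   //    outDim = (imgDim - fltDim + 2 * padding) / stride + 1 .
   // Im2col / Im2colFast unroll the local receptive fields of the input into a
   // matrix so that the convolution can be evaluated as a single matrix
   // multiplication (the standard im2col + GEMM scheme).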
   static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);

   static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight,
                      size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
                      size_t zeroPaddingWidth);

   static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight,
                             size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                             size_t zeroPaddingHeight, size_t zeroPaddingWidth);
   static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> &V);

   static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight,
                             size_t filterWidth, size_t numFilters);

   static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);

   static void PrepareInternals(Tensor_t &) {}

   static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input,
                                const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params,
                                EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &,
                                ConvWorkspace_t &);

   static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                                 Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients,
                                 const Matrix_t &weights, const Tensor_t &activationBackward,
                                 const Tensor_t &outputTensor, EActivationFunction activFunc,
                                 const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize,
                                 size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
                                 size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews);

   static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df,
                                                const Matrix_t &weights, size_t batchSize, size_t inputHeight,
                                                size_t inputWidth, size_t depth, size_t height, size_t width,
                                                size_t filterDepth, size_t filterHeight, size_t filterWidth);

   static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df,
                                            const Tensor_t &activations_backward, size_t batchSize,
                                            size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                            size_t width, size_t filterDepth, size_t filterHeight,
                                            size_t filterWidth, size_t nLocalViews);

   static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth,
                                          size_t nLocalViews);

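   // Max pooling: Downsample writes the pooled output together with the index
   // of each selected element; MaxPoolLayerBackward routes the incoming
   // gradients back to those winning positions.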
   static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &,
                          PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight,
                          size_t fltWidth, size_t strideRows, size_t strideCols);

   static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients,
                                    const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &,
                                    const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight,
                                    size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
                                    size_t strideCols, size_t nLocalViews);

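   // Recurrent layers. RecurrentLayerBackward implements backpropagation through
   // time for a single step of a vanilla RNN cell; LSTMLayerBackward and
   // GRULayerBackward do the same for the gated cells. RNNForward/RNNBackward
   // are no-ops on the CPU backend.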
   static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward,
                                           Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients,
                                           Matrix_t &bias_gradients,
                                           Matrix_t &df,
                                           const Matrix_t &state,
                                           const Matrix_t &weights_input,
                                           const Matrix_t &weights_state,
                                           const Matrix_t &input,
                                           Matrix_t &input_gradient);

   static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &,
                          const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &,
                          const RNNDescriptors_t &, RNNWorkspace_t &, bool)
   {
   }

   static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &,
                           const Tensor_t &, const Tensor_t &, const Matrix_t &,
                           const Matrix_t &, const Tensor_t &, Tensor_t &,
                           Matrix_t &, Matrix_t &, Tensor_t &,
                           const RNNDescriptors_t &, RNNWorkspace_t &)
   {
   }

   static Matrix_t &LSTMLayerBackward(TCpuMatrix<Scalar_t> &state_gradients_backward,
                                      TCpuMatrix<Scalar_t> &cell_gradients_backward,
                                      TCpuMatrix<Scalar_t> &input_weight_gradients,
                                      TCpuMatrix<Scalar_t> &forget_weight_gradients,
                                      TCpuMatrix<Scalar_t> &candidate_weight_gradients,
                                      TCpuMatrix<Scalar_t> &output_weight_gradients,
                                      TCpuMatrix<Scalar_t> &input_state_weight_gradients,
                                      TCpuMatrix<Scalar_t> &forget_state_weight_gradients,
                                      TCpuMatrix<Scalar_t> &candidate_state_weight_gradients,
                                      TCpuMatrix<Scalar_t> &output_state_weight_gradients,
                                      TCpuMatrix<Scalar_t> &input_bias_gradients,
                                      TCpuMatrix<Scalar_t> &forget_bias_gradients,
                                      TCpuMatrix<Scalar_t> &candidate_bias_gradients,
                                      TCpuMatrix<Scalar_t> &output_bias_gradients,
                                      TCpuMatrix<Scalar_t> &di,
                                      TCpuMatrix<Scalar_t> &df,
                                      TCpuMatrix<Scalar_t> &dc,
                                      TCpuMatrix<Scalar_t> &dout,
                                      const TCpuMatrix<Scalar_t> &precStateActivations,
                                      const TCpuMatrix<Scalar_t> &precCellActivations,
                                      const TCpuMatrix<Scalar_t> &fInput,
                                      const TCpuMatrix<Scalar_t> &fForget,
                                      const TCpuMatrix<Scalar_t> &fCandidate,
                                      const TCpuMatrix<Scalar_t> &fOutput,
                                      const TCpuMatrix<Scalar_t> &weights_input,
                                      const TCpuMatrix<Scalar_t> &weights_forget,
                                      const TCpuMatrix<Scalar_t> &weights_candidate,
                                      const TCpuMatrix<Scalar_t> &weights_output,
                                      const TCpuMatrix<Scalar_t> &weights_input_state,
                                      const TCpuMatrix<Scalar_t> &weights_forget_state,
                                      const TCpuMatrix<Scalar_t> &weights_candidate_state,
                                      const TCpuMatrix<Scalar_t> &weights_output_state,
                                      const TCpuMatrix<Scalar_t> &input,
                                      TCpuMatrix<Scalar_t> &input_gradient,
                                      TCpuMatrix<Scalar_t> &cell_gradient,
                                      TCpuMatrix<Scalar_t> &cell_tanh);

   static Matrix_t &GRULayerBackward(TCpuMatrix<Scalar_t> &state_gradients_backward,
                                     TCpuMatrix<Scalar_t> &reset_weight_gradients,
                                     TCpuMatrix<Scalar_t> &update_weight_gradients,
                                     TCpuMatrix<Scalar_t> &candidate_weight_gradients,
                                     TCpuMatrix<Scalar_t> &reset_state_weight_gradients,
                                     TCpuMatrix<Scalar_t> &update_state_weight_gradients,
                                     TCpuMatrix<Scalar_t> &candidate_state_weight_gradients,
                                     TCpuMatrix<Scalar_t> &reset_bias_gradients,
                                     TCpuMatrix<Scalar_t> &update_bias_gradients,
                                     TCpuMatrix<Scalar_t> &candidate_bias_gradients,
                                     TCpuMatrix<Scalar_t> &dr,
                                     TCpuMatrix<Scalar_t> &du,
                                     TCpuMatrix<Scalar_t> &dc,
                                     const TCpuMatrix<Scalar_t> &precStateActivations,
                                     const TCpuMatrix<Scalar_t> &fReset,
                                     const TCpuMatrix<Scalar_t> &fUpdate,
                                     const TCpuMatrix<Scalar_t> &fCandidate,
                                     const TCpuMatrix<Scalar_t> &weights_reset,
                                     const TCpuMatrix<Scalar_t> &weights_update,
                                     const TCpuMatrix<Scalar_t> &weights_candidate,
                                     const TCpuMatrix<Scalar_t> &weights_reset_state,
                                     const TCpuMatrix<Scalar_t> &weights_update_state,
                                     const TCpuMatrix<Scalar_t> &weights_candidate_state,
                                     const TCpuMatrix<Scalar_t> &input,
                                     TCpuMatrix<Scalar_t> &input_gradient,
                                     bool resetGateAfter);

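   // Tensor reshaping: Reshape copies matrix B into A using A's dimensions,
   // Flatten/Deflatten convert between 3-D tensors and their flattened 2-D
   // form, and Rearrange reorders a tensor between batch-major and time-major
   // layout as needed by the recurrent layers.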
   static void Reshape(Matrix_t &A, const Matrix_t &B);

   static void Flatten(Tensor_t &A, const Tensor_t &B);

   static void Deflatten(Tensor_t &A, const Tensor_t &B);

   static void Rearrange(Tensor_t &out, const Tensor_t &in);

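   // Additional element-wise and BLAS-style operations used by the optimizers
   // and layer implementations (matrix products, Hadamard products, column
   // sums, scalar updates and the ADAM moment updates).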
   static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B);

   static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights,
                                 Scalar_t alpha = 1.0, Scalar_t beta = 0.);

   static void Hadamard(Tensor_t &A, const Tensor_t &B);
   static void Hadamard(Matrix_t &A, const Matrix_t &B);

   static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.);

   static Scalar_t Sum(const Matrix_t &A);

   static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);

   static void ConstAdd(Matrix_t &A, Scalar_t beta);

   static void ConstMult(Matrix_t &A, Scalar_t beta);

   static void ReciprocalElementWise(Matrix_t &A);

   static void SquareElementWise(Matrix_t &A);

   static void SqrtElementWise(Matrix_t &A);

   static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps);
   static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta);
   static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta);

   static void PrintTensor(const Tensor_t &A, const std::string name = "Cpu-tensor", bool truncate = false);
};

template <typename AReal>
template <typename AMatrix_t>
void TCpu<AReal>::CopyDiffArch(TCpuMatrix<AReal> &B, const AMatrix_t &A)
{
   TMatrixT<AReal> tmp = A;
   Copy(B, TCpuMatrix<AReal>(tmp));
}

template <typename AReal>
template <typename ATensor_t>
void TCpu<AReal>::CopyDiffArch(TCpuTensor<AReal> &B, const ATensor_t &A)
{
   R__ASSERT(A.GetSize() == B.GetSize());
   for (size_t i = 0; i < A.GetFirstSize(); ++i) {
      TMatrixT<AReal> tmpIn = A.At(i);
      TCpuMatrix<AReal> tmpOut = B.At(i).GetMatrix();
      Copy(tmpOut, TCpuMatrix<AReal>(tmpIn));
   }
}

template <typename AReal>
template <typename AMatrix_t>
void TCpu<AReal>::CopyDiffArch(std::vector<TCpuMatrix<AReal>> &A, const std::vector<AMatrix_t> &B)
{
   for (size_t i = 0; i < A.size(); ++i) {
      CopyDiffArch(A[i], B[i]);
   }
}

template <typename AReal>
void TCpu<AReal>::PrintTensor(const typename TCpu<AReal>::Tensor_t &A, const std::string name, bool truncate)
{
   std::cout << name << " size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size() - 1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " tensor count " << A.GetBufferUseCount() << std::endl;
   if (A.GetShape().size() == 2) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         size_t n = A.GetShape()[1];
         if (truncate) n = std::min(n, size_t(10));
         for (size_t j = 0; j < n; ++j) {
            std::cout << A(i, j) << " ";
         }
         if (truncate && n < A.GetShape()[1]) std::cout << " ...... ";
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 3) {
      for (size_t i = 0; i < A.GetFirstSize(); ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetHSize(); ++j) {
            std::cout << "{ ";
            size_t n = A.GetWSize();
            if (truncate) n = std::min(n, size_t(10));
            for (size_t k = 0; k < n; ++k) {
               std::cout << A(i, j, k) << " ";
            }
            if (truncate && n < A.GetWSize()) std::cout << " ...... ";
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   } else {
      for (size_t l = 0; l < A.GetSize(); ++l) {
         std::cout << A.GetData()[l] << " ";
      }
      std::cout << "\n";
   }
}

} // namespace DNN
} // namespace TMVA

#endif