File indexing completed on 2025-01-18 10:10:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #ifndef TMVA_DNN_FUNCTIONS
0019 #define TMVA_DNN_FUNCTIONS
0020
0021 namespace TMVA
0022 {
0023 namespace DNN
0024 {
0025
0026
0027
0028
0029
0030
/// Selector for the activation function applied by evaluate() and
/// differentiated by evaluateDerivative() in this file.
///
/// Each enumerator is forwarded to the static member of the architecture
/// backend named next to it. The numeric values are spelled out explicitly;
/// keep them unchanged in case they are stored or exchanged outside this
/// header (not verifiable from this file alone).
enum class EActivationFunction {
   kIdentity = 0, ///< evaluate(): no-op; derivative: Architecture_t::IdentityDerivative
   kRelu     = 1, ///< Architecture_t::Relu / ReluDerivative
   kSigmoid  = 2, ///< Architecture_t::Sigmoid / SigmoidDerivative
   kTanh     = 3, ///< Architecture_t::Tanh / TanhDerivative
   kSymmRelu = 4, ///< Architecture_t::SymmetricRelu / SymmetricReluDerivative
   kSoftSign = 5, ///< Architecture_t::SoftSign / SoftSignDerivative
   kGauss    = 6, ///< Architecture_t::Gauss / GaussDerivative
   kFastTanh = 7, ///< Architecture_t::FastTanh / FastTanhDerivative
};
0043
0044
/// Selector for the output function applied by the
/// evaluate(A, EOutputFunction, X) overload in this file.
///
/// The enumerators carry character codes as their values; the comment on
/// each one names the backend routine the dispatcher calls for it.
enum class EOutputFunction {
   kIdentity = 'I', ///< Architecture_t::Copy(A, X)
   kSigmoid  = 'S', ///< Architecture_t::Sigmoid(A, X)
   kSoftmax  = 'M', ///< Architecture_t::Softmax(A, X)
};
0051
0052
0053
0054
0055
/// Selector for the loss function used by evaluate(ELossFunction, ...) and
/// evaluateGradients() in this file.
///
/// The enumerators carry character codes as their values; the comment on
/// each one names the backend routines the dispatchers call for it.
enum class ELossFunction {
   kCrossEntropy        = 'C', ///< Architecture_t::CrossEntropy / CrossEntropyGradients
   kMeanSquaredError    = 'R', ///< Architecture_t::MeanSquaredError / MeanSquaredErrorGradients
   kSoftmaxCrossEntropy = 'S', ///< Architecture_t::SoftmaxCrossEntropy / SoftmaxCrossEntropyGradients
};
0062
0063
/// Selector for the weight regularization handled by regularization() and
/// addRegularizationGradients() in this file.
///
/// The enumerators carry character codes as their values.
enum class ERegularization {
   kNone = '0', ///< regularization() returns 0.0; no gradient contribution is added
   kL1   = '1', ///< Architecture_t::L1Regularization / AddL1RegularizationGradients
   kL2   = '2', ///< Architecture_t::L2Regularization / AddL2RegularizationGradients
};
0070
0071
/// Selector for the matrix initialization scheme applied by initialize()
/// in this file.
///
/// The enumerators carry character codes as their values; the comment on
/// each one names the backend routine initialize() calls for it.
enum class EInitialization {
   kGauss         = 'G', ///< Architecture_t::InitializeGauss
   kUniform       = 'U', ///< Architecture_t::InitializeUniform
   kIdentity      = 'I', ///< Architecture_t::InitializeIdentity
   kZero          = 'Z', ///< Architecture_t::InitializeZero
   kGlorotNormal  = 'X', ///< Architecture_t::InitializeGlorotNormal
   kGlorotUniform = 'F', ///< Architecture_t::InitializeGlorotUniform
};
0080
0081
/// Identifiers for the available optimizers.
///
/// NOTE(review): no function in this file switches on this enum, so it is
/// presumably consumed by code elsewhere -- confirm before renumbering.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4,
};
0089
0090
0091
0092
0093
0094
0095
0096
0097 template<typename Architecture_t>
0098 inline void evaluate(typename Architecture_t::Tensor_t &A,
0099 EActivationFunction f)
0100 {
0101 switch(f)
0102 {
0103 case EActivationFunction::kIdentity : break;
0104 case EActivationFunction::kRelu : Architecture_t::Relu(A);
0105 break;
0106 case EActivationFunction::kSigmoid : Architecture_t::Sigmoid(A);
0107 break;
0108 case EActivationFunction::kTanh : Architecture_t::Tanh(A);
0109 break;
0110 case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
0111 break;
0112 case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
0113 break;
0114 case EActivationFunction::kGauss : Architecture_t::Gauss(A);
0115 break;
0116 case EActivationFunction::kFastTanh : Architecture_t::FastTanh(A);
0117 break;
0118 }
0119 }
0120
0121
0122
0123
0124 template<typename Architecture_t>
0125 inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
0126 EActivationFunction f,
0127 const typename Architecture_t::Tensor_t & A)
0128 {
0129 switch(f)
0130 {
0131 case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
0132 break;
0133 case EActivationFunction::kRelu : Architecture_t::ReluDerivative(B, A);
0134 break;
0135 case EActivationFunction::kSigmoid : Architecture_t::SigmoidDerivative(B, A);
0136 break;
0137 case EActivationFunction::kTanh : Architecture_t::TanhDerivative(B, A);
0138 break;
0139 case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
0140 break;
0141 case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
0142 break;
0143 case EActivationFunction::kGauss : Architecture_t::GaussDerivative(B, A);
0144 break;
0145 case EActivationFunction::kFastTanh : Architecture_t::FastTanhDerivative(B, A);
0146 break;
0147 }
0148 }
0149
0150
0151 template<typename Architecture_t>
0152 inline void evaluateMatrix( typename Architecture_t::Matrix_t &A,
0153 EActivationFunction f)
0154 {
0155 typename Architecture_t::Tensor_t t(A);
0156 evaluate<Architecture_t>(t,f);
0157 }
0158
0159 template<typename Architecture_t>
0160 inline void evaluateDerivativeMatrix( typename Architecture_t::Matrix_t &B,
0161 EActivationFunction f,
0162 const typename Architecture_t::Matrix_t & A)
0163 {
0164 typename Architecture_t::Tensor_t t(B);
0165 evaluateDerivative<Architecture_t>(t,f, typename Architecture_t::Tensor_t(A));
0166 }
0167
0168
0169
0170
0171
0172
0173
0174 template<typename Architecture_t>
0175 inline void evaluate(typename Architecture_t::Matrix_t &A,
0176 EOutputFunction f,
0177 const typename Architecture_t::Matrix_t &X)
0178 {
0179 switch(f)
0180 {
0181 case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
0182 break;
0183 case EOutputFunction::kSigmoid : Architecture_t::Sigmoid(A, X);
0184 break;
0185 case EOutputFunction::kSoftmax : Architecture_t::Softmax(A, X);
0186 break;
0187 }
0188 }
0189
0190
0191
0192
0193
0194
0195
0196
0197 template <typename Architecture_t>
0198 inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
0199 const typename Architecture_t::Matrix_t &output, const typename Architecture_t::Matrix_t &weights)
0200 -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
0201 {
0202 switch(f)
0203 {
0204 case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
0205 case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
0206 case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
0207 }
0208 return 0.0;
0209 }
0210
0211
0212
0213
0214 template <typename Architecture_t>
0215 inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
0216 const typename Architecture_t::Matrix_t &Y,
0217 const typename Architecture_t::Matrix_t &output,
0218 const typename Architecture_t::Matrix_t &weights)
0219 {
0220 switch(f)
0221 {
0222 case ELossFunction::kCrossEntropy: Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
0223 case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
0224 case ELossFunction::kSoftmaxCrossEntropy :
0225 Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
0226 break;
0227 }
0228 }
0229
0230
0231
0232
0233
0234
0235
0236
0237 template<typename Architecture_t>
0238 inline auto regularization(const typename Architecture_t::Matrix_t &A,
0239 ERegularization R)
0240 -> decltype(Architecture_t::L1Regularization(A))
0241 {
0242 switch(R)
0243 {
0244 case ERegularization::kNone :
0245 return 0.0;
0246 case ERegularization::kL1 :
0247 return Architecture_t::L1Regularization(A);
0248 case ERegularization::kL2 :
0249 return Architecture_t::L2Regularization(A);
0250 }
0251 return 0.0;
0252 }
0253
0254
0255
0256
0257 template<typename Architecture_t>
0258 inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
0259 const typename Architecture_t::Matrix_t &W,
0260 typename Architecture_t::Scalar_t weightDecay,
0261 ERegularization R)
0262 {
0263 switch(R)
0264 {
0265 case ERegularization::kNone :
0266 break;
0267 case ERegularization::kL1 :
0268 Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
0269 break;
0270 case ERegularization::kL2 :
0271 Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
0272 break;
0273 }
0274 }
0275
0276
0277
0278
0279
0280
0281 template<typename Architecture_t>
0282 inline void initialize(typename Architecture_t::Matrix_t & A,
0283 EInitialization m)
0284 {
0285 switch(m) {
0286 case EInitialization::kGauss : Architecture_t::InitializeGauss(A);
0287 break;
0288 case EInitialization::kUniform : Architecture_t::InitializeUniform(A);
0289 break;
0290 case EInitialization::kIdentity : Architecture_t::InitializeIdentity(A);
0291 break;
0292 case EInitialization::kZero : Architecture_t::InitializeZero(A);
0293 break;
0294 case EInitialization::kGlorotNormal : Architecture_t::InitializeGlorotNormal(A);
0295 break;
0296 case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
0297 break;
0298 }
0299 }
0300
0301 }
0302 }
0303
0304 #endif