#ifndef TMVA_SOFIE_ROPERATOR_RNN_I
#define TMVA_SOFIE_ROPERATOR_RNN_I

namespace TMVA {
namespace Experimental {
namespace SOFIE {

template <typename T>
auto ROperator_RNN<T>::TypeInference(std::vector<ETensorType> input)
-> std::vector<ETensorType> {
   ETensorType out = input[0];
   return {out, out};
}

template <typename T>
auto ROperator_RNN<T>::ShapeInference(std::vector<std::vector<size_t>> input)
-> std::vector<std::vector<size_t>> {
   size_t num_directions = input[1][0];
   size_t hidden_size = input[1][1];
   if (fAttrLayout == 0) {
      size_t seq_length = input[0][0];
      size_t batch_size = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{seq_length, num_directions, batch_size, hidden_size},
           {num_directions, batch_size, hidden_size}});
      return ret;
   } else {
      size_t batch_size = input[0][0];
      size_t seq_length = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{batch_size, seq_length, num_directions, hidden_size},
           {batch_size, num_directions, hidden_size}});
      return ret;
   }
}
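
// A minimal worked example of the shape inference above, with hypothetical
// sizes: for layout 0 (timewise), an input X of shape
// {seq_length, batch_size, input_size} = {5, 3, 10} and a weight tensor W of
// shape {num_directions, hidden_size, input_size} = {1, 8, 10} give
//    Y   : {5, 1, 3, 8}   // {seq_length, num_directions, batch_size, hidden_size}
//    Y_h : {1, 3, 8}      // {num_directions, batch_size, hidden_size}
// With layout 1 (batchwise), the same tensors give {3, 5, 1, 8} and {3, 1, 8}.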

template <typename T>
auto ROperator_RNN<T>::Initialize(RModel &model)
-> void {
   fUseSession = model.UseSession();
   // Check the input and output tensors
   if (!model.CheckIfTensorAlreadyExist(fNX)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX +
                               " is not found in model.");
   }
   fShapeX = model.GetTensorShape(fNX);
   if (fShapeX.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX +
                               " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNW)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW +
                               " is not found in model.");
   }
   fShapeW = model.GetTensorShape(fNW);
   if (fShapeW.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW +
                               " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNR)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR +
                               " is not found in model.");
   }
   fShapeR = model.GetTensorShape(fNR);
   if (fShapeR.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR +
                               " is not of 3 dimensions.");
   }
   if (!fNB.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNB)) {
         throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB +
                                  " is not found in model.");
      }
      fShapeB = model.GetTensorShape(fNB);
      if (fShapeB.size() != 2 && fShapeB.size() != 4) {
         throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB +
                                  " is not of 2 or 4 dimensions.");
      }
      if (fShapeB.size() == 2) {
         // Broadcasting the bias
         auto original_data = model.GetInitializedTensorData(fNB);
         size_t num_directions = fShapeW[0];
         size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
         size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
         if (fType == "float") {
            float *original_bias = static_cast<float *>(original_data.get());
            float *new_bias = new float[num_directions * seq_length *
                                        batch_size * fAttrHiddenSize];
            // Per-direction sum of the input bias Wb and the recurrence bias Rb
            std::vector<float> sum(fAttrHiddenSize);
            for (size_t direction = 0; direction < num_directions;
                 direction++) {
               for (size_t h = 0; h < fAttrHiddenSize; h++) {
                  sum[h] = original_bias[direction * 2 * fAttrHiddenSize + h] +
                      original_bias[(2 * direction + 1) * fAttrHiddenSize + h];
               }
               for (size_t seq = 0; seq < seq_length; seq++) {
                  for (size_t batch = 0; batch < batch_size; batch++) {
                     size_t bias_offset =
                         direction * seq_length * batch_size * fAttrHiddenSize +
                         seq * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
                     std::copy(sum.begin(), sum.end(), new_bias + bias_offset);
                  }
               }
            }
            std::vector<size_t> new_bias_shape = {num_directions, seq_length,
                                                  batch_size, fAttrHiddenSize};
            std::shared_ptr<void> new_bias_ptr(new_bias, std::default_delete<float[]>());
            model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB),
                                          new_bias_shape, new_bias_ptr);
            fShapeB = model.GetTensorShape(fNB);
         }
      }
   }
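
   // The broadcast above implements, in effect (assuming the 2-dimensional
   // ONNX bias layout {num_directions, 2 * hidden_size} with Wb followed by Rb):
   //
   //    new_bias[d][s][b][h] = Wb[d][h] + Rb[d][h]
   //
   // i.e. the two bias vectors are summed once per direction and replicated
   // over every (seq, batch) position, so the generated code can later add the
   // whole bias tensor to the feedforward buffer with a single saxpy call.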
   if (!fNSequence_lens.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNSequence_lens + " is not found in model.");
      }
      fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
      if (fShapeSequence_lens.size() != 1) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNSequence_lens + " is not of 1 dimension.");
      }
   }
   if (!fNInitial_h.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNInitial_h + " is not found in model.");
      }
      fShapeInitial_h = model.GetTensorShape(fNInitial_h);
      if (fShapeInitial_h.size() != 3) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNInitial_h + " is not of 3 dimensions.");
      }
   }
   if (!fNY.empty()) {
      fShapeY = ShapeInference({fShapeX, fShapeW})[0];
      if (!model.CheckIfTensorAlreadyExist(fNY)) {
         model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
      }
   }
   if (!fNY_h.empty()) {
      fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
      if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
         model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX),
                                     fShapeY_h);
      }
   }
   // Check the attributes
   for (auto &activation : fAttrActivations) {
      if (activation != "Relu" && activation != "Tanh" &&
          activation != "Sigmoid" && activation != "Affine" &&
          activation != "LeakyRelu" && activation != "ThresholdRelu" &&
          activation != "ScaledTanh" && activation != "HardSigmoid" &&
          activation != "Elu" && activation != "Softsign" &&
          activation != "Softplus") {
         throw std::runtime_error("TMVA SOFIE - Activation function " +
                                  activation + " not implemented");
      }
   }
   if (fAttrDirection != "forward" && fAttrDirection != "backward" &&
       fAttrDirection != "bidirectional") {
      throw std::runtime_error(
          "TMVA SOFIE - Invalid RNN direction fAttrDirection = " +
          fAttrDirection);
   }
   if (fAttrHiddenSize != fShapeW[1]) {
      throw std::runtime_error(
          "TMVA SOFIE - fAttrHiddenSize must be equal to " +
          std::to_string(fShapeW[1]));
   }
   if (fAttrLayout > 1) {
      throw std::runtime_error(
          "TMVA SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) +
          " must be 0 (timewise) or 1 (batchwise)");
   }
   if (fAttrActivations.empty()) {
      if (fAttrDirection == "bidirectional") {
         fAttrActivations = {"Tanh", "Tanh"};
      } else {
         fAttrActivations = {"Tanh"};
      }
   }
   // Add needed standard library headers
   model.AddNeededStdLib("cmath");
}
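
// The code generated below evaluates the standard ONNX RNN recurrence, with f
// the per-direction activation function, W the input weights, R the recurrence
// weights and Wb, Rb the two bias vectors:
//
//    H_t = f(X_t * W^T + H_{t-1} * R^T + Wb + Rb)
//
// The X_t * W^T products for all time steps are computed up front with a
// single GEMM (the "feedforward" buffer); the recurrent H_{t-1} * R^T term is
// then accumulated time step by time step.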

// generate code for Session data members (e.g. internal vectors)
template <typename T>
std::string ROperator_RNN<T>::GenerateSessionMembersCode(std::string opName)
{
   opName = "op_" + opName;
   std::stringstream out;

   size_t num_directions = fShapeW[0];
   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];

   if (fAttrLayout != 0) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">("
          << seq_length * batch_size * input_size << ");\n";
      out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">("
          << num_directions * batch_size * fAttrHiddenSize << ");\n";
   }
   out << "std::vector<" << fType << "> fVec_" << opName << "_feedforward = std::vector<" << fType << ">("
       << seq_length * batch_size * fAttrHiddenSize << ");\n";

   if (fAttrLayout != 0 || fNY.empty()) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">("
          << seq_length * num_directions * batch_size * fAttrHiddenSize << ");\n";
   }

   out << "\n";

   return out.str();
}
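
// For illustration, with a hypothetical operator named "rnn1", fType "float",
// layout 0, num_directions 1, seq_length 5, batch_size 3 and hidden_size 8,
// the emitted session members would look like:
//
//    std::vector<float> fVec_op_rnn1_feedforward = std::vector<float>(120);
//    std::vector<float> fVec_op_rnn1_hidden_state = std::vector<float>(120);
//
// (the second vector only when fNY is empty, since with layout 0 a requested Y
// output doubles as the hidden-state buffer).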

//////////////////////////////////////////////////////////////////////////////////////////////////
template <typename T>
auto ROperator_RNN<T>::Generate(std::string OpName)
-> std::string {
   OpName = "op_" + OpName;
   std::stringstream out;

   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];
   size_t num_directions = fShapeW[0];

   // set the input
   if (fAttrLayout == 0) {
      if (fType == "float") {
         out << SP << "float *" << OpName << "_input = tensor_" << fNX << ";\n";
      }
   } else {
      if (fUseSession)
         out << SP << fType << " * " << OpName << "_input = fVec_" << OpName << "_input.data();\n";
      else
         out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "];\n";
      out << SP << "for(size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
      out << SP << SP << SP << "for(size_t i = 0; i < " << input_size << "; i++) {\n";
      out << SP << SP << SP << SP << OpName << "_input[seq * " << batch_size * input_size
          << " + batch * " << input_size << " + i] = " << "tensor_" << fNX << "[batch * "
          << seq_length * input_size << " + seq * " << input_size << " + i];\n";
      out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
   }
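
   // The loop emitted above for layout 1 converts the batchwise input
   // X{batch, seq, input} into the timewise working layout {seq, batch, input}:
   //
   //    input[seq][batch][i] = X[batch][seq][i]
   //
   // so the rest of the generated code can index time steps contiguously.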

   // Set the initial hidden state
   if (!fNInitial_h.empty()) {
      if (fAttrLayout == 0) {
         out << SP << fType << " *" << OpName << "_initial_hidden_state = tensor_"
             << fNInitial_h << ";\n";
      } else {
         if (fUseSession)
            out << SP << fType << " * " << OpName << "_initial_hidden_state = fVec_" << OpName
                << "_initial_hidden_state.data();\n";
         else
            out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size *
               fAttrHiddenSize << "] = {0};\n";

         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "for(size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
            out << SP << SP << SP << OpName << "_initial_hidden_state["
                << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize
                << " + h] = tensor_" << fNInitial_h << "[batch * " << num_directions * fAttrHiddenSize
                << " + " << direction * fAttrHiddenSize << " + h];\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
   }

   if (fUseSession)
      out << SP << fType << " * " << OpName << "_feedforward = fVec_" << OpName
          << "_feedforward.data();\n";
   else
      out << SP << fType << " " << OpName << "_feedforward[" << seq_length * batch_size * fAttrHiddenSize << "] = {0};\n";

   // Set the hidden state
   if (fAttrLayout == 0 && !fNY.empty()) {
      out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n";
   } else {
      if (fUseSession)
         out << SP << fType << " * " << OpName << "_hidden_state = fVec_" << OpName << "_hidden_state.data();\n";
      else
         out << SP << fType << " " << OpName << "_hidden_state[" << seq_length * num_directions *
            batch_size * fAttrHiddenSize << "] = {0};\n";
   }

   out << SP << "char " << OpName << "_transA = 'N';\n";
   out << SP << "char " << OpName << "_transB = 'T';\n";
   out << SP << "int " << OpName << "_m = " << seq_length * batch_size << ";\n";
   out << SP << "int " << OpName << "_n = " << fAttrHiddenSize << ";\n";
   out << SP << "int " << OpName << "_k = " << input_size << ";\n";
   if (fType == "float") {
      out << SP << "float " << OpName << "_alpha = 1.;\n";
      out << SP << "float " << OpName << "_beta = .0;\n";
   }
   if (!fNB.empty()) {
      out << SP << "int " << OpName << "_bias_size = " << seq_length * batch_size * fAttrHiddenSize << ";\n";
      out << SP << "int " << OpName << "_incx = 1;\n";
      out << SP << "int " << OpName << "_incy = 1;\n";
   }
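
   // Note on the BLAS convention used by the generated calls: sgemm_ is the
   // column-major Fortran routine, so the row-major product
   // feedforward = input * W^T is obtained by evaluating W * input^T
   // column-major, i.e. by passing the operands in swapped order with
   // transB = 'T', transA = 'N' and the dimensions m = seq_length * batch_size,
   // n = hidden_size, k = input_size as set up above. A sketch of the call the
   // generator emits (operand names abbreviated):
   //
   //    BLAS::sgemm_(&transB, &transA, &n, &m, &k, &alpha,
   //                 W, &k, input, &k, &beta, feedforward, &n);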

   for (size_t direction = 0; direction < num_directions; direction++) {
      // feedforward = input * W^T + bias
      if (fType == "float") {
         if (direction == 0) {
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName
                << "_alpha, tensor_" << fNW << ", &" << OpName << "_k, " << OpName
                << "_input, &" << OpName << "_k, &" << OpName << "_beta, " << OpName
                << "_feedforward, &" << OpName << "_n);\n";
         } else {
            out << SP << "size_t " << OpName << "_w_offset = " << fAttrHiddenSize * input_size
                << ";\n";
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName
                << "_alpha, tensor_" << fNW << " + " << OpName << "_w_offset, &" << OpName
                << "_k, " << OpName << "_input, &" << OpName << "_k, &" << OpName << "_beta, "
                << OpName << "_feedforward, &" << OpName << "_n);\n";
         }
      }
      // Add the bias
      if (!fNB.empty()) {
         if (fType == "float") {
            if (direction == 0) {
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << ", &" << OpName << "_incx, " << OpName << "_feedforward, &" << OpName << "_incy);\n";
            } else {
               out << SP << "size_t " << OpName << "_bias_offset = "
                   << seq_length * batch_size * fAttrHiddenSize << ";\n";
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << OpName << "_bias_offset, &" << OpName << "_incx, " << OpName
                   << "_feedforward, &" << OpName << "_incy);\n";
            }
         }
      }

      // Copy feedforward into hidden state
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "size_t offset = seq * " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t size = " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t h_offset = seq * "
          << num_directions * batch_size * fAttrHiddenSize << " + "
          << direction * batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "std::copy(" << OpName << "_feedforward + offset, " << OpName
          << "_feedforward + offset + size, " << OpName << "_hidden_state + h_offset);\n";
      out << SP << "}\n";

      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      if (fAttrDirection == "backward" || direction == 1) {
         out << SP << SP << "size_t index = " << seq_length - 1 << " - seq;\n";
      } else {
         out << SP << SP << "size_t index = seq;\n";
      }

      out << SP << SP << "int m2 = " << batch_size << ";\n";
      out << SP << SP << "size_t offset = index * "
          << num_directions * batch_size * fAttrHiddenSize << " + "
          << direction * batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t size = " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "if (seq == 0) {\n";
      if (!fNInitial_h.empty()) {
         // hidden_state = hidden_state + initial_hidden_state * R^T
         out << SP << SP << SP << "size_t r_offset = "
             << direction * fAttrHiddenSize * fAttrHiddenSize << ";\n";
         out << SP << SP << SP << "size_t initial_hidden_state_offset = "
             << direction * batch_size * fAttrHiddenSize << ";\n";
         if (fType == "float") {
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName
                << "_transA, &" << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName
                << "_alpha, tensor_" << fNR << " + r_offset, &" << OpName << "_n, " << OpName
                << "_initial_hidden_state + initial_hidden_state_offset, &" << OpName << "_n, &"
                << OpName << "_alpha, " << OpName << "_hidden_state + offset, &" << OpName << "_n);\n";
         }
      }
      out << SP << SP << "} else {\n";
      // hidden_state = hidden_state + previous_hidden_state * R^T
      out << SP << SP << SP << "size_t r_offset = "
          << direction * fAttrHiddenSize * fAttrHiddenSize << ";\n";
      if (fAttrDirection == "backward" || direction == 1) {
         out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
             << num_directions * batch_size * fAttrHiddenSize
             << " + " << direction * batch_size * fAttrHiddenSize << ";\n";
      } else {
         out << SP << SP << SP << "size_t previous_offset = (seq - 1) * "
             << num_directions * batch_size * fAttrHiddenSize
             << " + " << direction * batch_size * fAttrHiddenSize << ";\n";
      }
      if (fType == "float") {
         out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
             << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR
             << " + r_offset, &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
             << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_hidden_state + offset, &"
             << OpName << "_n);\n";
      }
      out << SP << SP << "}\n";

      // Clip the elements of the hidden state into the range [-fAttrClip, fAttrClip]
      if (fAttrClip > .0) {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float x = (" << OpName << "_hidden_state[i] > " << -fAttrClip
                << ") ? " << OpName << "_hidden_state[i] : " << -fAttrClip << ";\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (x < " << fAttrClip
             << ") ? x : " << fAttrClip << ";\n";
         out << SP << SP << "}\n";
      }
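
      // The two emitted ternaries implement the usual clamp,
      //    hidden_state[i] = std::min(std::max(hidden_state[i], -clip), clip),
      // restricting every element to [-fAttrClip, fAttrClip] before the
      // activation function is applied.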

      // Apply the activation function to the hidden state
      if (fAttrActivations[direction] == "Relu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Tanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = std::exp(-2 * " << OpName << "_hidden_state[i]);\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Sigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = 1. / (1. + std::exp(-" << OpName
             << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Affine") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * " << OpName << "_hidden_state[i] + " << fAttrActivationBeta[direction] << ";\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "ScaledTanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = std::exp(-2 * " << fAttrActivationBeta[direction]
                << " * " << OpName << "_hidden_state[i]);\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "HardSigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction] << " * "
                << OpName << "_hidden_state[i] + " << fAttrActivationBeta[direction] << ";\n";
            out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (b < 1.) ? b : 1.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "LeakyRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * " << OpName << "_hidden_state[i];\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "ThresholdRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < "
             << fAttrActivationAlpha[direction] << ")\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Elu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * (std::exp(" << OpName << "_hidden_state[i]) - 1.);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Softsign") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << OpName
             << "_hidden_state[i] / (1. + std::abs(" << OpName << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      } else { // fAttrActivations[direction] == "Softplus"
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = std::log(1. + std::exp("
             << OpName << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      }
      out << SP << "}\n";
   }
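
   // For the backward pass (fAttrDirection == "backward", or direction 1 of a
   // bidirectional RNN) the emitted time loop runs in reverse:
   // index = seq_length - 1 - seq, and the "previous" hidden state is the one
   // at index + 1 rather than index - 1. The recurrent GEMMs pass alpha (1.0)
   // in the beta slot so they accumulate into the hidden state; by the time
   // the activation runs, hidden_state holds X_t * W^T + bias + H_prev * R^T.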

   // Padding the hidden state for RNN with different sequence lengths
   if (!fNSequence_lens.empty()) {
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
      out << SP << SP << SP << "if (seq >= tensor_" << fNSequence_lens << "[batch]) {\n";
      out << SP << SP << SP << SP << "for (size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
      if (num_directions == 1) {
         out << SP << SP << SP << SP << SP << OpName << "_hidden_state[seq * "
             << num_directions * batch_size * fAttrHiddenSize << " + batch * "
             << fAttrHiddenSize << " + h] = 0.;\n";
      } else {
         out << SP << SP << SP << SP << SP << OpName << "_hidden_state[seq * "
             << num_directions * batch_size * fAttrHiddenSize << " + batch * "
             << fAttrHiddenSize << " + h] = 0.;\n";
         out << SP << SP << SP << SP << SP << OpName << "_hidden_state[seq * "
             << num_directions * batch_size * fAttrHiddenSize << " + " << batch_size * fAttrHiddenSize
             << " + batch * " << fAttrHiddenSize << " + h] = 0.;\n";
      }
      out << SP << SP << SP << SP << "}\n";
      out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
   }
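
   // When sequence_lens is given, batch entry b only has sequence_lens[b]
   // valid time steps; the loop emitted above zeroes the hidden state for
   // every seq >= sequence_lens[b] (in both directions for a bidirectional
   // RNN), matching the ONNX convention of zero-padding Y past the end of
   // each sequence.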

   // Copy the hidden state into y and y_h
   if (fAttrLayout == 0) {
      if (!fNY_h.empty()) {
         if (fNSequence_lens.empty()) {
            size_t yh_size = batch_size * fAttrHiddenSize;
            if (fAttrDirection == "backward") {
               out << SP << "std::copy(" << OpName << "_hidden_state, " << OpName << "_hidden_state + "
                   << yh_size << ", tensor_" << fNY_h << ");\n";
            } else {
               size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
               out << SP << "std::copy(" << OpName << "_hidden_state + " << offset << ", " << OpName
                   << "_hidden_state + " << offset << " + " << yh_size << ", tensor_" << fNY_h << ");\n";
            }
            if (num_directions == 2) {
               out << SP << "std::copy(" << OpName << "_hidden_state + " << yh_size << ", " << OpName
                   << "_hidden_state + " << 2 * yh_size << ", tensor_" << fNY_h << " + " << yh_size << ");\n";
            }
         } else { // RNN with different sequence lengths
            if (fAttrDirection == "backward") {
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + offset);\n";
               out << SP << "}\n";
            } else {
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
               out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "size_t yh_offset = batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
               out << SP << "}\n";
            }
            if (num_directions == 2) {
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "size_t yh_offset = " << batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
               out << SP << "}\n";
            }
         }
      }
   } else { // fAttrLayout = 1
      if (!fNY.empty()) {
         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
            out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                << " + " << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << SP << "size_t y_offset = batch * " << seq_length * num_directions * fAttrHiddenSize
                << " + seq * " << num_directions * fAttrHiddenSize << " + " << direction * fAttrHiddenSize << ";\n";
            out << SP << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY << " + y_offset);\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
      if (!fNY_h.empty()) {
         if (fAttrDirection == "backward") {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         } else {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            if (fNSequence_lens.empty()) {
               out << SP << SP << "size_t seq = " << seq_length - 1 << ";\n";
            } else {
               out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
            }
            out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                << " + batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         }
         if (num_directions == 2) {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize << " + batch * "
                << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << " + "
                << fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         }
      }
   }
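
   // Layout bookkeeping for the copies above: internally the hidden state is
   // always kept timewise as {seq_length, num_directions, batch_size,
   // hidden_size}, so with layout 0 it can serve as tensor_Y directly, while
   // Y_h is filled from each batch entry's last valid time step. With layout 1
   // both Y ({batch, seq, num_directions, hidden}) and Y_h
   // ({batch, num_directions, hidden}) need the element-by-element re-indexing
   // performed by the emitted loops.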

   return out.str();
}

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif