#ifndef TMVA_SOFIE_ROPERATOR_RNN_I
#define TMVA_SOFIE_ROPERATOR_RNN_I

namespace TMVA {
namespace Experimental {
namespace SOFIE {

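// The RNN operator has two outputs, Y and Y_h, and both have the same element
// type as the input tensor X.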
template <typename T>
auto ROperator_RNN<T>::TypeInference(std::vector<ETensorType> input)
   -> std::vector<ETensorType> {
   ETensorType out = input[0];
   return {out, out};
}

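// Infer the output shapes from the shapes of X (input[0]) and W (input[1]),
// following the ONNX RNN specification: with layout == 0 (timewise) the
// outputs are Y[seq_length, num_directions, batch_size, hidden_size] and
// Y_h[num_directions, batch_size, hidden_size]; with layout == 1 (batchwise)
// the batch-first equivalents are returned.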
template <typename T>
auto ROperator_RNN<T>::ShapeInference(std::vector<std::vector<size_t>> input)
   -> std::vector<std::vector<size_t>> {
   size_t num_directions = input[1][0];
   size_t hidden_size = input[1][1];
   if (fAttrLayout == 0) {
      size_t seq_length = input[0][0];
      size_t batch_size = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{seq_length, num_directions, batch_size, hidden_size},
           {num_directions, batch_size, hidden_size}});
      return ret;
   } else {
      size_t batch_size = input[0][0];
      size_t seq_length = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{batch_size, seq_length, num_directions, hidden_size},
           {batch_size, num_directions, hidden_size}});
      return ret;
   }
}

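// Check that the operator's inputs and attributes are consistent, register
// the output tensors with the model and, when a rank-2 bias is given,
// broadcast it once here so that the generated code stays simple.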
template <typename T>
auto ROperator_RNN<T>::Initialize(RModel &model)
   -> void {
   fUseSession = model.UseSession();

   if (!model.CheckIfTensorAlreadyExist(fNX)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX +
                               " is not found in model.");
   }
   fShapeX = model.GetTensorShape(fNX);
   if (fShapeX.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX +
                               " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNW)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW +
                               " is not found in model.");
   }
   fShapeW = model.GetTensorShape(fNW);
   if (fShapeW.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW +
                               " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNR)) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR +
                               " is not found in model.");
   }
   fShapeR = model.GetTensorShape(fNR);
   if (fShapeR.size() != 3) {
      throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR +
                               " is not of 3 dimensions.");
   }
   if (!fNB.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNB)) {
         throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB +
                                  " is not found in model.");
      }
      fShapeB = model.GetTensorShape(fNB);
      if (fShapeB.size() != 2 && fShapeB.size() != 4) {
         throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB +
                                  " is not of 2 or 4 dimensions.");
      }
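      // The ONNX bias B has shape [num_directions, 2 * hidden_size] and holds
      // Wb followed by Rb. Since the two biases are always added together,
      // sum them once here and broadcast the result to
      // [num_directions, seq_length, batch_size, hidden_size], so that the
      // generated code can add the bias with a single saxpy call per
      // direction.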
      if (fShapeB.size() == 2) {
         auto original_data = model.GetInitializedTensorData(fNB);
         size_t num_directions = fShapeW[0];
         size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
         size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
         if (fType == "float") {
            float *original_bias = static_cast<float *>(original_data.get());
            float *new_bias = new float[num_directions * seq_length *
                                        batch_size * fAttrHiddenSize];
            // Variable-length arrays are not standard C++, so use a vector
            // for the per-direction sum of Wb and Rb.
            std::vector<float> sum(fAttrHiddenSize);
            for (size_t direction = 0; direction < num_directions;
                 direction++) {
               for (size_t h = 0; h < fAttrHiddenSize; h++) {
                  sum[h] = original_bias[direction * 2 * fAttrHiddenSize + h] +
                           original_bias[(2 * direction + 1) * fAttrHiddenSize + h];
               }
               for (size_t seq = 0; seq < seq_length; seq++) {
                  for (size_t batch = 0; batch < batch_size; batch++) {
                     size_t bias_offset =
                         direction * seq_length * batch_size * fAttrHiddenSize +
                         seq * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
                     std::copy(sum.begin(), sum.end(), new_bias + bias_offset);
                  }
               }
            }
            std::vector<size_t> new_bias_shape = {num_directions, seq_length,
                                                  batch_size, fAttrHiddenSize};
            std::shared_ptr<void> new_bias_ptr(new_bias, std::default_delete<float[]>());
            model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB),
                                          new_bias_shape, new_bias_ptr);
            fShapeB = model.GetTensorShape(fNB);
         }
      }
   }
   if (!fNSequence_lens.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNSequence_lens + " is not found in model.");
      }
      fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
      if (fShapeSequence_lens.size() != 1) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNSequence_lens + " is not of 1 dimension.");
      }
   }
   if (!fNInitial_h.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNInitial_h + " is not found in model.");
      }
      fShapeInitial_h = model.GetTensorShape(fNInitial_h);
      if (fShapeInitial_h.size() != 3) {
         throw std::runtime_error("TMVA SOFIE RNN Op input tensor " +
                                  fNInitial_h + " is not of 3 dimensions.");
      }
   }
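   // Register the output tensors Y (all hidden states) and Y_h (final hidden
   // state) if the model requests them.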
   if (!fNY.empty()) {
      fShapeY = ShapeInference({fShapeX, fShapeW})[0];
      if (!model.CheckIfTensorAlreadyExist(fNY)) {
         model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
      }
   }
   if (!fNY_h.empty()) {
      fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
      if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
         model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX),
                                     fShapeY_h);
      }
   }

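   // Validate the activation functions and the remaining attributes.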
   for (auto &activation : fAttrActivations) {
      if (activation != "Relu" && activation != "Tanh" &&
          activation != "Sigmoid" && activation != "Affine" &&
          activation != "LeakyRelu" && activation != "ThresholdRelu" &&
          activation != "ScaledTanh" && activation != "HardSigmoid" &&
          activation != "Elu" && activation != "Softsign" &&
          activation != "Softplus") {
         throw std::runtime_error("TMVA SOFIE - Activation function " +
                                  activation + " not implemented");
      }
   }
   if (fAttrDirection != "forward" && fAttrDirection != "backward" &&
       fAttrDirection != "bidirectional") {
      throw std::runtime_error(
          "TMVA SOFIE - Invalid RNN direction fAttrDirection = " +
          fAttrDirection);
   }
   if (fAttrHiddenSize != fShapeW[1]) {
      throw std::runtime_error(
          "TMVA SOFIE - fAttrHiddenSize must be equal to " +
          std::to_string(fShapeW[1]));
   }
   if (fAttrLayout > 1) {
      throw std::runtime_error(
          "TMVA SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) +
          " must be 0 (timewise) or 1 (batchwise)");
   }
   if (fAttrActivations.empty()) {
      if (fAttrDirection == "bidirectional") {
         fAttrActivations = {"Tanh", "Tanh"};
      } else {
         fAttrActivations = {"Tanh"};
      }
   }

   model.AddNeededStdLib("cmath");
}
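// Generate the declarations of the per-operator session data members: the
// intermediate buffers are stored as std::vector members of the generated
// Session class so that large arrays do not live on the stack of the
// inference function.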
template <typename T>
std::string ROperator_RNN<T>::GenerateSessionMembersCode(std::string opName)
{
   opName = "op_" + opName;
   std::stringstream out;

   size_t num_directions = fShapeW[0];
   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];

   // Extra buffers are only needed to transpose between the batchwise layout
   // and the timewise layout used internally.
   if (fAttrLayout != 0) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">("
          << seq_length * batch_size * input_size << ");\n";
      out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">("
          << num_directions * batch_size * fAttrHiddenSize << ");\n";
   }
   out << "std::vector<" << fType << "> fVec_" << opName << "_feedforward = std::vector<" << fType << ">("
       << seq_length * batch_size * fAttrHiddenSize << ");\n";

   // A separate hidden-state buffer is needed unless the states can be
   // written directly into the output tensor Y.
   if (fAttrLayout != 0 || fNY.empty()) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">("
          << seq_length * num_directions * batch_size * fAttrHiddenSize << ");\n";
   }

   out << "\n";

   return out.str();
}
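// Generate the inference code for the RNN operator. The emitted code
// (1) transposes the input to the timewise layout if needed, (2) computes the
// feedforward contribution X * W^T (+ broadcast bias) for all time steps with
// one GEMM per direction, (3) runs the recurrence
// h_t = f(feedforward_t + h_{t-1} * R^T) step by step, applying clipping and
// the chosen activation, and (4) writes Y and Y_h in the requested layout.
// As a rough illustration (not verbatim output), the generated body for a
// unidirectional, timewise float RNN looks like:
//
//    float *op_rnn_input = tensor_X;                       // no copy needed
//    BLAS::sgemm_(...);                                    // feedforward pass
//    for (size_t seq = 0; seq < seq_length; seq++) {...}   // recurrence
//    std::copy(...);                                       // write Y_h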
template <typename T>
auto ROperator_RNN<T>::Generate(std::string OpName)
   -> std::string {
   OpName = "op_" + OpName;
   std::stringstream out;

   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];
   size_t num_directions = fShapeW[0];

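   // With layout == 0 the input can be used as is; with layout == 1 it is
   // transposed into the timewise layout [seq_length, batch_size, input_size].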
   if (fAttrLayout == 0) {
      if (fType == "float") {
         out << SP << "float *" << OpName << "_input = tensor_" << fNX << ";\n";
      }
   } else {
      if (fUseSession)
         out << SP << fType << " * " << OpName << "_input = fVec_" << OpName << "_input.data();\n";
      else
         out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "];\n";
      out << SP << "for(size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
      out << SP << SP << SP << "for(size_t i = 0; i < " << input_size << "; i++) {\n";
      out << SP << SP << SP << SP << OpName << "_input[seq * " << batch_size * input_size
          << " + batch * " << input_size << " + i] = " << "tensor_" << fNX << "[batch * "
          << seq_length * input_size << " + seq * " << input_size << " + i];\n";
      out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
   }
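   // Prepare the initial hidden state; with layout == 1 it is transposed to
   // the internal layout [num_directions, batch_size, hidden_size].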
   if (!fNInitial_h.empty()) {
      if (fAttrLayout == 0) {
         out << SP << fType << " *" << OpName << "_initial_hidden_state = tensor_"
             << fNInitial_h << ";\n";
      } else {
         if (fUseSession)
            out << SP << fType << " * " << OpName << "_initial_hidden_state = fVec_" << OpName
                << "_initial_hidden_state.data();\n";
         else
            out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size *
                fAttrHiddenSize << "] = {0};\n";

         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "for(size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
            out << SP << SP << SP << OpName << "_initial_hidden_state["
                << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize
                << " + h] = tensor_" << fNInitial_h << "[batch * " << num_directions * fAttrHiddenSize
                << " + " << direction * fAttrHiddenSize << " + h];\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
   }

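   // Buffer holding X_t * W^T (+ bias) for all time steps of one direction.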
   if (fUseSession)
      out << SP << fType << " * " << OpName << "_feedforward = fVec_" << OpName
          << "_feedforward.data();\n";
   else
      out << SP << fType << " " << OpName << "_feedforward[" << seq_length * batch_size * fAttrHiddenSize << "] = {0};\n";

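   // With timewise layout the hidden states are written directly into the
   // output tensor Y; otherwise an intermediate buffer is used.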
   if (fAttrLayout == 0 && !fNY.empty()) {
      out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n";
   } else {
      if (fUseSession)
         out << SP << fType << " * " << OpName << "_hidden_state = fVec_" << OpName << "_hidden_state.data();\n";
      else
         out << SP << fType << " " << OpName << "_hidden_state[" << seq_length * num_directions *
             batch_size * fAttrHiddenSize << "] = {0};\n";
   }

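   // BLAS arguments for the feedforward GEMM: it computes
   // feedforward[m x n] = input[m x k] * W^T[k x n] with
   // m = seq_length * batch_size, n = hidden_size and k = input_size.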
   out << SP << "char " << OpName << "_transA = 'N';\n";
   out << SP << "char " << OpName << "_transB = 'T';\n";
   out << SP << "int " << OpName << "_m = " << seq_length * batch_size << ";\n";
   out << SP << "int " << OpName << "_n = " << fAttrHiddenSize << ";\n";
   out << SP << "int " << OpName << "_k = " << input_size << ";\n";
   if (fType == "float") {
      out << SP << "float " << OpName << "_alpha = 1.;\n";
      out << SP << "float " << OpName << "_beta = .0;\n";
   }
   if (!fNB.empty()) {
      out << SP << "int " << OpName << "_bias_size = " << seq_length * batch_size * fAttrHiddenSize << ";\n";
      out << SP << "int " << OpName << "_incx = 1;\n";
      out << SP << "int " << OpName << "_incy = 1;\n";
   }

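   // Generate one pass over the whole sequence per direction.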
   for (size_t direction = 0; direction < num_directions; direction++) {
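      // Feedforward pass: a single GEMM computes input * W^T for all time
      // steps; for the second direction W is offset by hidden_size * input_size.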
      if (fType == "float") {
         if (direction == 0) {
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName
                << "_alpha, tensor_" << fNW << ", &" << OpName << "_k, " << OpName
                << "_input, &" << OpName << "_k, &" << OpName << "_beta, " << OpName
                << "_feedforward, &" << OpName << "_n);\n";
         } else {
            out << SP << "size_t " << OpName << "_w_offset = " << fAttrHiddenSize * input_size
                << ";\n";
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName
                << "_alpha, tensor_" << fNW << " + " << OpName << "_w_offset, &" << OpName
                << "_k, " << OpName << "_input, &" << OpName << "_k, &" << OpName << "_beta, "
                << OpName << "_feedforward, &" << OpName << "_n);\n";
         }
      }

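      // Add the broadcast bias to the whole feedforward buffer with one saxpy.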
      if (!fNB.empty()) {
         if (fType == "float") {
            if (direction == 0) {
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << ", &" << OpName << "_incx, " << OpName << "_feedforward, &" << OpName << "_incy);\n";
            } else {
               out << SP << "size_t " << OpName << "_bias_offset = "
                   << seq_length * batch_size * fAttrHiddenSize << ";\n";
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << OpName << "_bias_offset, &" << OpName << "_incx, " << OpName
                   << "_feedforward, &" << OpName << "_incy);\n";
            }
         }
      }

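      // Scatter the feedforward results into the hidden-state buffer, which is
      // laid out as [seq_length, num_directions, batch_size, hidden_size].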
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "size_t offset = seq * " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t size = " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t h_offset = seq * "
          << num_directions * batch_size * fAttrHiddenSize << " + "
          << direction * batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "std::copy(" << OpName << "_feedforward + offset, " << OpName
          << "_feedforward + offset + size, " << OpName << "_hidden_state + h_offset);\n";
      out << SP << "}\n";

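      // Recurrence: for each step accumulate h_{t-1} * R^T onto the
      // feedforward term already stored in the hidden-state buffer. Backward
      // passes iterate over the sequence in reverse.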
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      if (fAttrDirection == "backward" || direction == 1) {
         out << SP << SP << "size_t index = " << seq_length - 1 << " - seq;\n";
      } else {
         out << SP << SP << "size_t index = seq;\n";
      }

      out << SP << SP << "int m2 = " << batch_size << ";\n";
      out << SP << SP << "size_t offset = index * "
          << num_directions * batch_size * fAttrHiddenSize << " + "
          << direction * batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "size_t size = " << batch_size * fAttrHiddenSize << ";\n";
      out << SP << SP << "if (seq == 0) {\n";
      if (!fNInitial_h.empty()) {
         // First step: accumulate initial_hidden_state * R^T.
         out << SP << SP << SP << "size_t r_offset = "
             << direction * fAttrHiddenSize * fAttrHiddenSize << ";\n";
         out << SP << SP << SP << "size_t initial_hidden_state_offset = "
             << direction * batch_size * fAttrHiddenSize << ";\n";
         if (fType == "float") {
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName
                << "_transA, &" << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName
                << "_alpha, tensor_" << fNR << " + r_offset, &" << OpName << "_n, " << OpName
                << "_initial_hidden_state + initial_hidden_state_offset, &" << OpName << "_n, &"
                << OpName << "_alpha, " << OpName << "_hidden_state + offset, &" << OpName << "_n);\n";
         }
      }
      out << SP << SP << "} else {\n";
      // Later steps: accumulate the previous step's hidden state times R^T.
      out << SP << SP << SP << "size_t r_offset = "
          << direction * fAttrHiddenSize * fAttrHiddenSize << ";\n";
      if (fAttrDirection == "backward" || direction == 1) {
         out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
             << num_directions * batch_size * fAttrHiddenSize
             << " + " << direction * batch_size * fAttrHiddenSize << ";\n";
      } else {
         out << SP << SP << SP << "size_t previous_offset = (seq - 1) * "
             << num_directions * batch_size * fAttrHiddenSize
             << " + " << direction * batch_size * fAttrHiddenSize << ";\n";
      }
      if (fType == "float") {
         out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
             << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR
             << " + r_offset, &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
             << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_hidden_state + offset, &"
             << OpName << "_n);\n";
      }
      out << SP << SP << "}\n";

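      // Optionally clip the pre-activation values to [-fAttrClip, fAttrClip].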
      if (fAttrClip > .0) {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float x = (" << OpName << "_hidden_state[i] > " << -fAttrClip
                << ") ? " << OpName << "_hidden_state[i] : " << -fAttrClip << ";\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (x < " << fAttrClip
             << ") ? x : " << fAttrClip << ";\n";
         out << SP << SP << "}\n";
      }

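      // Apply the activation function of this direction in place.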
      if (fAttrActivations[direction] == "Relu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Tanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = std::exp(-2 * " << OpName << "_hidden_state[i]);\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Sigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = 1. / (1. + std::exp(-" << OpName
             << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Affine") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * " << OpName << "_hidden_state[i] + " << fAttrActivationBeta[direction] << ";\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "ScaledTanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = std::exp(-2 * " << fAttrActivationBeta[direction]
                << " * " << OpName << "_hidden_state[i]);\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "HardSigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction] << " * "
                << OpName << "_hidden_state[i] + " << fAttrActivationBeta[direction] << ";\n";
            out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
         }
         out << SP << SP << SP << OpName << "_hidden_state[i] = (b < 1.) ? b : 1.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "LeakyRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * " << OpName << "_hidden_state[i];\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "ThresholdRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < "
             << fAttrActivationAlpha[direction] << ")\n";
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Elu") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_hidden_state[i] < 0.)\n";
         // Elu: alpha * (exp(x) - 1) for x < 0.
         out << SP << SP << SP << SP << OpName << "_hidden_state[i] = " << fAttrActivationAlpha[direction]
             << " * (std::exp(" << OpName << "_hidden_state[i]) - 1.);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction] == "Softsign") {
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = " << OpName
             << "_hidden_state[i] / (1. + std::abs(" << OpName << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      } else { // Softplus
         out << SP << SP << "for (size_t i = offset; i < offset + size; i++) {\n";
         out << SP << SP << SP << OpName << "_hidden_state[i] = std::log(1. + std::exp("
             << OpName << "_hidden_state[i]));\n";
         out << SP << SP << "}\n";
      }
      out << SP << "}\n";
   }

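   // When sequence_lens is given, zero the hidden states of the padded time
   // steps of every batch entry.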
   if (!fNSequence_lens.empty()) {
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
      out << SP << SP << SP << "if (seq >= tensor_" << fNSequence_lens << "[batch]) {\n";
      out << SP << SP << SP << SP << "for (size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
      out << SP << SP << SP << SP << SP << OpName << "_hidden_state[seq * "
          << num_directions * batch_size * fAttrHiddenSize << " + batch * "
          << fAttrHiddenSize << " + h] = 0.;\n";
      if (num_directions == 2) {
         out << SP << SP << SP << SP << SP << OpName << "_hidden_state[seq * "
             << num_directions * batch_size * fAttrHiddenSize << " + " << batch_size * fAttrHiddenSize
             << " + batch * " << fAttrHiddenSize << " + h] = 0.;\n";
      }
      out << SP << SP << SP << SP << "}\n";
      out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
   }

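   // Write the requested outputs. With layout == 0, Y already aliases the
   // hidden-state buffer, so only Y_h needs to be filled; with layout == 1
   // both Y and Y_h are copied out with the transposition applied.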
   if (fAttrLayout == 0) {
      if (!fNY_h.empty()) {
         if (fNSequence_lens.empty()) {
            size_t yh_size = batch_size * fAttrHiddenSize;
            if (fAttrDirection == "backward") {
               // The backward pass ends at seq index 0.
               out << SP << "std::copy(" << OpName << "_hidden_state, " << OpName << "_hidden_state + "
                   << yh_size << ", tensor_" << fNY_h << ");\n";
            } else {
               size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
               out << SP << "std::copy(" << OpName << "_hidden_state + " << offset << ", " << OpName
                   << "_hidden_state + " << offset << " + " << yh_size << ", tensor_" << fNY_h << ");\n";
            }
            if (num_directions == 2) {
               out << SP << "std::copy(" << OpName << "_hidden_state + " << yh_size << ", " << OpName
                   << "_hidden_state + " << 2 * yh_size << ", tensor_" << fNY_h << " + " << yh_size << ");\n";
            }
         } else {
            if (fAttrDirection == "backward") {
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + offset);\n";
               out << SP << "}\n";
            } else {
               // The final state of each batch entry sits at seq = len - 1.
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
               out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "size_t yh_offset = batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
               out << SP << "}\n";
            }
            if (num_directions == 2) {
               out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
               out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "size_t yh_offset = " << batch_size * fAttrHiddenSize
                   << " + batch * " << fAttrHiddenSize << ";\n";
               out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                   << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
               out << SP << "}\n";
            }
         }
      }
   } else {
      if (!fNY.empty()) {
         // Transpose the hidden states back to the batchwise layout of Y.
         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
            out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                << " + " << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << SP << "size_t y_offset = batch * " << seq_length * num_directions * fAttrHiddenSize
                << " + seq * " << num_directions * fAttrHiddenSize << " + " << direction * fAttrHiddenSize << ";\n";
            out << SP << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY << " + y_offset);\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
      if (!fNY_h.empty()) {
         if (fAttrDirection == "backward") {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         } else {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            if (fNSequence_lens.empty()) {
               out << SP << SP << "size_t seq = " << seq_length - 1 << ";\n";
            } else {
               out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
            }
            out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
                << " + batch * " << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         }
         if (num_directions == 2) {
            out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize << " + batch * "
                << fAttrHiddenSize << ";\n";
            out << SP << SP << "size_t yh_offset = batch * " << num_directions * fAttrHiddenSize << " + "
                << fAttrHiddenSize << ";\n";
            out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
                << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + yh_offset);\n";
            out << SP << "}\n";
         }
      }
   }

   return out.str();
}

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif // TMVA_SOFIE_ROPERATOR_RNN_I