// File indexing completed on 2025-09-16 09:08:51
0001 #ifndef TMVA_SOFIE_ROPERATOR_CONV
0002 #define TMVA_SOFIE_ROPERATOR_CONV
0003
0004 #include "TMVA/SOFIE_common.hxx"
0005 #include "TMVA/ROperator.hxx"
0006 #include "TMVA/RModel.hxx"
0007
#include <algorithm>
#include <cassert>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <type_traits>
#include <vector>
0014
0015 namespace TMVA {
0016 namespace Experimental {
0017 namespace SOFIE {
0018
0019 template<typename T>
0020 class ROperator_Conv final : public ROperator
0021 {
0022 private:
0023 std::string fAttrAutopad;
0024 std::vector<size_t> fAttrDilations;
0025 size_t fAttrGroup;
0026 std::vector<size_t> fAttrKernelShape;
0027 std::vector<size_t> fAttrPads;
0028 std::vector<size_t> fAttrStrides;
0029
0030 std::string fNX;
0031 std::string fNW;
0032 std::string fNB;
0033 std::string fNB2;
0034 std::string fNY;
0035
0036 std::string convK;
0037 std::string imcol;
0038
0039 std::vector<size_t> fShapeX;
0040 std::vector<size_t> fShapeW;
0041 std::vector<size_t> fShapeB;
0042 std::vector<size_t> fShapeY;
0043
0044 std::string fType;
0045
0046 size_t fDim;
0047
0048
0049 public:
0050
0051 ROperator_Conv() {}
0052
0053 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
0054 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
0055 std::vector<size_t> strides, std::string nameX, std::string nameW,
0056 std::string nameB, std::string nameY):
0057 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
0058 fAttrPads(pads), fAttrStrides(strides),
0059 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
0060 fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
0061 {
0062 if(std::is_same<T, float>::value) {
0063 fType = "float";
0064 } else {
0065 throw
0066 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
0067 }
0068 fInputTensorNames = { fNX, fNB };
0069 fOutputTensorNames = { fNY };
0070 }
0071
0072 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
0073 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
0074 std::vector<size_t> strides, std::string nameX, std::string nameW,
0075 std::string nameY):
0076 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
0077 fAttrPads(pads), fAttrStrides(strides),
0078 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
0079 {
0080 if(std::is_same<T, float>::value) {
0081 fType = "float";
0082 } else {
0083 throw
0084 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
0085 }
0086 fInputTensorNames = { fNX };
0087 fOutputTensorNames = { fNY };
0088 }
0089
0090 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
0091 ETensorType out = input[0];
0092 return {out};
0093 }
0094
0095
0096 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override {
0097
0098
0099
0100 if (input.size() > 3 ) {
0101 throw
0102 std::runtime_error("TMVA SOFIE Conv Op Shape inference need 2 or 3 input tensors");
0103 }
0104 for(size_t i = 0; i < input.size(); i++) {
0105 if (input[i].size() -2 != fDim) {
0106 throw
0107 std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid inputs ");
0108 }
0109 }
0110
0111 if (fAttrGroup == 0) {
0112 fAttrGroup = input[0][1] / input[1][1];
0113 }
0114
0115
0116 size_t k1 = ((fAttrKernelShape.empty())? input[1][2] : fAttrKernelShape[0]);
0117 size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? input[1][3] : fAttrKernelShape[1]) : 1;
0118 size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? input[1][4] : fAttrKernelShape[2]) : 1;
0119
0120
0121 size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
0122 size_t i2 = (fDim > 2) ? 4 : 3;
0123 size_t i3 = 5;
0124
0125 if (fAttrDilations.empty()) {
0126 fAttrDilations = {1, 1, 1};
0127 }
0128 fAttrDilations.resize(3);
0129 if (fDim < 3) {
0130 fAttrDilations.resize(3, 1);
0131 }
0132
0133 fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
0134 k2 + (fAttrDilations[1] - 1) * (k2 - 1),
0135 k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
0136
0137 if (fAttrAutopad == "NOTSET") {
0138 if (fAttrPads.empty()) {
0139 fAttrPads = {1, 1, 1, 1, 1, 1};
0140 }
0141 } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
0142 if (fDim == 1)
0143 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[0] / 2};
0144 else if (fDim == 2)
0145 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2};
0146 else if (fDim == 3)
0147 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2,
0148 fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2};
0149
0150
0151 if (fAttrKernelShape[0] % 2 == 1) {
0152 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++;
0153 }
0154 if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) {
0155 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++;
0156 }
0157 if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) {
0158 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[2]++ : fAttrPads[i3]++;
0159 }
0160 } else if (fAttrAutopad != "VALID") {
0161 throw
0162 std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
0163 }
0164
0165 if (fDim < 3) fAttrPads.resize(6, 0);
0166
0167 if (fAttrStrides.empty()) {
0168 fAttrStrides = {1, 1, 1};
0169 }
0170 if (fDim < 3)
0171 fAttrStrides.resize(3, 1);
0172
0173
0174 size_t input1 = input[0][2];
0175 size_t input2 = (fDim > 1) ? input[0][3] : 1;
0176 size_t input3 = (fDim > 2) ? input[0][4] : 1;
0177
0178 size_t pad1 = fAttrPads[0] + fAttrPads[i1];
0179 size_t output1 = (input1 + pad1 - fAttrKernelShape[0]) / fAttrStrides[0] + 1;
0180
0181 size_t batch_size = input[0][0];
0182 size_t output_channels = input[1][0];
0183
0184 std::vector<std::vector<size_t>> ret({{ batch_size, output_channels, output1 }});
0185
0186 if (fDim == 1)
0187 return ret;
0188
0189 size_t pad2 = fAttrPads[1] + fAttrPads[i2];
0190 size_t output2 = (input2 + pad2 - fAttrKernelShape[1]) / fAttrStrides[1] + 1;
0191
0192 ret[0].push_back(output2);
0193 if (fDim == 2)
0194 return ret;
0195
0196 size_t pad3 = fAttrPads[2] + fAttrPads[i3];
0197 size_t output3 = (input3 + pad3 - fAttrKernelShape[2] ) / fAttrStrides[2] + 1;
0198
0199
0200 ret[0].push_back(output3);
0201 return ret;
0202 }
0203
0204 void Initialize(RModel& model) override {
0205 fUseSession = model.UseSession();
0206 if (!model.CheckIfTensorAlreadyExist(fNX)) {
0207 throw
0208 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
0209 }
0210 fShapeX = model.GetTensorShape(fNX);
0211 if (fShapeX.size() < 3 || fShapeX.size() > 5) {
0212 std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
0213 throw
0214 std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");
0215 }
0216 fDim = fShapeX.size() - 2;
0217 if (!model.CheckIfTensorAlreadyExist(fNW)) {
0218 throw
0219 std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
0220 }
0221 fShapeW = model.GetTensorShape(fNW);
0222 if (fShapeW.size() < 3 || fShapeW.size() > 5) {
0223 std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
0224 throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");
0225 }
0226 fShapeY = ShapeInference({fShapeX, fShapeW})[0];
0227 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
0228 if (fNB != "") {
0229 if (!model.CheckIfTensorAlreadyExist(fNB)) {
0230 throw
0231 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
0232 }
0233 fShapeB = model.GetTensorShape(fNB);
0234 std::vector<size_t> targetShape(fShapeY.begin() + 1, fShapeY.end());
0235 bool broadcast_needed = !UTILITY::AreSameShape(fShapeB, targetShape);
0236 if (broadcast_needed) {
0237 auto original_data = model.GetInitializedTensorData(fNB);
0238
0239 if (fShapeB.size() < 1)
0240 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
0241
0242
0243 if (fShapeB[0] != fShapeY[1])
0244 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
0245 ConvertShapeToString(fShapeB));
0246 if (fType != "float")
0247 throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
0248
0249 if (!fUseSession) {
0250 std::vector<size_t> shape(fDim + 1, 1);
0251 shape[0] = fShapeB[0];
0252 std::shared_ptr<void> new_data_ptr(
0253 UTILITY::UnidirectionalBroadcast<float>(static_cast<float *>(original_data.get()), shape, targetShape),
0254 std::default_delete<float[]>());
0255 model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), targetShape, new_data_ptr);
0256 fShapeB = model.GetTensorShape(fNB);
0257 fNB2 = fNB;
0258 }
0259 else {
0260
0261
0262 fNB2 = fNB + "bcast";
0263 model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape);
0264 }
0265 }
0266 }
0267
0268 size_t outputChannelSize = fShapeY[2];
0269 size_t kernelSize = fAttrKernelShape[0];
0270 for (size_t i = 1; i < fDim; i++) {
0271 outputChannelSize *= fShapeY[2 + i];
0272 kernelSize *= fAttrKernelShape[i];
0273 }
0274
0275 std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
0276 std::vector<size_t> shape2 = {fShapeW[1], kernelSize, outputChannelSize};
0277 model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 );
0278 model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 );
0279 convK = fNX +"_f";
0280 imcol = fNX +"_xcol";
0281 fOutputTensorNames.emplace_back(convK);
0282 fOutputTensorNames.emplace_back(imcol);
0283 }
0284
0285 std::string GenerateInitCode() override {
0286 std::stringstream out;
0287
0288 if (!fNB2.empty()) {
0289
0290 std::vector<size_t> shape(fDim + 1, 1);
0291 shape[0] = fShapeB[0];
0292 std::vector<size_t> targetShape(fShapeY.begin() + 1, fShapeY.end());
0293 out << SP << "{\n";
0294 out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_"
0295 << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n";
0296 out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n";
0297 out << SP << SP << "delete[] data;\n";
0298 out << SP << "}\n";
0299 }
0300 return out.str();
0301 }
0302
0303 std::string Generate(std::string OpName) override {
0304 OpName = "op_" + OpName;
0305
0306 if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
0307 throw
0308 std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
0309 }
0310
0311 std::stringstream out;
0312 size_t bsize = fShapeX[0];
0313 size_t kDepth = (fDim > 2) ? fShapeW[2] : 1;
0314 size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1;
0315 size_t kWidth = fShapeW[fDim+1];
0316 size_t iDepth = (fDim > 2) ? fShapeX[2] : 1;
0317 size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1;
0318 size_t iWidth = fShapeX[fDim+1];
0319 size_t oDepth = (fDim > 2) ? fShapeY[2] : 1;
0320 size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1;
0321 size_t oWidth = fShapeY[fDim+1];
0322
0323 out << "\n//---- operator Conv " << OpName << "\n";
0324
0325
0326
0327
0328
0329 size_t id = (fDim > 2) ? fDim-3 : 2;
0330 size_t ih = (fDim > 1) ? fDim-2 : 1;
0331 size_t iw = fDim-1;
0332
0333 size_t wstrideDil = fAttrDilations[iw];
0334 size_t hstride = kWidth;
0335 size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw];
0336 size_t dstride = kHeight * kWidth;
0337 size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
0338 size_t icstride = kHeight * kWidth * kDepth;
0339 size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
0340 size_t ocstride = fShapeW[1] * icstride;
0341 size_t ocstrideDil = fShapeW[1] * icstrideDil;
0342
0343 out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
0344 out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
0345 if (fDim > 2)
0346 out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
0347 if (fDim > 1)
0348 out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
0349 out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";
0350
0351 out << SP << SP << SP << SP << SP << "tensor_" <<fNX << "_f[oc * "
0352 << ocstrideDil << " + ic * " << icstrideDil;
0353 if (fDim > 2) out << " + kd * " << dstrideDil;
0354 if (fDim > 1) out << " + kh * " << hstrideDil;
0355 out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
0356 if (fDim > 2) out << " + kd * " << dstride;
0357 if (fDim > 1) out << " + kh * " << hstride;
0358 out << " + kw ];\n";
0359
0360 out << SP << SP << SP << SP << "}\n";
0361 if (fDim > 1) out << SP << SP << SP << "}\n";
0362 if (fDim > 2) out << SP << SP << SP << "}\n";
0363 out << SP << SP << "}\n";
0364 out << SP << "}\n";
0365
0366
0367 out << SP << "char " << OpName << "_transA = 'N';\n";
0368 out << SP << "char " << OpName << "_transB = 'N';\n";
0369 out << SP << "int " << OpName << "_m = " << oHeight * oWidth * oDepth << ";\n";
0370 assert(fShapeY[1] == fShapeW[0]);
0371 assert(fShapeW[1] == fShapeX[1] / fAttrGroup);
0372 out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n";
0373 out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
0374 out << SP << "float " << OpName << "_alpha = 1.0;\n";
0375 out << SP << "float " << OpName << "_beta = 0.0;\n";
0376
0377
0378
0379 out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";
0380
0381
0382
0383
0384
0385
0386
0387
0388 if (fDim ==1) {
0389 if (fAttrPads[0] != fAttrPads[1] ) {
0390 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
0391 << std::endl;
0392 fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
0393 }
0394 fAttrPads[1] = 0;
0395 fAttrStrides[1] = 1;
0396 }
0397 if (fDim == 2) {
0398 if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
0399 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
0400 fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
0401 fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
0402 }
0403 }
0404 if (fDim == 3) {
0405 if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
0406 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
0407 fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
0408 fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
0409 fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
0410 }
0411 }
0412 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
0413
0414 if (fAttrGroup == 1) {
0415 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << ";\n";
0416
0417
0418 if (fDim < 3) {
0419 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
0420 << " + x_offset,"
0421
0422
0423
0424 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
0425 if (fDim == 1)
0426 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
0427 << fAttrDilations[0];
0428 else
0429 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
0430 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
0431 << fAttrDilations[1];
0432 out << "," << "tensor_" <<fNX << "_xcol);\n\n ";
0433 } else {
0434
0435 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
0436 << " + x_offset,"
0437
0438
0439
0440 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
0441 << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
0442 << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
0443 << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
0444 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
0445 << "tensor_" << fNX << "_xcol);\n\n ";
0446 }
0447
0448 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
0449 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName
0450 << "_m,\n";
0451 out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
0452 << " + out_offset, &" << OpName << "_m);\n";
0453 } else {
0454
0455
0456
0457
0458 out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
0459 out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << " + g * "
0460 << fShapeW[1] * iDepth * iHeight * iWidth << ";\n ";
0461 out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << " + g * "
0462 << fShapeW[0] * oDepth * oHeight * oWidth / fAttrGroup << ";\n ";
0463
0464 if (fDim < 3) {
0465 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
0466 << " + x_offset,"
0467
0468
0469
0470 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
0471 if (fDim == 1)
0472 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
0473 << fAttrDilations[0];
0474 else
0475 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
0476 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
0477 << fAttrDilations[1];
0478 out << ", tensor_" << fNX << "_xcol);\n\n ";
0479 } else {
0480
0481 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
0482 << " + x_offset,"
0483
0484
0485
0486 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
0487 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
0488 << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
0489 << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
0490 << "_xcol);\n\n ";
0491 }
0492
0493
0494
0495 out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
0496
0497 out << SP << SP << SP << "size_t offset_f = g * "
0498 << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup
0499 << ";\n";
0500 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
0501 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
0502 << "_m,\n";
0503 out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
0504 << " + out_offset"
0505 << ", &" << OpName << "_m);\n";
0506
0507 out << SP << SP << "}\n";
0508 }
0509
0510 if (fNB2 != "") {
0511 out << SP << "int " << OpName << "_size = " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
0512 out << SP << "float " << OpName << "_gamma = 1.0;\n";
0513 out << SP << "int " << OpName << "_incx = 1;\n";
0514 out << SP << "int " << OpName << "_incy = 1;\n";
0515
0516 out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &"
0517 << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";
0518
0519 }
0520 out << SP << "}\n";
0521
0522 return out.str();
0523 }
0524
0525
0526
0527 std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }
0528 };
0529
0530 }
0531 }
0532 }
0533
0534 #endif