Warning, file /include/root/TMVA/ROperator_Conv.hxx was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 #ifndef TMVA_SOFIE_ROPERATOR_CONV
0002 #define TMVA_SOFIE_ROPERATOR_CONV
0003
0004 #include "TMVA/SOFIE_common.hxx"
0005 #include "TMVA/ROperator.hxx"
0006 #include "TMVA/RModel.hxx"
0007
0008 #include <memory>
0009 #include <sstream>
0010 #include <algorithm>
0011 #include <stdexcept>
0012 #include <vector>
0013 #include <cassert>
0014
0015 namespace TMVA {
0016 namespace Experimental {
0017 namespace SOFIE {
0018
0019 template<typename T>
// SOFIE code generator for the ONNX Conv operator (1D/2D/3D convolution).
// Emits C++ code that builds a dilated kernel matrix, runs im2col on the
// input, performs the convolution as a BLAS sgemm and adds the bias via saxpy.
0020 class ROperator_Conv final : public ROperator
0021 {
0022 private:
// ONNX Conv attributes. DoShapeInference() normalizes them to 3 spatial
// dimensions, filling unused trailing entries with neutral values.
0023 std::string fAttrAutopad;
0024 std::vector<size_t> fAttrDilations;
0025 size_t fAttrGroup;
0026 std::vector<size_t> fAttrKernelShape;
0027 std::vector<size_t> fAttrPads;
0028 std::vector<size_t> fAttrStrides;
0029
// Cleaned tensor names: X (data), W (weights), B (bias, may be empty),
// B2 (broadcast bias set up in Initialize), Y (output).
0030 std::string fNX;
0031 std::string fNW;
0032 std::string fNB;
0033 std::string fNB2;
0034 std::string fNY;
0035
// Helper intermediate tensors created in Initialize():
// convK = "<X>_f" (dilated kernel matrix), imcol = "<X>_xcol" (im2col buffer).
0036 std::string convK;
0037 std::string imcol;
0038
// Shapes: X and Y may carry parametric dimensions (Dim); W and B are static.
0039 std::vector<Dim> fShapeX;
0040 std::vector<size_t> fShapeW;
0041 std::vector<size_t> fShapeB;
0042 std::vector<Dim> fShapeY;
0043
// Element type name used in the generated code; only "float" is supported.
0044 std::string fType;
0045
// Number of spatial dimensions (1, 2 or 3), deduced from the input rank.
0046 size_t fDim;
0047
0048
0049 public:
0050
0051 ROperator_Conv() {}
0052
0053 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
0054 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
0055 std::vector<size_t> strides, std::string nameX, std::string nameW,
0056 std::string nameB, std::string nameY):
0057 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
0058 fAttrPads(pads), fAttrStrides(strides),
0059 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)),
0060 fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY))
0061 {
0062 if(std::is_same<T, float>::value) {
0063 fType = "float";
0064 } else {
0065 throw
0066 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
0067 }
0068 fInputTensorNames = { fNX, fNB };
0069 fOutputTensorNames = { fNY };
0070 }
0071
0072 ROperator_Conv(std::string autopad, std::vector<size_t> dilations,
0073 size_t group, std::vector<size_t> kernelShape, std::vector<size_t> pads,
0074 std::vector<size_t> strides, std::string nameX, std::string nameW,
0075 std::string nameY):
0076 fAttrAutopad(autopad), fAttrDilations(dilations), fAttrGroup(group), fAttrKernelShape(kernelShape),
0077 fAttrPads(pads), fAttrStrides(strides),
0078 fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY))
0079 {
0080 if(std::is_same<T, float>::value) {
0081 fType = "float";
0082 } else {
0083 throw
0084 std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator");
0085 }
0086 fInputTensorNames = { fNX };
0087 fOutputTensorNames = { fNY };
0088 }
0089
0090 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
0091 ETensorType out = input[0];
0092 return {out};
0093 }
0094
0095
0096 std::vector<Dim> DoShapeInference(const std::vector<Dim> & input, const std::vector<size_t> & weight) {
0097
0098
0099
0100 if (input.size() -2 != fDim) {
0101 throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid input ");
0102 }
0103 if (weight.size() -2 != fDim) {
0104 throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid weights ");
0105 }
0106 if (fAttrGroup == 0 && input[1].isParam)
0107 throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without group attr");
0108 if (fAttrKernelShape.empty()) {
0109 if (input[2].isParam || (fDim > 1 && input[3].isParam) || (fDim > 2 && input[4].isParam))
0110 throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without kernel attr");
0111 }
0112
0113 if (fAttrGroup == 0) {
0114 fAttrGroup = input[1].dim / weight[1];
0115 }
0116
0117
0118 size_t k1 = ((fAttrKernelShape.empty())? weight[2] : fAttrKernelShape[0]);
0119 size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? weight[3] : fAttrKernelShape[1]) : 1;
0120 size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? weight[4] : fAttrKernelShape[2]) : 1;
0121
0122
0123 size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1;
0124 size_t i2 = (fDim > 2) ? 4 : 3;
0125 size_t i3 = 5;
0126
0127 if (fAttrDilations.empty()) {
0128 fAttrDilations = {1, 1, 1};
0129 }
0130 fAttrDilations.resize(3);
0131 if (fDim < 3) {
0132 fAttrDilations.resize(3, 1);
0133 }
0134
0135 fAttrKernelShape = {k1 + (fAttrDilations[0] - 1) * (k1 - 1),
0136 k2 + (fAttrDilations[1] - 1) * (k2 - 1),
0137 k3 + (fAttrDilations[2] - 1) * (k3 - 1)};
0138
0139 if (fAttrAutopad == "NOTSET") {
0140 if (fAttrPads.empty()) {
0141 fAttrPads = {1, 1, 1, 1, 1, 1};
0142 }
0143 } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") {
0144 if (fDim == 1)
0145 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[0] / 2};
0146 else if (fDim == 2)
0147 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2};
0148 else if (fDim == 3)
0149 fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2,
0150 fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2};
0151
0152
0153 if (fAttrKernelShape[0] % 2 == 1) {
0154 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++;
0155 }
0156 if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) {
0157 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++;
0158 }
0159 if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) {
0160 (fAttrAutopad == "SAME_UPPER") ? fAttrPads[2]++ : fAttrPads[i3]++;
0161 }
0162 } else if (fAttrAutopad != "VALID") {
0163 throw
0164 std::runtime_error("TMVA SOFIE Conv Op invalid fAutopad");
0165 }
0166
0167 if (fDim < 3) fAttrPads.resize(6, 0);
0168
0169 if (fAttrStrides.empty()) {
0170 fAttrStrides = {1, 1, 1};
0171 }
0172 if (fDim < 3)
0173 fAttrStrides.resize(3, 1);
0174
0175
0176 Dim input1 = input[2];
0177 Dim input2 = (fDim > 1) ? input[3] : Dim{1};
0178 Dim input3 = (fDim > 2) ? input[4] : Dim{1};
0179
0180 size_t pad1 = fAttrPads[0] + fAttrPads[i1];
0181
0182
0183
0184 auto computeOutput = [&](Dim inputDim, size_t kernel, size_t pad, size_t stride) {
0185 if (!inputDim.isParam) {
0186 size_t outSize = (inputDim.dim + pad - kernel) / stride + 1;
0187 return Dim{outSize};
0188 } else {
0189 if (stride == 1){
0190 if ((pad - kernel + 1) == 0 )
0191
0192 return inputDim;
0193 else {
0194 int64_t v = pad - kernel + 1;
0195 std::string outStr = "(" + inputDim.param + "+" + std::to_string(v) + ")";
0196 return Dim{ outStr, static_cast<size_t>(-1)};
0197 }
0198 } else {
0199 int64_t v = pad - kernel;
0200 std::string outStr = "((" + inputDim.param + "+" + std::to_string(v) + ")/"
0201 + std::to_string(stride) + "1)";
0202 return Dim{ outStr, static_cast<size_t>(-1)};
0203 }
0204 }
0205 std::runtime_error("TMVA SOFIE Conv Op - invalid values");
0206 return Dim{};
0207 };
0208
0209 Dim output1 = computeOutput(input1, fAttrKernelShape[0], pad1, fAttrStrides[0]);
0210
0211 Dim batch_size = input[0];
0212 Dim output_channels = Dim{weight[0]};
0213
0214 std::vector<Dim> ret({ batch_size, output_channels, output1 });
0215
0216 if (fDim == 1)
0217 return ret;
0218
0219 size_t pad2 = fAttrPads[1] + fAttrPads[i2];
0220 Dim output2 = computeOutput(input2, fAttrKernelShape[1], pad2, fAttrStrides[1]);
0221
0222
0223 ret.push_back(output2);
0224 if (fDim == 2)
0225 return ret;
0226
0227 size_t pad3 = fAttrPads[2] + fAttrPads[i3];
0228 Dim output3 = computeOutput(input3, fAttrKernelShape[2], pad3, fAttrStrides[2]);
0229
0230
0231 ret.push_back(output3);
0232 return ret;
0233 }
0234
// Validate the operator's tensors against the model, run shape inference,
// register the output tensor and the helper tensors ("_f" kernel matrix and
// "_xcol" im2col buffer), and prepare bias broadcasting if needed.
0235 void Initialize(RModel& model) override {
0236 fUseSession = model.UseSession();
0237 if (!model.CheckIfTensorAlreadyExist(fNX)) {
0238 throw
0239 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model");
0240 }
// X may have parametric dimensions; rank must be 3 (1D), 4 (2D) or 5 (3D)
0241 fShapeX = model.GetDimTensorShape(fNX);
0242 if (fShapeX.size() < 3 || fShapeX.size() > 5) {
0243 std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
0244 throw
0245 std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions");
0246 }
// number of spatial dimensions - used by DoShapeInference below, so it must
// be set before that call
0247 fDim = fShapeX.size() - 2;
0248 if (!model.CheckIfTensorAlreadyExist(fNW)) {
0249 throw
0250 std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model");
0251 }
// weights must have a fully static shape
0252 fShapeW = model.GetTensorShape(fNW);
0253 if (fShapeW.size() < 3 || fShapeW.size() > 5) {
0254 std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
0255 throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions");
0256 }
// infer the output shape (also normalizes the attributes) and register Y
0257 fShapeY = DoShapeInference(fShapeX, fShapeW);
0258 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
0259 if (fNB != "") {
0260 if (!model.CheckIfTensorAlreadyExist(fNB)) {
0261 throw
0262 std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model");
0263 }
0264 fShapeB = model.GetTensorShape(fNB);
// the bias must be broadcast to the output shape without the batch dimension
0265 std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
0266 auto shapeDimB = model.GetDimTensorShape(fNB);
0267 bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape);
0268 if (broadcast_needed) {
0269 auto original_data = model.GetInitializedTensorData(fNB);
0270
0271 if (fShapeB.size() < 1)
0272 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape");
0273
0274
// the bias length must match the number of output channels M
0275 if (!(shapeDimB[0] == fShapeY[1]))
0276 throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " +
0277 ConvertShapeToString(fShapeB));
0278 if (fType != "float")
0279 throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported");
0280
// without a session the broadcast is done now, replacing the stored bias
// data; the target shape must then be fully static (ConvertShapeToInt)
0281 if (!fUseSession) {
0282 std::vector<size_t> shape(fDim + 1, 1);
0283 shape[0] = fShapeB[0];
0284 auto intTargetShape = ConvertShapeToInt(targetShape);
0285 std::shared_ptr<void> new_data_ptr(
0286 UTILITY::UnidirectionalBroadcast<float>(static_cast<float *>(original_data.get()), shape, intTargetShape),
0287 std::default_delete<float[]>());
0288 model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr);
0289 fShapeB = model.GetTensorShape(fNB);
0290 fNB2 = fNB;
0291 }
0292 else {
// session mode: register an extra tensor for the broadcast bias; the actual
// broadcast code is emitted by GenerateInitCode()
0293
0294
0295 fNB2 = fNB + "bcast";
0296 model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape);
0297 }
0298 }
0299 }
0300
// register the two helper tensors used by Generate():
//   "<X>_f"    : dilated kernel matrix of shape (M, C/group, kernelSize)
//   "<X>_xcol" : im2col buffer of shape (C/group, kernelSize, outChannelSize)
0301 std::vector<Dim> outputDims = std::vector<Dim>(fShapeY.begin()+2, fShapeY.end());
0302 auto outputChannelSize = ConvertDimShapeToLength(outputDims);
0303 size_t kernelSize = fAttrKernelShape[0];
0304 for (size_t i = 1; i < fDim; i++) {
0305 kernelSize *= fAttrKernelShape[i];
0306 }
0307
0308 std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
0309 std::vector<Dim> shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}};
0310 model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 );
0311 model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 );
0312 convK = fNX +"_f";
0313 imcol = fNX +"_xcol";
// the helper tensors are both produced and consumed by this operator
0314 fOutputTensorNames.emplace_back(convK);
0315 fOutputTensorNames.emplace_back(imcol);
0316 fInputTensorNames.emplace_back(convK);
0317 fInputTensorNames.emplace_back(imcol);
0318
0319 if (model.Verbose()) {
0320 std::cout << "Conv - " << fDim << " " << fNX << " : " << ConvertShapeToString(fShapeX)
0321 << " --> " << fNY << " : " << ConvertShapeToString(fShapeY) << std::endl;
0322 }
0323 }
0324
0325 std::string GenerateInitCode() override {
0326 std::stringstream out;
0327
0328 if (!fNB2.empty()) {
0329
0330 std::vector<size_t> shape(fDim + 1, 1);
0331 shape[0] = fShapeB[0];
0332 std::vector<Dim> targetShape(fShapeY.begin() + 1, fShapeY.end());
0333 out << SP << "{\n";
0334 out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_"
0335 << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n";
0336 out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n";
0337 out << SP << SP << "delete[] data;\n";
0338 out << SP << "}\n";
0339 }
0340 return out.str();
0341 }
0342
// Generate the C++ inference code for the convolution:
//   1. build the dilated kernel matrix "<X>_f" from the weights,
//   2. per batch (and per group), run im2col into "<X>_xcol",
//   3. multiply with sgemm into Y, 4. add the bias with saxpy.
// \param OpName unique operator name used to prefix the generated locals
// \return the generated code as a string
0343 std::string Generate(std::string OpName) override {
0344 OpName = "op_" + OpName;
0345
0346 if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
0347 throw
0348 std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first");
0349 }
0350
0351 std::stringstream out;
// kernel sizes are static; input/output spatial sizes may be parametric (Dim)
0352 auto bsize = fShapeX[0];
0353 size_t kDepth = (fDim > 2) ? fShapeW[2] : 1;
0354 size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1;
0355 size_t kWidth = fShapeW[fDim+1];
0356 auto iDepth = (fDim > 2) ? fShapeX[2] : Dim{1};
0357 auto iHeight = (fDim > 1) ? fShapeX[fDim] : Dim{1};
0358 auto iWidth = fShapeX[fDim+1];
0359 auto oDepth = (fDim > 2) ? fShapeY[2] : Dim{1};
0360 auto oHeight = (fDim > 1) ? fShapeY[fDim] : Dim{1};
0361 auto oWidth = fShapeY[fDim+1];
0362
// strides (as expression strings) for addressing one channel / one batch
0363 auto outputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{oDepth, oHeight, oWidth});
0364 auto outputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeY[1] , oDepth, oHeight, oWidth});
0365
0366 auto inputChannelStride = ConvertDimShapeToLength(std::vector<Dim>{iDepth, iHeight, iWidth});
0367 auto inputBatchStride = ConvertDimShapeToLength(std::vector<Dim>{fShapeX[1] , iDepth, iHeight, iWidth});
0368
0369 out << "\n//---- operator Conv " << OpName << "\n";
0370
0371
0372
0373
0374
// spatial-axis indices into the normalized 3-entry attribute vectors
0375 size_t id = (fDim > 2) ? fDim-3 : 2;
0376 size_t ih = (fDim > 1) ? fDim-2 : 1;
0377 size_t iw = fDim-1;
0378
// strides within the plain kernel ("...stride") and within the dilated
// kernel matrix ("...strideDil")
0379 size_t wstrideDil = fAttrDilations[iw];
0380 size_t hstride = kWidth;
0381 size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw];
0382 size_t dstride = kHeight * kWidth;
0383 size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
0384 size_t icstride = kHeight * kWidth * kDepth;
0385 size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw];
0386 size_t ocstride = fShapeW[1] * icstride;
0387 size_t ocstrideDil = fShapeW[1] * icstrideDil;
0388
// emit loops that scatter the weights into the dilated kernel matrix "_f"
0389 out << SP << "for (std::size_t oc = 0; oc < " << fShapeW[0] << "; oc++) {\n";
0390 out << SP << SP << "for (std::size_t ic = 0; ic < " << fShapeW[1] << "; ic++) {\n";
0391 if (fDim > 2)
0392 out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
0393 if (fDim > 1)
0394 out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
0395 out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";
0396
0397 out << SP << SP << SP << SP << SP << "tensor_" <<fNX << "_f[oc * "
0398 << ocstrideDil << " + ic * " << icstrideDil;
0399 if (fDim > 2) out << " + kd * " << dstrideDil;
0400 if (fDim > 1) out << " + kh * " << hstrideDil;
0401 out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[oc * " << ocstride << " + ic * " << icstride;
0402 if (fDim > 2) out << " + kd * " << dstride;
0403 if (fDim > 1) out << " + kh * " << hstride;
0404 out << " + kw ];\n";
0405
0406 out << SP << SP << SP << SP << "}\n";
0407 if (fDim > 1) out << SP << SP << SP << "}\n";
0408 if (fDim > 2) out << SP << SP << SP << "}\n";
0409 out << SP << SP << "}\n";
0410 out << SP << "}\n";
0411
0412
// sgemm parameters: Y(M x outSpatial) = F(M x k) * xcol(k x outSpatial)
0413 out << SP << "char " << OpName << "_transA = 'N';\n";
0414 out << SP << "char " << OpName << "_transB = 'N';\n";
0415 out << SP << "int " << OpName << "_m = " << outputChannelStride << ";\n";
// NOTE(review): compares a Dim against a size_t - relies on Dim's comparison
// with integers; holds by construction in DoShapeInference
0416 assert(fShapeY[1] == fShapeW[0]);
0417
0418 out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n";
0419 out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n";
0420 out << SP << "float " << OpName << "_alpha = 1.0;\n";
0421 out << SP << "float " << OpName << "_beta = 0.0;\n";
0422
0423
0424
// loop over the batch entries in the generated code
0425 out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";
0426
0427
0428
0429
0430
0431
0432
0433
// Asymmetric padding is not supported by Im2col: average begin/end pads.
// Note this mutates fAttrPads/fAttrStrides at generation time (Generate is
// called once per operator).
0434 if (fDim ==1) {
0435 if (fAttrPads[0] != fAttrPads[1] ) {
0436 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding "
0437 << std::endl;
0438 fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
0439 }
0440 fAttrPads[1] = 0;
0441 fAttrStrides[1] = 1;
0442 }
0443 if (fDim == 2) {
0444 if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
0445 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
0446 fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
0447 fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
0448 }
0449 }
0450 if (fDim == 3) {
0451 if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
0452 std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl;
0453 fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
0454 fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
0455 fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
0456 }
0457 }
0458 out << SP << SP << "size_t out_offset = n * " << outputBatchStride << ";\n";
0459
// ungrouped convolution: one im2col + one sgemm per batch entry
0460 if (fAttrGroup == 1) {
0461 out << SP << SP << "size_t x_offset = n * " << inputBatchStride << ";\n";
0462
0463
0464 if (fDim < 3) {
// 1D convolutions are treated as 2D with height 1
0465 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
0466 << " + x_offset,"
0467
0468
0469
0470 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
0471 if (fDim == 1)
0472 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
0473 << fAttrDilations[0];
0474 else
0475 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
0476 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
0477 << fAttrDilations[1];
0478 out << "," << "tensor_" <<fNX << "_xcol);\n\n ";
0479 } else {
0480
0481 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
0482 << " + x_offset,"
0483
0484
0485
0486 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << ","
0487 << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << ","
0488 << fAttrPads[0] << "," << fAttrPads[1] << "," << fAttrPads[2] << ","
0489 << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
0490 << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ","
0491 << "tensor_" << fNX << "_xcol);\n\n ";
0492 }
0493
0494 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
0495 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName
0496 << "_m,\n";
0497 out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
0498 << " + out_offset, &" << OpName << "_m);\n";
0499 } else {
// grouped convolution: emit a loop over groups with per-group offsets into
// the input, output and kernel-matrix tensors
0500
0501
0502
0503
0504 out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
0505 out << SP << SP << "size_t x_offset = n * " << inputBatchStride << " + g * "
0506 << fShapeW[1] << " * " << inputChannelStride << ";\n ";
// this inner out_offset shadows the outer one inside the generated g-loop
0507 out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g * "
0508 << fShapeW[0] << " * (" << outputChannelStride << ") / " << fAttrGroup << ";\n ";
0509
0510 if (fDim < 3) {
0511 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col<float>(tensor_" << fNX
0512 << " + x_offset,"
0513
0514
0515
0516 << fShapeW[1] << "," << iHeight << "," << iWidth << ",";
0517 if (fDim == 1)
0518 out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
0519 << fAttrDilations[0];
0520 else
0521 out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
0522 << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
0523 << fAttrDilations[1];
0524 out << ", tensor_" << fNX << "_xcol);\n\n ";
0525 } else {
0526
0527 out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
0528 << " + x_offset,"
0529
0530
0531
0532 << fShapeW[1] << "," << iDepth << "," << iHeight << "," << iWidth << "," << fAttrKernelShape[0] << ","
0533 << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1]
0534 << "," << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2]
0535 << "," << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << ",tensor_" << fNX
0536 << "_xcol);\n\n ";
0537 }
0538
0539
// per-group sgemm: n becomes M/group and the kernel matrix is offset by g
0540
0541 out << SP << SP << SP << OpName << "_n = " << fShapeW[0] / fAttrGroup << ";\n";
0542
0543 out << SP << SP << SP << "size_t offset_f = g * "
0544 << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup
0545 << ";\n";
0546 out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
0547 << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
0548 << "_m,\n";
0549 out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
0550 << " + out_offset"
0551 << ", &" << OpName << "_m);\n";
0552
0553 out << SP << SP << "}\n";
0554 }
0555
// bias add: one saxpy over the whole batch entry, using the outer out_offset
0556 if (fNB2 != "") {
0557 out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n";
0558 out << SP << "float " << OpName << "_gamma = 1.0;\n";
0559 out << SP << "int " << OpName << "_incx = 1;\n";
0560 out << SP << "int " << OpName << "_incy = 1;\n";
0561
0562 out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &"
0563 << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n";
0564
0565 }
// close the generated batch loop
0566 out << SP << "}\n";
0567
0568 return out.str();
0569 }
0570
0571
0572
0573 std::vector<std::string> GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; }
0574 };
0575
0576 }
0577 }
0578 }
0579
0580 #endif