Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:11:07

0001 #ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
0002 #define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
0003 
0004 #include "TMVA/RModel.hxx"
0005 #include "TMVA/SOFIE_common.hxx"
0006 
0007 #include <sstream>
0008 #include <string>
0009 
0010 namespace TMVA {
0011 namespace Experimental {
0012 namespace SOFIE {
0013 
0014 template <typename T>
0015 class ROperator_LayerNormalization : public ROperator {
0016 private:
0017    int fAttrAxis;
0018    float fAttrEpsilon;
0019    size_t fAttrStashType;
0020 
0021    std::string fNX;
0022    std::string fNScale;
0023    std::string fNB;
0024    std::string fNY;
0025    std::string fNMean;
0026    std::string fNInvStdDev;
0027 
0028    std::string fNCastedX;
0029    std::string fNNormalizedX;
0030    std::string fNBroadcastedB;
0031 
0032    std::vector<Dim> fShapeX;
0033    std::vector<Dim> fShapeScale;
0034    std::vector<size_t> fShapeB;  // shape of input Bias (B) is assumed to be fully defined
0035    std::vector<Dim> fShapeY;
0036    std::vector<Dim> fShapeMean;
0037    std::vector<Dim> fShapeInvStdDev;
0038 
0039    size_t fAxis; // axis in [0, size)
0040    size_t fSize; // Size of the input
0041    // size_t fAxisDim;
0042 
0043    std::vector<Dim> fNormalizedShape;
0044    std::vector<Dim> fAxesShape;
0045    // lengths in string format
0046    std::string fLength; // Length of the input
0047    std::string fNormalizedLength;
0048    std::string fAxesLength;
0049 
0050    std::string fType;
0051 
0052 public:
0053    ROperator_LayerNormalization() {}
0054 
0055    ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
0056                                 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
0057                                 const std::string &nameMean, const std::string &nameInvStdDev)
0058       : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
0059         fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
0060         fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
0061    {
0062    }
0063 
0064    std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
0065 
0066    std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
0067 
0068    void Initialize(RModel &model) override
0069    {
0070       if (!model.CheckIfTensorAlreadyExist(fNX)) {
0071          throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
0072       }
0073       bool isDynamic = model.IsDynamicTensor(fNX);
0074       fShapeX = model.GetDynamicTensorShape(fNX);
0075       fShapeY = fShapeX;
0076       model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
0077       // Type of the output
0078       fType = ConvertTypeToString(model.GetTensorType(fNX));
0079       // Size of the input
0080       fSize = fShapeX.size();
0081       // Axis in [0, size)
0082       fAxis = (fAttrAxis < 0) ? fSize + fAttrAxis : fAttrAxis;
0083       // Shape of fShapeX[0, ..., fAxis)
0084       fAxesShape = std::vector<Dim>(fShapeX.begin(), fShapeX.begin() + fAxis);
0085       // Length of the axes
0086       fAxesLength = ConvertDynamicShapeToLength(fAxesShape);
0087       // Shape of fShapeX[fAxis, ..., fSize)
0088       fNormalizedShape = std::vector<Dim>(fShapeX.begin() + fAxis, fShapeX.end());
0089       // Length of the normalized axis
0090       fNormalizedLength = ConvertDynamicShapeToLength(fNormalizedShape);
0091       // length of the input
0092       fLength = ConvertDynamicShapeToLength(fShapeX);
0093       // Type of mean and std
0094       ETensorType type = (fAttrStashType == 1) ? ETensorType::FLOAT : model.GetTensorType(fNX);
0095       // Mean
0096       if (fNMean.empty()) {
0097          fNMean = "Mean" + fNX;
0098          // cannot use initializer list with one element since it is ambiguous
0099          if (isDynamic)
0100             // add size_t(-1) to indicate that shape is an expression
0101             model.AddIntermediateTensor(fNMean, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
0102          else
0103             model.AddIntermediateTensor(fNMean, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
0104       }
0105       // Inverse Standard Deviation
0106       if (fNInvStdDev.empty()) {
0107          fNInvStdDev = "InvStdDev" + fNX;
0108          if (isDynamic)
0109             model.AddIntermediateTensor(fNInvStdDev, type, std::vector<Dim>(1,Dim{fAxesLength,std::size_t(-1)}));
0110          else
0111             model.AddIntermediateTensor(fNInvStdDev, type, std::vector<size_t>(1,std::stoi(fAxesLength)));
0112       }
0113       // Cast X to float
0114       if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
0115          fNCastedX = "Casted" + fNX;
0116          model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX);
0117          fNNormalizedX = "Normalized" + fNX;
0118          model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX);
0119       }
0120       // Broadcast the bias
0121       if (!fNB.empty()) {
0122          fShapeB = model.GetTensorShape(fNB);
0123          size_t lengthB = ConvertShapeToLength(fShapeB);
0124          if (isDynamic || lengthB < static_cast<size_t>(std::stoi(fLength))) {
0125             fNBroadcastedB = "Broadcasted" + fNB;
0126             model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX);
0127          }
0128       }
0129       model.AddNeededStdLib("cmath");
0130    }
0131 
0132    std::string GenerateInitCode() override
0133    {
0134       std::stringstream out;
0135       if (!fNBroadcastedB.empty()) {
0136          out << SP << "// Broadcasting the bias of LayerNormalization op\n";
0137          out << SP << "{\n";
0138          out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
0139          out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertDynamicShapeToString(fShapeX) << ");\n";
0140          out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
0141          out << SP << "delete[] data;\n";
0142          out << SP << "}\n";
0143       }
0144       return out.str();
0145    }
0146 
0147    std::string Generate(std::string OpName) override
0148    {
0149       OpName = "op_" + OpName;
0150       if (fShapeX.empty()) {
0151          throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + OpName +
0152                                   " called to generate without being initialized first.");
0153       }
0154       if (fShapeX.size() > 5) {
0155          throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
0156                                   "implemented for input tensor of size > 5.");
0157       }
0158 
0159       std::stringstream out;
0160 
0161       out << "//---- Layer Normalization  operator " << OpName << "\n";
0162 
0163       // Loop over all the normalized axes i.e. [axis, ..., size)
0164       out << SP << "std::vector<size_t> " << OpName << "_InputShape ({";
0165       for (size_t i = 0; i < fSize; i++) {
0166          out << fShapeX[i].GetVal();
0167          if (i + 1 < fSize) {
0168             out << ",";
0169          }
0170       }
0171       out << "});\n";
0172       std::string inputShape = OpName + "_InputShape";
0173 
0174       auto strides = UTILITY::ComputeStrideFromShape(fShapeX);
0175       std::string InputIndex = "axis_0 * " + strides[0].GetVal();
0176       for (size_t i = 1; i < fSize; i++) {
0177          InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal();
0178       }
0179 
0180       auto axesStrides = UTILITY::ComputeStrideFromShape(fAxesShape);
0181       std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal();
0182       for (size_t i = 1; i < fAxis; i++) {
0183          axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal();
0184       }
0185 
0186       auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape);
0187       std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal();
0188       for (size_t i = fAxis + 1; i < fSize; i++) {
0189          normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal();
0190       }
0191 
0192       if (!fNCastedX.empty()) {
0193          // Cast X to float
0194          out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
0195          out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
0196          out << "[i]);\n";
0197          out << SP << "}\n";
0198       }
0199 
0200       out << SP << "// Compute the mean\n";
0201       // Loop over the normalized dimensions
0202       for (size_t i = 0; i < fAxis; i++) {
0203          std::string iIdx = "axis_" + std::to_string(i);
0204          out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
0205          out << "[" << i << "]; " << iIdx << "++) {\n";
0206       }
0207       out << SP << SP << fType << " sum = 0.;\n";
0208       // loop over all the dims in [0, fAxis)
0209       for (size_t j = fAxis; j < fSize; j++) {
0210          std::string jIdx = "axis_" + std::to_string(j);
0211          out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
0212          out << "[" << j << "]; " << jIdx << "++) {\n";
0213       }
0214       out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
0215       for (size_t j = fAxis; j < fSize; j++) {
0216          out << SP << SP << "}\n";
0217       }
0218       out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
0219       out << fNormalizedLength << ");\n";
0220       for (size_t i = fAxis; i < fSize; i++) {
0221          out << SP << "}\n";
0222       }
0223 
0224       out << SP << "// Compute the inverse Standard Deviation\n";
0225       // Loop over the normalized dimensions
0226       for (size_t i = 0; i < fAxis; i++) {
0227          std::string iIdx = "axis_" + std::to_string(i);
0228          out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
0229          out << "[" << i << "]; " << iIdx << "++){\n";
0230       }
0231       // Set sum = 0
0232       out << SP << SP << fType << " sum = 0.;\n";
0233       // loop over all the dims in [0, fAxis)
0234       for (size_t j = fAxis; j < fSize; j++) {
0235          std::string jIdx = "axis_" + std::to_string(j);
0236          out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
0237          out << "[" << j << "]; " << jIdx << "++){\n";
0238       }
0239       out << SP << SP << SP << "sum += std::pow(tensor_" << fNX << "[" << InputIndex << "] - tensor_";
0240       out << fNMean << "[" << axesIndex << "], 2);\n";
0241       for (size_t j = fAxis; j < fSize; j++) {
0242          out << SP << SP << "}\n";
0243       }
0244       out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
0245       out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
0246       for (size_t i = 0; i < fAxis; i++) {
0247          out << SP << "}\n";
0248       }
0249 
0250       if (!fNCastedX.empty()) {
0251          out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
0252          for (size_t i = 0; i < fAxis; i++) {
0253             std::string iIdx = "axis_" + std::to_string(i);
0254             out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
0255             out << "[" << i << "]; " << iIdx << "++){\n";
0256          }
0257          for (size_t j = fAxis; j < fSize; j++) {
0258             std::string jIdx = "axis_" + std::to_string(j);
0259             out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
0260             out << "[" << j << "]; " << jIdx << "++){\n";
0261          }
0262          out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
0263          out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
0264          out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
0265          for (size_t j = fAxis; j < fSize; j++) {
0266             out << SP << SP << "}\n";
0267          }
0268          for (size_t i = fAxis; i < fSize; i++) {
0269             out << SP << "}\n";
0270          }
0271          out << "// Y = Scale o NormalizedX";
0272          for (size_t i = 0; i < fAxis; i++) {
0273             std::string iIdx = "axis_" + std::to_string(i);
0274             out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
0275             out << "[" << i << "]; " << iIdx << "++){\n";
0276          }
0277          for (size_t j = fAxis; j < fSize; j++) {
0278             std::string jIdx = "axis_" + std::to_string(j);
0279             out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
0280             out << "[" << j << "]; " << jIdx << "++){\n";
0281          }
0282          out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
0283          out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
0284          out << "]);\n";
0285          for (size_t j = fAxis; j < fSize; j++) {
0286             out << SP << SP << "}\n";
0287          }
0288          for (size_t i = fAxis; i < fSize; i++) {
0289             out << SP << "}\n";
0290          }
0291       } else {
0292          out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
0293          for (size_t i = 0; i < fAxis; i++) {
0294             std::string iIdx = "axis_" + std::to_string(i);
0295             out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
0296             out << "[" << i << "]; " << iIdx << "++){\n";
0297          }
0298          for (size_t j = fAxis; j < fSize; j++) {
0299             std::string jIdx = "axis_" + std::to_string(j);
0300             out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
0301             out << "[" << j << "]; " << jIdx << "++){\n";
0302          }
0303          out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
0304          out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
0305          out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
0306          out << axesIndex << "]);\n";
0307          for (size_t j = fAxis; j < fSize; j++) {
0308             out << SP << SP << "}\n";
0309          }
0310          for (size_t i = fAxis; i < fSize; i++) {
0311             out << SP << "}\n";
0312          }
0313       }
0314 
0315       if (!fNB.empty()) {
0316          std::string Bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
0317          out << SP << "// Add the bias to Y\n";
0318          out << SP << "int " << OpName << "_n = " << fLength << ";\n";
0319          out << SP << "float " << OpName << "_alpha = 1.;\n";
0320          out << SP << "int " << OpName << "_inc = 1;\n";
0321          out << SP << "BLAS::saxpy_(&" << OpName << "_n, &" << OpName << "_alpha, " << Bias << ", &";
0322          out << OpName << "_inc, " << "tensor_" << fNY << ", &" << OpName << "_inc);\n";
0323       }
0324 
0325       return out.str();
0326    }
0327 
0328    std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
0329 
0330    std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
0331 };
0332 
0333 } // namespace SOFIE
0334 } // namespace Experimental
0335 } // namespace TMVA
0336 
0337 #endif