File indexing completed on 2025-01-30 10:23:02
0001 #ifndef TMVA_RSTANDARDSCALER
0002 #define TMVA_RSTANDARDSCALER
0003
0004 #include <TFile.h>
0005
0006 #include <TMVA/RTensor.hxx>
0007 #include <string_view>
0008
0009 #include <cmath>
0010 #include <vector>
0011
0012 namespace TMVA {
0013 namespace Experimental {
0014
0015 template <typename T>
0016 class RStandardScaler {
0017 private:
0018 std::vector<T> fMeans;
0019 std::vector<T> fStds;
0020
0021 public:
0022 RStandardScaler() = default;
0023 RStandardScaler(std::string_view title, std::string_view filename);
0024 void Fit(const RTensor<T>& x);
0025 std::vector<T> Compute(const std::vector<T>& x);
0026 RTensor<T> Compute(const RTensor<T>& x);
0027 std::vector<T> GetMeans() const { return fMeans; }
0028 std::vector<T> GetStds() const { return fStds; }
0029 void Save(std::string_view title, std::string_view filename);
0030 };
0031
0032 template <typename T>
0033 inline RStandardScaler<T>::RStandardScaler(std::string_view title, std::string_view filename) {
0034 auto file = TFile::Open(filename.data(), "READ");
0035 RStandardScaler<T>* obj;
0036 file->GetObject(title.data(), obj);
0037 fMeans = obj->GetMeans();
0038 fStds = obj->GetStds();
0039 delete obj;
0040 file->Close();
0041 }
0042
0043 template <typename T>
0044 inline void RStandardScaler<T>::Save(std::string_view title, std::string_view filename) {
0045 auto file = TFile::Open(filename.data(), "UPDATE");
0046 file->WriteObject<RStandardScaler<T>>(this, title.data());
0047 file->Write();
0048 file->Close();
0049 }
0050
0051 template <typename T>
0052 inline void RStandardScaler<T>::Fit(const RTensor<T>& x) {
0053 const auto shape = x.GetShape();
0054 if (shape.size() != 2)
0055 throw std::runtime_error("Can only fit to input tensor of rank 2.");
0056 fMeans.clear();
0057 fMeans.resize(shape[1]);
0058 fStds.clear();
0059 fStds.resize(shape[1]);
0060
0061
0062 for (std::size_t i = 0; i < shape[0]; i++) {
0063 for (std::size_t j = 0; j < shape[1]; j++) {
0064 fMeans[j] += x(i, j);
0065 }
0066 }
0067 for (std::size_t i = 0; i < shape[1]; i++) {
0068 fMeans[i] /= shape[0];
0069 }
0070
0071
0072 for (std::size_t i = 0; i < shape[0]; i++) {
0073 for (std::size_t j = 0; j < shape[1]; j++) {
0074 fStds[j] += (x(i, j) - fMeans[j]) * (x(i, j) - fMeans[j]);
0075 }
0076 }
0077 for (std::size_t i = 0; i < shape[1]; i++) {
0078 fStds[i] = std::sqrt(fStds[i] / (shape[0] - 1));
0079 }
0080 }
0081
0082 template <typename T>
0083 inline std::vector<T> RStandardScaler<T>::Compute(const std::vector<T>& x) {
0084 const auto size = x.size();
0085 if (size != fMeans.size())
0086 throw std::runtime_error("Size of input vector is not equal to number of fitted variables.");
0087
0088 std::vector<T> y(size);
0089 for (std::size_t i = 0; i < size; i++) {
0090 y[i] = (x[i] - fMeans[i]) / fStds[i];
0091 }
0092
0093 return y;
0094 }
0095
0096 template <typename T>
0097 inline RTensor<T> RStandardScaler<T>::Compute(const RTensor<T>& x) {
0098 const auto shape = x.GetShape();
0099 if (shape.size() != 2)
0100 throw std::runtime_error("Can only compute output for input tensor of rank 2.");
0101 if (shape[1] != fMeans.size())
0102 throw std::runtime_error("Second dimension of input tensor is not equal to number of fitted variables.");
0103
0104 RTensor<T> y(shape);
0105 for (std::size_t i = 0; i < shape[0]; i++) {
0106 for (std::size_t j = 0; j < shape[1]; j++) {
0107 y(i, j) = (x(i, j) - fMeans[j]) / fStds[j];
0108 }
0109 }
0110
0111 return y;
0112 }
0113
0114 }
0115 }
0116
0117 #endif