Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:38

0001 // Author: Dante Niewenhuis, VU Amsterdam 07/2023
0002 // Author: Kristupas Pranckietis, Vilnius University 05/2024
0003 // Author: Nopphakorn Subsa-Ard, King Mongkut's University of Technology Thonburi (KMUTT) (TH) 08/2024
0004 // Author: Vincenzo Eduardo Padulano, CERN 10/2024
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef TMVA_RCHUNKLOADER
0015 #define TMVA_RCHUNKLOADER
0016 
0017 #include <vector>
0018 
0019 #include "TMVA/RTensor.hxx"
0020 #include "ROOT/RDataFrame.hxx"
0021 #include "ROOT/RDF/Utils.hxx"
0022 #include "ROOT/RVec.hxx"
0023 
0024 #include "ROOT/RLogger.hxx"
0025 
0026 namespace TMVA {
0027 namespace Experimental {
0028 namespace Internal {
0029 
0030 // RChunkLoader class used to load content of a RDataFrame onto a RTensor.
0031 template <typename... ColTypes>
0032 class RChunkLoaderFunctor {
0033    std::size_t fOffset{};
0034    std::size_t fVecSizeIdx{};
0035    float fVecPadding{};
0036    std::vector<std::size_t> fMaxVecSizes{};
0037 
0038    TMVA::Experimental::RTensor<float> &fChunkTensor;
0039 
0040    template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0041    void AssignToTensor(const T &vec)
0042    {
0043       const auto &max_vec_size = fMaxVecSizes[fVecSizeIdx++];
0044       const auto &vec_size = vec.size();
0045       if (vec_size < max_vec_size) // Padding vector column to max_vec_size with fVecPadding
0046       {
0047          std::copy(vec.cbegin(), vec.cend(), &fChunkTensor.GetData()[fOffset]);
0048          std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
0049                    fVecPadding);
0050       } else // Copy only max_vec_size length from vector column
0051       {
0052          std::copy(vec.cbegin(), vec.cbegin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
0053       }
0054       fOffset += max_vec_size;
0055    }
0056 
0057    template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0058    void AssignToTensor(const T &val)
0059    {
0060       fChunkTensor.GetData()[fOffset++] = val;
0061    }
0062 
0063 public:
0064    RChunkLoaderFunctor(TMVA::Experimental::RTensor<float> &chunkTensor, const std::vector<std::size_t> &maxVecSizes,
0065                        float vecPadding)
0066       : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding)
0067    {
0068    }
0069 
0070    void operator()(const ColTypes &...cols)
0071    {
0072       fVecSizeIdx = 0;
0073       (AssignToTensor(cols), ...);
0074    }
0075 };
0076 
0077 template <typename... ColTypes>
0078 class RChunkLoaderFunctorFilters {
0079 
0080 private:
0081    std::size_t fOffset{};
0082    std::size_t fVecSizeIdx{};
0083    std::size_t fEntries{};
0084    std::size_t fChunkSize{};
0085    float fVecPadding{};
0086    std::vector<std::size_t> fMaxVecSizes{};
0087 
0088    TMVA::Experimental::RTensor<float> &fChunkTensor;
0089    TMVA::Experimental::RTensor<float> &fRemainderTensor;
0090 
0091    template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0092    void AssignToTensor(const T &vec)
0093    {
0094       std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];
0095       std::size_t vec_size = vec.size();
0096       if (vec_size < max_vec_size) // Padding vector column to max_vec_size with fVecPadding
0097       {
0098          std::copy(vec.begin(), vec.end(), &fChunkTensor.GetData()[fOffset]);
0099          std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
0100                    fVecPadding);
0101       } else // Copy only max_vec_size length from vector column
0102       {
0103          std::copy(vec.begin(), vec.begin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
0104       }
0105       fOffset += max_vec_size;
0106       fEntries++;
0107    }
0108 
0109    template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0110    void AssignToTensor(const T &val)
0111    {
0112       fChunkTensor.GetData()[fOffset++] = val;
0113       fEntries++;
0114    }
0115 
0116 public:
0117    RChunkLoaderFunctorFilters(TMVA::Experimental::RTensor<float> &chunkTensor,
0118                               TMVA::Experimental::RTensor<float> &remainderTensor, std::size_t entries,
0119                               std::size_t chunkSize, std::size_t &&offset,
0120                               const std::vector<std::size_t> &maxVecSizes = std::vector<std::size_t>(),
0121                               const float vecPadding = 0.0)
0122       : fChunkTensor(chunkTensor),
0123         fRemainderTensor(remainderTensor),
0124         fEntries(entries),
0125         fChunkSize(chunkSize),
0126         fOffset(offset),
0127         fMaxVecSizes(maxVecSizes),
0128         fVecPadding(vecPadding)
0129    {
0130    }
0131 
0132    void operator()(const ColTypes &...cols)
0133    {
0134       fVecSizeIdx = 0;
0135       if (fEntries == fChunkSize) {
0136          fChunkTensor = fRemainderTensor;
0137          fOffset = 0;
0138       }
0139       (AssignToTensor(cols), ...);
0140    }
0141 
0142    std::size_t &SetEntries() { return fEntries; }
0143    std::size_t &SetOffset() { return fOffset; }
0144 };
0145 
0146 template <typename... Args>
0147 class RChunkLoader {
0148 
0149 private:
0150    std::size_t fChunkSize;
0151 
0152    std::vector<std::string> fCols;
0153 
0154    std::vector<std::size_t> fVecSizes;
0155    std::size_t fVecPadding;
0156 
0157    ROOT::RDF::RNode &f_rdf;
0158    TMVA::Experimental::RTensor<float> &fChunkTensor;
0159 
0160 public:
0161    /// \brief Constructor for the RChunkLoader
0162    /// \param rdf
0163    /// \param chunkSize
0164    /// \param cols
0165    /// \param vecSizes
0166    /// \param vecPadding
0167    RChunkLoader(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t chunkSize,
0168                 const std::vector<std::string> &cols, const std::vector<std::size_t> &vecSizes = {},
0169                 const float vecPadding = 0.0)
0170       : f_rdf(rdf),
0171         fChunkTensor(chunkTensor),
0172         fChunkSize(chunkSize),
0173         fCols(cols),
0174         fVecSizes(vecSizes),
0175         fVecPadding(vecPadding)
0176    {
0177    }
0178 
0179    /// \brief Load a chunk of data using the RChunkLoaderFunctor
0180    /// \param chunkTensor
0181    /// \param currentRow
0182    /// \return Number of processed events
0183    std::size_t LoadChunk(const std::size_t currentRow)
0184    {
0185       RChunkLoaderFunctor<Args...> func(fChunkTensor, fVecSizes, fVecPadding);
0186 
0187       ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, currentRow, currentRow + fChunkSize);
0188       auto myCount = f_rdf.Count();
0189 
0190       // load data
0191       f_rdf.Foreach(func, fCols);
0192 
0193       // get loading info
0194       return myCount.GetValue();
0195    }
0196 };
0197 
0198 template <typename... Args>
0199 class RChunkLoaderFilters {
0200 
0201 private:
0202    ROOT::RDF::RNode &f_rdf;
0203    TMVA::Experimental::RTensor<float> &fChunkTensor;
0204 
0205    std::size_t fChunkSize;
0206    std::vector<std::string> fCols;
0207    const std::size_t fNumEntries;
0208    std::size_t fNumAllEntries;
0209    std::vector<std::size_t> fVecSizes;
0210    std::size_t fVecPadding;
0211    std::size_t fNumColumns;
0212 
0213    const std::size_t fPartOfChunkSize;
0214    TMVA::Experimental::RTensor<float> fRemainderChunkTensor;
0215    std::size_t fRemainderChunkTensorRow = 0;
0216 
0217 public:
0218    /// \brief Constructor for the RChunkLoader
0219    /// \param rdf
0220    /// \param chunkSize
0221    /// \param cols
0222    /// \param filters
0223    /// \param vecSizes
0224    /// \param vecPadding
0225    RChunkLoaderFilters(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor<float> &chunkTensor,
0226                        const std::size_t chunkSize, const std::vector<std::string> &cols, std::size_t numEntries,
0227                        std::size_t numAllEntries, const std::vector<std::size_t> &vecSizes = {},
0228                        const float vecPadding = 0.0)
0229       : f_rdf(rdf),
0230         fChunkTensor(chunkTensor),
0231         fChunkSize(chunkSize),
0232         fCols(cols),
0233         fNumEntries(numEntries),
0234         fNumAllEntries(numAllEntries),
0235         fVecSizes(vecSizes),
0236         fVecPadding(vecPadding),
0237         fNumColumns(cols.size()),
0238         fPartOfChunkSize(chunkSize / 5),
0239         fRemainderChunkTensor(std::vector<std::size_t>{fPartOfChunkSize, fNumColumns})
0240    {
0241    }
0242 
0243    /// \brief Load a chunk of data using the RChunkLoaderFunctor
0244    /// \param chunkTensor
0245    /// \param currentRow
0246    /// \return A pair of size_t defining the number of events processed and how many passed all filters
0247    std::pair<std::size_t, std::size_t> LoadChunk(std::size_t currentRow)
0248    {
0249       for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
0250          std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
0251                    fRemainderChunkTensor.GetData() + ((i + 1) * fNumColumns),
0252                    fChunkTensor.GetData() + (i * fNumColumns));
0253       }
0254 
0255       RChunkLoaderFunctorFilters<Args...> func(fChunkTensor, fRemainderChunkTensor, fRemainderChunkTensorRow,
0256                                                fChunkSize, fRemainderChunkTensorRow * fNumColumns, fVecSizes,
0257                                                fVecPadding);
0258 
0259       std::size_t passedEvents = 0;
0260       std::size_t processedEvents = 0;
0261 
0262       while ((passedEvents < fChunkSize && passedEvents < fNumEntries) && currentRow < fNumAllEntries) {
0263          ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, currentRow, currentRow + fPartOfChunkSize);
0264          auto report = f_rdf.Report();
0265 
0266          f_rdf.Foreach(func, fCols);
0267 
0268          processedEvents += report.begin()->GetAll();
0269          passedEvents += (report.end() - 1)->GetPass();
0270 
0271          currentRow += fPartOfChunkSize;
0272          func.SetEntries() = passedEvents;
0273          func.SetOffset() = passedEvents * fNumColumns;
0274       }
0275 
0276       fRemainderChunkTensorRow = passedEvents > fChunkSize ? passedEvents - fChunkSize : 0;
0277 
0278       return std::make_pair(processedEvents, passedEvents);
0279    }
0280 
0281    std::size_t LastChunk()
0282    {
0283       for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
0284          std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
0285                    fRemainderChunkTensor.GetData() + ((i + 1) * fNumColumns),
0286                    fChunkTensor.GetData() + (i * fNumColumns));
0287       }
0288 
0289       return fRemainderChunkTensorRow;
0290    }
0291 };
0292 } // namespace Internal
0293 } // namespace Experimental
0294 } // namespace TMVA
0295 #endif // TMVA_RCHUNKLOADER