Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-17 08:29:02

0001 #ifndef PODIO_DATASOURCE_H
0002 #define PODIO_DATASOURCE_H
0003 
0004 // Podio
0005 #include <podio/CollectionBase.h>
0006 #include <podio/Frame.h>
0007 #include <podio/Reader.h>
0008 
0009 // ROOT
0010 #include <ROOT/RDataFrame.hxx>
0011 #include <ROOT/RDataSource.hxx>
0012 
0013 // STL
0014 #include <memory>
0015 #include <string>
0016 #include <typeinfo>
0017 #include <utility>
0018 #include <vector>
0019 
0020 namespace podio {
0021 class DataSource : public ROOT::RDF::RDataSource { // ROOT RDataSource implementation reading podio files
0022 public:
0023   ///
0024   /// @brief Construct the podio::DataSource from the provided file.
0025   ///
0026   /// @param filePath Path to the file that should be read
0027   /// @param nEvents Number of events to process (optional, defaults to -1 for
0028   ///                all events)
0029   /// @param collsToRead The collections that should be made available (optional,
0030   ///                    defaults to empty vector for all collections)
0031   ///
0032   explicit DataSource(const std::string& filePath, int nEvents = -1, const std::vector<std::string>& collsToRead = {});
0033 
0034   ///
0035   /// @brief Construct the podio::DataSource from the provided file list.
0036   ///
0037   /// @param filePathList Paths to the files that should be read
0038   /// @param nEvents Number of events to process (optional, defaults to -1 for
0039   ///                all events)
0040   /// @param collsToRead The collections that should be made available (optional,
0041   ///                    defaults to empty vector for all collections)
0042   ///
0043   explicit DataSource(const std::vector<std::string>& filePathList, int nEvents = -1,
0044                       const std::vector<std::string>& collsToRead = {});
0045 
0046   ///
0047   /// @brief Inform the podio::DataSource of the desired level of parallelism.
0048   ///
0049   void SetNSlots(unsigned int nSlots) override;
0050 
0051   ///
0052   /// @brief Inform podio::DataSource that an event-loop is about to start.
0053   ///
0054   void Initialize() override;
0055 
0056   ///
0057   /// @brief Retrieve from podio::DataSource a set of ranges of entries that
0058   ///        can be processed concurrently.
0059   ///
0060   std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override;
0061 
0062   ///
0063   /// @brief Inform podio::DataSource that a certain thread is about to start
0064   ///        working on a certain range of entries.
0065   ///
0066   void InitSlot(unsigned int slot, ULong64_t firstEntry) override;
0067 
0068   ///
0069   /// @brief Inform podio::DataSource that a certain thread is about to start
0070   ///        working on a certain entry.
0071   ///
0072   bool SetEntry(unsigned int slot, ULong64_t entry) override;
0073 
0074   ///
0075   /// @brief Inform podio::DataSource that a certain thread finished working
0076   ///        on a certain range of entries.
0077   ///
0078   void FinalizeSlot(unsigned int slot) override;
0079 
0080   ///
0081   /// @brief Inform podio::DataSource that an event-loop finished.
0082   ///
0083   void Finalize() override;
0084 
0085   ///
0086   /// @brief Returns a reference to the collection of the dataset's column
0087   ///        names
0088   ///
0089   const std::vector<std::string>& GetColumnNames() const override;
0090 
0091   ///
0092   /// @brief Checks if the dataset has a certain column.
0093   ///
0094   bool HasColumn(std::string_view columnName) const override;
0095 
0096   ///
0097   /// @brief Type of a column as a string. Required for JITting.
0098   ///
0099   std::string GetTypeName(std::string_view columnName) const override;
0100 
0101   std::string GetLabel() override {
0102     return "PODIO Datasource"; // fixed label naming this kind of data source
0103   }
0104 
0105 protected:
0106   ///
0107   /// @brief Type-erased vector of pointers to pointers to column
0108   ///        values --- one per slot.
0109   ///
0110   std::vector<void*> GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override;
0111 
0112   std::string AsString() override {
0113     return "Podio data source"; // fixed textual description of this data source
0114   }
0115 
0116 private:
0117   /// Number of slots/threads
0118   unsigned int m_nSlots = 1;
0119 
0120   /// Input file paths
0121   std::vector<std::string> m_filePathList = {};
0122 
0123   /// Total number of events
0124   ULong64_t m_nEvents = 0;
0125 
0126   /// Ranges of events available to be processed
0127   std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAvailable = {};
0128 
0129   /// All ranges of events ever created
0130   std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAll = {};
0131 
0132   /// Column names
0133   std::vector<std::string> m_columnNames{};
0134 
0135   /// Column types
0136   std::vector<std::string> m_columnTypes = {};
0137 
0138   /// Collections, m_Collections[columnIndex][slotIndex]
0139   std::vector<std::vector<const podio::CollectionBase*>> m_Collections = {};
0140 
0141   /// Active collections
0142   std::vector<unsigned int> m_activeCollections = {};
0143 
0144   /// Root podio readers
0145   std::vector<std::unique_ptr<podio::Reader>> m_podioReaders = {};
0146 
0147   /// Podio frames
0148   std::vector<std::unique_ptr<podio::Frame>> m_frames = {};
0149 
0150   /// @brief Setup input for the podio::DataSource.
0151   ///
0152   /// @param[in] nEvents     Number of events.
0153   /// @param[in] collsToRead Collections that should be made available.
0154   ///
0155   void SetupInput(int nEvents, const std::vector<std::string>& collsToRead);
0156 };
0157 
0158 ///
0159 /// @brief Create RDataFrame from multiple Podio files.
0160 ///
0161 /// @param[in] filePathList  List of file paths from which the RDataFrame
0162 ///                          will be created.
0163 /// @param[in] collsToRead   List of collection names that should be made
0164 ///                          available (optional, defaults to an empty vector).
0165 ///
0166 /// @return                  RDataFrame created from the input file list.
0167 ///
0168 ROOT::RDataFrame CreateDataFrame(const std::vector<std::string>& filePathList,
0169                                  const std::vector<std::string>& collsToRead = {});
0170 
0171 ///
0172 /// @brief Create RDataFrame from a Podio file or glob pattern matching multiple Podio files.
0173 ///
0174 /// @param[in] filePath      File path from which the RDataFrame will be created.
0175 ///                          The file path can include glob patterns to match multiple files.
0176 /// @param[in] collsToRead   List of collection names that should be made
0177 ///                          available (optional, defaults to an empty vector).
0178 ///
0179 /// @return                  RDataFrame created from the file(s) matched by filePath.
0180 ///
0181 ROOT::RDataFrame CreateDataFrame(const std::string& filePath, const std::vector<std::string>& collsToRead = {});
0182 } // namespace podio
0183 
0184 #endif /* PODIO_DATASOURCE_H */