Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-19 10:01:33

0001 #ifndef PODIO_FRAME_H
0002 #define PODIO_FRAME_H
0003 
0004 #include "podio/CollectionBase.h"
0005 #include "podio/CollectionIDTable.h"
0006 #include "podio/FrameCategories.h" // mainly for convenience
0007 #include "podio/GenericParameters.h"
0008 #include "podio/ICollectionProvider.h"
0009 #include "podio/SchemaEvolution.h"
0010 #include "podio/utilities/TypeHelpers.h"
0011 
0012 #include <concepts>
0013 #include <initializer_list>
0014 #include <memory>
0015 #include <mutex>
0016 #include <optional>
0017 #include <set>
0018 #include <stdexcept>
0019 #include <string>
0020 #include <type_traits>
0021 #include <unordered_map>
0022 #include <vector>
0023 
0024 namespace podio {
0025 
0026 /// Concept for enabling overloads for r-values
0027 template <typename T>
0028 concept RValueType = !std::is_lvalue_reference_v<T>;
0029 
0030 /// Concept for enabling overloads only for Collection r-values
0031 template <typename T>
0032 concept CollectionRValueType = CollectionType<T> && RValueType<T>;
0033 
0034 /// Concept encoding the minimal interface a type has to provide to usable as
0035 /// raw data from which a Frame can be constructed.
0036 ///
0037 /// Since the Frame either locks on raw data access, or is guaranteed to only
0038 /// run on a single thread when doing so (e.g. constructors) none of these have
0039 /// to be thread-safe either, even though implementations might be able to
0040 /// provide reasonable guarantees anyway.
0041 template <typename T>
0042 concept FrameDataType = requires(T t) {
0043   { t.getIDTable() } -> std::same_as<podio::CollectionIDTable>;
0044   { t.getCollectionBuffers(std::string{}) } -> std::same_as<std::optional<podio::CollectionReadBuffers>>;
0045   { t.getAvailableCollections() } -> std::same_as<std::vector<std::string>>;
0046   { t.getParameters() } -> std::same_as<std::unique_ptr<podio::GenericParameters>>;
0047 };
0048 
0049 template <typename T>
0050 concept RValueFrameDataType = FrameDataType<T> && RValueType<T>;
0051 
0052 namespace detail {
0053   /// The minimal interface for raw data types
0054   struct EmptyFrameData {
0055     podio::CollectionIDTable getIDTable() const {
0056       return {};
0057     }
0058 
0059     /// Try to get the buffers for a collection
0060     std::optional<podio::CollectionReadBuffers> getCollectionBuffers(const std::string&) {
0061       return std::nullopt;
0062     }
0063 
0064     /// Get the **still available**, i.e. yet unpacked, collections from the raw data
0065     std::vector<std::string> getAvailableCollections() const {
0066       return {};
0067     }
0068 
0069     /// Get the parameters that are stored in the raw data
0070     std::unique_ptr<podio::GenericParameters> getParameters() {
0071       return std::make_unique<podio::GenericParameters>();
0072     }
0073   };
0074   static_assert(FrameDataType<EmptyFrameData>, "EmptyFrameData should match FrameDataType concept");
0075 } // namespace detail
0076 
0077 template <FrameDataType FrameData>
0078 std::optional<podio::CollectionReadBuffers> unpack(FrameData* data, const std::string& name) {
0079   return data->getCollectionBuffers(name);
0080 }
0081 
0082 /// The Frame is a generalized (event) data container that aggregates all
0083 /// relevant data.
0084 ///
0085 /// It is possible to store collections as well as parameters / meta data in a
0086 /// Frame and all I/O facilities of podio operate on Frames.
0087 class Frame {
0088   /// Internal abstract interface for the type-erased implementation of the
0089   /// Frame class
0090   struct FrameConcept {
0091     virtual ~FrameConcept() = default;
0092     virtual const podio::CollectionBase* get(const std::string& name) const = 0;
0093     virtual const podio::CollectionBase* put(std::unique_ptr<podio::CollectionBase> coll, const std::string& name) = 0;
0094     virtual podio::GenericParameters& parameters() = 0;
0095     virtual const podio::GenericParameters& parameters() const = 0;
0096 
0097     virtual std::vector<std::string> availableCollections() const = 0;
0098 
0099     // Writing interface. Need this to be able to store all necessary information
0100     // TODO: Figure out whether this can be "hidden" somehow
0101     virtual podio::CollectionIDTable getIDTable() const = 0;
0102   };
0103 
0104   /// The interface implementation of the abstract FrameConcept that is
0105   /// necessary for a type-erased implementation of the Frame class
0106   template <typename FrameDataT>
0107   struct FrameModel final : FrameConcept, public ICollectionProvider {
0108 
0109     FrameModel(std::unique_ptr<FrameDataT> data);
0110     ~FrameModel() override = default;
0111     FrameModel(const FrameModel&) = delete;
0112     FrameModel& operator=(const FrameModel&) = delete;
0113     FrameModel(FrameModel&&) = default;
0114     FrameModel& operator=(FrameModel&&) = default;
0115 
0116     /// Try and get the collection from the internal storage and return a
0117     /// pointer to it if found. Otherwise return a nullptr
0118     const podio::CollectionBase* get(const std::string& name) const final;
0119 
0120     /// Try and place the collection into the internal storage and return a
0121     /// pointer to it. If a collection already exists or insertion fails, return
0122     /// a nullptr
0123     const podio::CollectionBase* put(std::unique_ptr<CollectionBase> coll, const std::string& name) final;
0124 
0125     /// Get a reference to the internally used GenericParameters
0126     podio::GenericParameters& parameters() override {
0127       return *m_parameters;
0128     }
0129     /// Get a const reference to the internally used GenericParameters
0130     const podio::GenericParameters& parameters() const override {
0131       return *m_parameters;
0132     }
0133 
0134     bool get(uint32_t collectionID, podio::CollectionBase*& collection) const override;
0135 
0136     podio::CollectionIDTable getIDTable() const override {
0137       // Make a copy
0138       return {m_idTable.ids(), m_idTable.names()};
0139     }
0140 
0141     std::vector<std::string> availableCollections() const override;
0142 
0143   private:
0144     podio::CollectionBase* doGet(const std::string& name, bool setReferences = true) const;
0145 
0146     using CollectionMapT = std::unordered_map<std::string, std::unique_ptr<podio::CollectionBase>>;
0147 
0148     mutable CollectionMapT m_collections{};                 ///< The internal map for storing unpacked collections
0149     mutable std::unique_ptr<std::mutex> m_mapMtx{nullptr};  ///< The mutex for guarding the internal collection map
0150     std::unique_ptr<FrameDataT> m_data{nullptr};            ///< The raw data read from file
0151     mutable std::unique_ptr<std::mutex> m_dataMtx{nullptr}; ///< The mutex for guarding the raw data
0152     podio::CollectionIDTable m_idTable{};                   ///< The collection ID table
0153     std::unique_ptr<podio::GenericParameters> m_parameters{nullptr}; ///< The generic parameter store for this frame
0154     mutable std::set<uint32_t> m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet
0155                                                  ///< put into the map)
0156   };
0157 
0158   std::unique_ptr<FrameConcept> m_self; ///< The internal concept pointer through which all the work is done
0159 
0160 public:
0161   /// Empty Frame constructor
0162   Frame();
0163 
0164   /// Frame constructor from (almost) arbitrary raw data.
0165   ///
0166   /// @tparam FrameData Arbitrary data container that provides access to the
0167   ///                   collection buffers as well as the metadata, when
0168   ///                   requested by the Frame. The unique_ptr has to be checked
0169   ///                   for validity before calling this constructor.
0170   ///
0171   /// @throws std::invalid_argument if the passed pointer is a nullptr.
0172   template <FrameDataType FrameData>
0173   Frame(std::unique_ptr<FrameData>);
0174 
0175   /// Frame constructor from (almost) arbitrary raw data.
0176   ///
0177   /// This r-value overload is mainly present for enabling the python bindings,
0178   /// where cppyy seems to strip the std::unique_ptr somewhere in the process
0179   ///
0180   /// @tparam FrameData Arbitrary data container that provides access to the
0181   ///                   collection buffers as well as the metadata, when
0182   ///                   requested by the Frame.
0183   template <RValueFrameDataType FrameData>
0184   Frame(FrameData&&);
0185 
0186   /// A Frame is move-only
0187   Frame(const Frame&) = delete;
0188   /// A Frame is move-only
0189   Frame& operator=(const Frame&) = delete;
0190 
0191   /// Frame move constructor
0192   Frame(Frame&&) = default;
0193 
0194   /// Frame move assignment operator
0195   Frame& operator=(Frame&&) = default;
0196 
0197   /// Frame destructor
0198   ///
0199   /// @note Since the Frame owns all the collections that have been put into it,
0200   /// or that can be obtained from it, this invalidates all references to these
0201   /// collections.
0202   ~Frame() = default;
0203 
0204   /// Get a collection from the Frame by name.
0205   ///
0206   /// @tparam CollT The type of the desired collection
0207   /// @param  name  The name of the collection
0208   ///
0209   /// @returns      A const reference to the collection if it is available or to
0210   ///               an empty (static) collection
0211   template <CollectionType CollT>
0212   const CollT& get(const std::string& name) const;
0213 
0214   /// Get a collection pointer from the Frame by name.
0215   ///
0216   /// This is a type-erased version that is also used by the python bindings.
0217   ///
0218   /// @returns A const pointer to a collection if it is available or a nullptr
0219   ///          if it is not
0220   const podio::CollectionBase* get(const std::string& name) const;
0221 
0222   /// (Destructively) move a collection into the Frame and get a reference to
0223   /// the inserted collection back for further use.
0224   ///
0225   /// The collection that is passed into the Frame has to be moved into it
0226   /// explicitly and the moved-from collection will be in the typical *valid but
0227   /// undefined state* in c++.
0228   ///
0229   /// @tparam CollT The type of the collection
0230   /// @param  coll  An rvalue reference to the collection to put into the Frame.
0231   /// @param  name  The name under which this collection should be stored in the
0232   ///               Frame
0233   ///
0234   /// @returns      A const reference to the collection that has just been
0235   ///               inserted
0236   template <CollectionRValueType CollT>
0237   const CollT& put(CollT&& coll, const std::string& name);
0238 
0239   /// (Destructively) move a collection into the Frame.
0240   ///
0241   /// @param coll The collection that should be moved into the Frame
0242   /// @param name The name under which this collection should be stored in the
0243   ///             Frame
0244   void put(std::unique_ptr<podio::CollectionBase> coll, const std::string& name);
0245 
0246   /// Add a value to the parameters of the Frame (if the type is supported).
0247   ///
0248   /// @tparam T    The type of the parameter. Has to be one of the types that
0249   ///              is supported by GenericParameters
0250   /// @param key   The name under which this parameter should be stored
0251   /// @param value The value of the parameter. A copy will be put into the Frame
0252   template <ValidGenericDataType T>
0253   inline void putParameter(const std::string& key, T value) {
0254     m_self->parameters().set(key, std::move(value));
0255   }
0256 
0257   /// Add a string value to the parameters of the Frame.
0258   ///
0259   /// This is a dedicated overload for enabling on-the-fly conversion from
0260   /// string literals.
0261   ///
0262   /// @param key   The name under which this parameter should be stored
0263   /// @param value The value of the parameter. A copy will be put into the Frame
0264   inline void putParameter(const std::string& key, std::string value) {
0265     putParameter<std::string>(key, std::move(value));
0266   }
0267 
0268   /// Add a vector of strings value the parameters of the Frame.
0269   ///
0270   /// This is a dedicated overload for enabling on-the-fly conversion from
0271   /// an initializer_list of string literals
0272   ///
0273   /// @param key    The name under which this parameter should be stored
0274   /// @param values The values of the parameter. A copy will be put into the Frame
0275   inline void putParameter(const std::string& key, std::vector<std::string> values) {
0276     putParameter<std::vector<std::string>>(key, std::move(values));
0277   }
0278 
0279   /// Add a vector of values to the parameters of the Frame (if the type is
0280   /// supported).
0281   ///
0282   /// This is a dedicated overload for enabling on-the-fly conversions of
0283   /// initializer_list of values
0284   ///
0285   /// @tparam T    The type of the parameter. Has to be one of the types that
0286   ///              is supported by GenericParameters
0287   /// @param key    The name under which this parameter should be stored
0288   /// @param values The values of the parameter. A copy will be put into the Frame
0289   template <ValidGenericDataType T>
0290   inline void putParameter(const std::string& key, std::initializer_list<T>&& values) {
0291     putParameter<std::vector<T>>(key, std::move(values));
0292   }
0293 
0294   /// Retrieve parameters via key from the internal store.
0295   ///
0296   /// @tparam T  The desired type of the parameter (can also be std::vector<T>)
0297   /// @param key The key under which the value is stored
0298   ///
0299   /// @returns   An optional holding the value if it is present
0300   template <ValidGenericDataType T>
0301   inline auto getParameter(const std::string& key) const {
0302     return m_self->parameters().get<T>(key);
0303   }
0304 
0305   /// Retrieve all parameters stored in this Frame.
0306   ///
0307   /// This is mainly intended for I/O purposes and we encourage to use the Frame
0308   /// functionality of getParameter or getParameterKeys in general.
0309   ///
0310   /// @returns The internally used GenericParameters
0311   inline const podio::GenericParameters& getParameters() const {
0312     return m_self->parameters();
0313   }
0314 
0315   /// Get the keys of all stored parameters for a given type
0316   ///
0317   /// @tparam T The desired parameter type
0318   ///
0319   /// @returns  A vector of keys for this parameter type
0320   template <ValidGenericDataType T>
0321   inline std::vector<std::string> getParameterKeys() const {
0322     return m_self->parameters().getKeys<T>();
0323   }
0324 
0325   /// Get all **currently** available collection names.
0326   ///
0327   /// @returns The names of all collections, including those that might still
0328   ///          need unpacking from the internal FrameData
0329   std::vector<std::string> getAvailableCollections() const {
0330     return m_self->availableCollections();
0331   }
0332 
0333   /// Get the name of the passed collection
0334   ///
0335   /// @param coll The collection for which the name should be obtained
0336   ///
0337   /// @returns The name of the collection or an empty optional if this
0338   ///          collection is not known to the Frame
0339   inline std::optional<std::string> getName(const podio::CollectionBase& coll) const {
0340     return getName(coll.getID());
0341   }
0342 
0343   /// Get the name for the passed collectionID
0344   ///
0345   /// @param collectionID The collection ID of the collection for which the name
0346   ///                     should be obtained
0347   /// @returns The name of the collection or an empty optional if this
0348   ///          collectionID is not known to the Frame
0349   inline std::optional<std::string> getName(const uint32_t collectionID) const {
0350     return m_self->getIDTable().name(collectionID);
0351   }
0352 
0353   // Interfaces for writing below
0354 
0355   /// Get a collection for writing.
0356   ///
0357   /// @note This method is intended for I/O purposes only and should not be used
0358   /// in other code.
0359   ///
0360   /// @returns The collection pointer in a prepared and "ready-to-write" state
0361   const podio::CollectionBase* getCollectionForWrite(const std::string& name) const {
0362     const auto* coll = m_self->get(name);
0363     if (coll) {
0364       coll->prepareForWrite();
0365     }
0366 
0367     return coll;
0368   }
0369 
0370   /// Get the internal CollectionIDTable for writing.
0371   ///
0372   /// @note This method is intended for I/O purposes only and should not be used
0373   /// in other code.
0374   ///
0375   /// @returns A copy of the internal collection id table
0376   podio::CollectionIDTable getCollectionIDTableForWrite() const {
0377     return m_self->getIDTable();
0378   }
0379 };
0380 
0381 // implementations below
0382 
0383 inline Frame::Frame() : Frame(std::make_unique<detail::EmptyFrameData>()) {
0384 }
0385 
0386 template <FrameDataType FrameData>
0387 Frame::Frame(std::unique_ptr<FrameData> data) : m_self(std::make_unique<FrameModel<FrameData>>(std::move(data))) {
0388 }
0389 
0390 template <RValueFrameDataType FrameData>
0391 Frame::Frame(FrameData&& data) : Frame(std::make_unique<FrameData>(std::move(data))) {
0392 }
0393 
0394 template <CollectionType CollT>
0395 const CollT& Frame::get(const std::string& name) const {
0396   const auto* coll = dynamic_cast<const CollT*>(m_self->get(name));
0397   if (coll) {
0398     return *coll;
0399   }
0400   // TODO: Handle non-existing collections
0401   static const auto emptyColl = CollT();
0402   return emptyColl;
0403 }
0404 
0405 inline const podio::CollectionBase* Frame::get(const std::string& name) const {
0406   return m_self->get(name);
0407 }
0408 
0409 inline void Frame::put(std::unique_ptr<podio::CollectionBase> coll, const std::string& name) {
0410   const auto* retColl = m_self->put(std::move(coll), name);
0411   if (!retColl) {
0412     // TODO: Handle collisions
0413   }
0414 }
0415 
0416 template <CollectionRValueType CollT>
0417 const CollT& Frame::put(CollT&& coll, const std::string& name) {
0418   const auto* retColl = static_cast<const CollT*>(m_self->put(std::make_unique<CollT>(std::move(coll)), name));
0419   if (retColl) {
0420     return *retColl;
0421   }
0422   // TODO: Handle collision case
0423   static const auto emptyColl = CollT();
0424   return emptyColl;
0425 }
0426 
0427 template <typename FrameDataT>
0428 Frame::FrameModel<FrameDataT>::FrameModel(std::unique_ptr<FrameDataT> data) :
0429     m_mapMtx(std::make_unique<std::mutex>()), m_dataMtx(std::make_unique<std::mutex>()) {
0430   if (!data) {
0431     throw std::invalid_argument(
0432         "FrameData is a nullptr. If you are reading from a file it may be corrupted or you may reading beyond the end "
0433         "of the file, please check the validity of the data before creating a Frame.");
0434   }
0435   m_data = std::move(data);
0436   m_idTable = std::move(m_data->getIDTable());
0437   m_parameters = std::move(m_data->getParameters());
0438 }
0439 
0440 template <typename FrameDataT>
0441 const podio::CollectionBase* Frame::FrameModel<FrameDataT>::get(const std::string& name) const {
0442   return doGet(name);
0443 }
0444 
0445 template <typename FrameDataT>
0446 podio::CollectionBase* Frame::FrameModel<FrameDataT>::doGet(const std::string& name, bool setReferences) const {
0447   {
0448     // First check whether the collection is in the map already
0449     //
0450     // Collections only land here if they are fully unpacked, i.e.
0451     // prepareAfterRead has been called or it has been put into the Frame
0452     std::lock_guard lock{*m_mapMtx};
0453     if (const auto it = m_collections.find(name); it != m_collections.end()) {
0454       return it->second.get();
0455     }
0456   }
0457 
0458   podio::CollectionBase* retColl = nullptr;
0459 
0460   // Now try to get it from the raw data if we have the possibility
0461   if (m_data) {
0462     // Have the buffers in the outer scope here to hold the raw data lock as
0463     // briefly as possible
0464     std::optional<podio::CollectionReadBuffers> buffers;
0465     {
0466       std::lock_guard lock{*m_dataMtx};
0467       buffers = unpack(m_data.get(), name);
0468     }
0469     if (buffers) {
0470       std::unique_ptr<podio::CollectionBase> coll{nullptr};
0471       // Subset collections do not need schema evolution (by definition)
0472       if (buffers->data == nullptr) {
0473         coll = buffers->createCollection(buffers.value(), true);
0474       } else {
0475         auto evolvedBuffers = podio::SchemaEvolution::instance().evolveBuffers(buffers.value(), buffers->schemaVersion,
0476                                                                                std::string(buffers->type));
0477         coll = evolvedBuffers.createCollection(evolvedBuffers, false);
0478       }
0479 
0480       coll->prepareAfterRead();
0481       coll->setID(m_idTable.collectionID(name).value());
0482       {
0483         std::lock_guard mapLock{*m_mapMtx};
0484         auto [it, success] = m_collections.emplace(name, std::move(coll));
0485         // TODO: Check success? Or simply assume that everything is fine at this point?
0486         // TODO: Collision handling?
0487         retColl = it->second.get();
0488       }
0489 
0490       if (setReferences) {
0491         retColl->setReferences(this);
0492       }
0493     }
0494   }
0495 
0496   return retColl;
0497 }
0498 
0499 template <typename FrameDataT>
0500 bool Frame::FrameModel<FrameDataT>::get(uint32_t collectionID, CollectionBase*& collection) const {
0501   const auto name = m_idTable.name(collectionID);
0502   if (!name) {
0503     return false;
0504   }
0505   const auto& [_, inserted] = m_retrievedIDs.insert(collectionID);
0506 
0507   if (inserted) {
0508     auto coll = doGet(name.value());
0509     if (coll) {
0510       collection = coll;
0511       return true;
0512     }
0513   } else {
0514     auto coll = doGet(name.value(), false);
0515     if (coll) {
0516       collection = coll;
0517       return true;
0518     }
0519   }
0520 
0521   return false;
0522 }
0523 
0524 template <typename FrameDataT>
0525 const podio::CollectionBase* Frame::FrameModel<FrameDataT>::put(std::unique_ptr<podio::CollectionBase> coll,
0526                                                                 const std::string& name) {
0527   {
0528     std::lock_guard lock{*m_mapMtx};
0529     auto [it, success] = m_collections.try_emplace(name, std::move(coll));
0530     if (success) {
0531       // TODO: Check whether this collection is already known to the idTable
0532       // -> What to do on collision?
0533       // -> Check before we emplace it into the internal map to prevent possible
0534       //    collisions from collections that are potentially present from rawdata?
0535       it->second->setID(m_idTable.add(name));
0536       return it->second.get();
0537     } else {
0538       throw std::invalid_argument("An object with key " + name + " already exists in the frame");
0539     }
0540   }
0541 
0542   return nullptr;
0543 }
0544 
0545 template <typename FrameDataT>
0546 std::vector<std::string> Frame::FrameModel<FrameDataT>::availableCollections() const {
0547   // TODO: Check if there is a more efficient way to do this. Currently this is
0548   // done very conservatively, but in a way that should always work, regardless
0549   // of assumptions. It might be possible to simply return what is in the
0550   // idTable here, because that should in principle encompass everything that is
0551   // in the raw data as well as things that have been put into the frame
0552 
0553   // Lock both the internal map and the rawdata for this
0554   std::scoped_lock lock{*m_mapMtx, *m_dataMtx};
0555 
0556   auto collections = m_data->getAvailableCollections();
0557   collections.reserve(collections.size() + m_collections.size());
0558 
0559   for (const auto& [name, _] : m_collections) {
0560     collections.push_back(name);
0561   }
0562 
0563   return collections;
0564 }
0565 
0566 } // namespace podio
0567 
0568 #endif // PODIO_FRAME_H