Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/root/ROOT/RNTupleJoinTable.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /// \file ROOT/RNTupleJoinTable.hxx
0002 /// \ingroup NTuple
0003 /// \author Florine de Geus <florine.de.geus@cern.ch>
0004 /// \date 2024-04-02
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT_RNTupleJoinTable
0017 #define ROOT_RNTupleJoinTable
0018 
0019 #include <ROOT/RField.hxx>
0020 
0021 #include <memory>
0022 #include <string>
0023 #include <unordered_map>
0024 #include <vector>
0025 
0026 namespace ROOT {
0027 namespace Experimental {
0028 namespace Internal {
0029 // clang-format off
0030 /**
0031 \class ROOT::Experimental::Internal::RNTupleJoinTable
0032 \ingroup NTuple
0033 \brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
0034 */
0035 // clang-format on
0036 class RNTupleJoinTable {
0037 public:
0038    using JoinValue_t = std::uint64_t;
0039    using PartitionKey_t = std::uint64_t;
0040    static constexpr PartitionKey_t kDefaultPartitionKey = PartitionKey_t(-1);
0041 
0042 private:
0043    // clang-format off
0044    /**
0045    \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping
0046    \ingroup NTuple
0047    \brief Provides a mapping from one or several join field values to an entry index.
0048    */
0049    // clang-format on
0050    class REntryMapping {
0051    private:
0052       //////////////////////////////////////////////////////////////////////////
0053       /// Container for the combined hashes of join field values.
0054       struct RCombinedJoinFieldValue {
0055          std::vector<JoinValue_t> fJoinFieldValues;
0056 
0057          RCombinedJoinFieldValue(const std::vector<JoinValue_t> &joinFieldValues) : fJoinFieldValues(joinFieldValues) {}
0058 
0059          inline bool operator==(const RCombinedJoinFieldValue &other) const
0060          {
0061             return other.fJoinFieldValues == fJoinFieldValues;
0062          }
0063       };
0064 
0065       /////////////////////////////////////////////////////////////////////////////
0066       /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation
0067       /// from `boost::hash_combine`. See
0068       /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more
0069       /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently
0070       /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle
0071       /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon
0072       /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function."
0073       ///
0074       /// \note
0075       /// \parblock
0076       /// Copyright 2005-2014 Daniel James.
0077       /// Copyright 2021, 2022 Peter Dimov.
0078       /// Distributed under the Boost Software License, Version 1.0.
0079       /// https://www.boost.org/LICENSE_1_0.txt
0080       ///
0081       /// Based on Peter Dimov's proposal
0082       /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
0083       /// issue 6.18.
0084       ///
0085       /// Boost Software License - Version 1.0 - August 17th, 2003
0086       ///
0087       /// Permission is hereby granted, free of charge, to any person or organization
0088       /// obtaining a copy of the software and accompanying documentation covered by
0089       /// this license (the "Software") to use, reproduce, display, distribute,
0090       /// execute, and transmit the Software, and to prepare derivative works of the
0091       /// Software, and to permit third-parties to whom the Software is furnished to
0092       /// do so, all subject to the following:
0093       ///
0094       /// The copyright notices in the Software and this entire statement, including
0095       /// the above license grant, this restriction and the following disclaimer,
0096       /// must be included in all copies of the Software, in whole or in part, and
0097       /// all derivative works of the Software, unless such copies or derivative
0098       /// works are solely in the form of machine-executable object code generated by
0099       /// a source language processor.
0100       ///
0101       /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0102       /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0103       /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
0104       /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
0105       /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
0106       /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
0107       /// DEALINGS IN THE SOFTWARE.
0108       /// \endparblock
0109       struct RCombinedJoinFieldValueHash {
0110          inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const
0111          {
0112             std::size_t seed = 0;
0113             for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) {
0114                seed ^= mix(seed + 0x9e3779b9 + fieldVal);
0115             }
0116             return seed;
0117          }
0118 
0119          inline std::size_t mix(std::size_t init) const
0120          {
0121             init ^= init >> 32;
0122             init *= 0xe9846af9b1a615d;
0123             init ^= init >> 32;
0124             init *= 0xe9846af9b1a615d;
0125             init ^= init >> 28;
0126             return init;
0127          }
0128       };
0129 
0130       /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple
0131       /// fields) to their respective entry numbers.
0132       std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash>
0133          fMapping;
0134 
0135       /// Names of the join fields used for the mapping to their respective entry indexes.
0136       std::vector<std::string> fJoinFieldNames;
0137 
0138       /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be
0139       /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`.
0140       std::vector<std::size_t> fJoinFieldValueSizes;
0141 
0142    public:
0143       //////////////////////////////////////////////////////////////////////////
0144       /// \brief Get the entry indexes for this entry mapping.
0145       const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const;
0146 
0147       //////////////////////////////////////////////////////////////////////////
0148       /// \brief Create a new entry mapping.
0149       ///
0150       /// \param[in] pageSource The page source of the RNTuple with the entries to map.
0151       /// \param[in] joinFieldNames Names of the join fields to use in the mapping.
0152       REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames);
0153    };
0154    /// Names of the join fields used for the mapping to their respective entry indexes.
0155    std::vector<std::string> fJoinFieldNames;
0156 
0157    /// Partitions of one or multiple entry mappings.
0158    std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions;
0159 
0160    /////////////////////////////////////////////////////////////////////////////
0161    /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source.
0162    ///
0163    /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
0164    /// allowed.
0165    RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {}
0166 
0167 public:
0168    RNTupleJoinTable(const RNTupleJoinTable &other) = delete;
0169    RNTupleJoinTable &operator=(const RNTupleJoinTable &other) = delete;
0170    RNTupleJoinTable(RNTupleJoinTable &&other) = delete;
0171    RNTupleJoinTable &operator=(RNTupleJoinTable &&other) = delete;
0172    ~RNTupleJoinTable() = default;
0173 
0174    /////////////////////////////////////////////////////////////////////////////
0175    /// \brief Create an RNTupleJoinTable from an existing RNTuple.
0176    ///
0177    /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
0178    /// allowed.
0179    ///
0180    /// \return A pointer to the newly-created join table.
0181    static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames);
0182 
0183    /////////////////////////////////////////////////////////////////////////////
0184    /// \brief Add an entry mapping to the join table.
0185    ///
0186    ///
0187    /// \param[in] pageSource The page source of the RNTuple with the entries to map.
0188    /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default
0189    /// partition.
0190    ///
0191    /// \return A reference to the updated join table.
0192    RNTupleJoinTable &Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey = kDefaultPartitionKey);
0193 
0194    /////////////////////////////////////////////////////////////////////////////
0195    /// \brief Get all entry indexes for the given join field value(s) within a partition.
0196    ///
0197    /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
0198    /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default
0199    /// partition key.
0200    ///
0201    /// \return The entry numbers that correspond to `valuePtrs`. When there are no corresponding entries, an empty
0202    /// vector is returned.
0203    std::vector<ROOT::NTupleSize_t>
0204    GetEntryIndexes(const std::vector<void *> &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const;
0205 
0206    /////////////////////////////////////////////////////////////////////////////
0207    /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions.
0208    ///
0209    /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
0210    /// \param[in] partitionKeys The partition keys to use for the lookup.
0211    ///
0212    /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding
0213    /// entries, an empty map is returned.
0214    std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
0215    GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs,
0216                               const std::vector<PartitionKey_t> &partitionKeys) const;
0217 
0218    /////////////////////////////////////////////////////////////////////////////
0219    /// \brief Get all entry indexes for the given join field value(s) for all partitions.
0220    ///
0221    /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
0222    ///
0223    /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding
0224    /// entries, an empty map is returned.
0225    std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
0226    GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs) const;
0227 };
0228 } // namespace Internal
0229 } // namespace Experimental
0230 } // namespace ROOT
0231 
0232 #endif // ROOT_RNTupleJoinTable