![]() |
|
|||
Warning, file /include/root/ROOT/RNTupleJoinTable.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /// \file ROOT/RNTupleJoinTable.hxx 0002 /// \ingroup NTuple 0003 /// \author Florine de Geus <florine.de.geus@cern.ch> 0004 /// \date 2024-04-02 0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback 0006 /// is welcome! 0007 0008 /************************************************************************* 0009 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0010 * All rights reserved. * 0011 * * 0012 * For the licensing terms see $ROOTSYS/LICENSE. * 0013 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0014 *************************************************************************/ 0015 0016 #ifndef ROOT_RNTupleJoinTable 0017 #define ROOT_RNTupleJoinTable 0018 0019 #include <ROOT/RField.hxx> 0020 0021 #include <memory> 0022 #include <string> 0023 #include <unordered_map> 0024 #include <vector> 0025 0026 namespace ROOT { 0027 namespace Experimental { 0028 namespace Internal { 0029 // clang-format off 0030 /** 0031 \class ROOT::Experimental::Internal::RNTupleJoinTable 0032 \ingroup NTuple 0033 \brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples. 0034 */ 0035 // clang-format on 0036 class RNTupleJoinTable { 0037 public: 0038 using JoinValue_t = std::uint64_t; 0039 using PartitionKey_t = std::uint64_t; 0040 static constexpr PartitionKey_t kDefaultPartitionKey = PartitionKey_t(-1); 0041 0042 private: 0043 // clang-format off 0044 /** 0045 \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping 0046 \ingroup NTuple 0047 \brief Provides a mapping from one or several join field values to an entry index. 0048 */ 0049 // clang-format on 0050 class REntryMapping { 0051 private: 0052 ////////////////////////////////////////////////////////////////////////// 0053 /// Container for the combined hashes of join field values. 0054 struct RCombinedJoinFieldValue { 0055 std::vector<JoinValue_t> fJoinFieldValues; 0056 0057 RCombinedJoinFieldValue(const std::vector<JoinValue_t> &joinFieldValues) : fJoinFieldValues(joinFieldValues) {} 0058 0059 inline bool operator==(const RCombinedJoinFieldValue &other) const 0060 { 0061 return other.fJoinFieldValues == fJoinFieldValues; 0062 } 0063 }; 0064 0065 ///////////////////////////////////////////////////////////////////////////// 0066 /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation 0067 /// from `boost::hash_combine`. See 0068 /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more 0069 /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently 0070 /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle 0071 /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon 0072 /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function." 0073 /// 0074 /// \note 0075 /// \parblock 0076 /// Copyright 2005-2014 Daniel James. 0077 /// Copyright 2021, 2022 Peter Dimov. 0078 /// Distributed under the Boost Software License, Version 1.0. 0079 /// https://www.boost.org/LICENSE_1_0.txt 0080 /// 0081 /// Based on Peter Dimov's proposal 0082 /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf 0083 /// issue 6.18. 0084 /// 0085 /// Boost Software License - Version 1.0 - August 17th, 2003 0086 /// 0087 /// Permission is hereby granted, free of charge, to any person or organization 0088 /// obtaining a copy of the software and accompanying documentation covered by 0089 /// this license (the "Software") to use, reproduce, display, distribute, 0090 /// execute, and transmit the Software, and to prepare derivative works of the 0091 /// Software, and to permit third-parties to whom the Software is furnished to 0092 /// do so, all subject to the following: 0093 /// 0094 /// The copyright notices in the Software and this entire statement, including 0095 /// the above license grant, this restriction and the following disclaimer, 0096 /// must be included in all copies of the Software, in whole or in part, and 0097 /// all derivative works of the Software, unless such copies or derivative 0098 /// works are solely in the form of machine-executable object code generated by 0099 /// a source language processor. 0100 /// 0101 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 0102 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 0103 /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 0104 /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 0105 /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 0106 /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 0107 /// DEALINGS IN THE SOFTWARE. 0108 /// \endparblock 0109 struct RCombinedJoinFieldValueHash { 0110 inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const 0111 { 0112 std::size_t seed = 0; 0113 for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) { 0114 seed ^= mix(seed + 0x9e3779b9 + fieldVal); 0115 } 0116 return seed; 0117 } 0118 0119 inline std::size_t mix(std::size_t init) const 0120 { 0121 init ^= init >> 32; 0122 init *= 0xe9846af9b1a615d; 0123 init ^= init >> 32; 0124 init *= 0xe9846af9b1a615d; 0125 init ^= init >> 28; 0126 return init; 0127 } 0128 }; 0129 0130 /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple 0131 /// fields) to their respective entry numbers. 0132 std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash> 0133 fMapping; 0134 0135 /// Names of the join fields used for the mapping to their respective entry indexes. 0136 std::vector<std::string> fJoinFieldNames; 0137 0138 /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be 0139 /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`. 0140 std::vector<std::size_t> fJoinFieldValueSizes; 0141 0142 public: 0143 ////////////////////////////////////////////////////////////////////////// 0144 /// \brief Get the entry indexes for this entry mapping. 0145 const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const; 0146 0147 ////////////////////////////////////////////////////////////////////////// 0148 /// \brief Create a new entry mapping. 0149 /// 0150 /// \param[in] pageSource The page source of the RNTuple with the entries to map. 0151 /// \param[in] joinFieldNames Names of the join fields to use in the mapping. 0152 REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames); 0153 }; 0154 /// Names of the join fields used for the mapping to their respective entry indexes. 0155 std::vector<std::string> fJoinFieldNames; 0156 0157 /// Partitions of one or multiple entry mappings. 0158 std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions; 0159 0160 ///////////////////////////////////////////////////////////////////////////// 0161 /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source. 0162 /// 0163 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are 0164 /// allowed. 0165 RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {} 0166 0167 public: 0168 RNTupleJoinTable(const RNTupleJoinTable &other) = delete; 0169 RNTupleJoinTable &operator=(const RNTupleJoinTable &other) = delete; 0170 RNTupleJoinTable(RNTupleJoinTable &&other) = delete; 0171 RNTupleJoinTable &operator=(RNTupleJoinTable &&other) = delete; 0172 ~RNTupleJoinTable() = default; 0173 0174 ///////////////////////////////////////////////////////////////////////////// 0175 /// \brief Create an RNTupleJoinTable from an existing RNTuple. 0176 /// 0177 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are 0178 /// allowed. 0179 /// 0180 /// \return A pointer to the newly-created join table. 0181 static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames); 0182 0183 ///////////////////////////////////////////////////////////////////////////// 0184 /// \brief Add an entry mapping to the join table. 0185 /// 0186 /// 0187 /// \param[in] pageSource The page source of the RNTuple with the entries to map. 0188 /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default 0189 /// partition. 0190 /// 0191 /// \return A reference to the updated join table. 0192 RNTupleJoinTable &Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey = kDefaultPartitionKey); 0193 0194 ///////////////////////////////////////////////////////////////////////////// 0195 /// \brief Get all entry indexes for the given join field value(s) within a partition. 0196 /// 0197 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0198 /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default 0199 /// partition key. 0200 /// 0201 /// \return The entry numbers that correspond to `valuePtrs`. When there are no corresponding entries, an empty 0202 /// vector is returned. 0203 std::vector<ROOT::NTupleSize_t> 0204 GetEntryIndexes(const std::vector<void *> &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const; 0205 0206 ///////////////////////////////////////////////////////////////////////////// 0207 /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions. 0208 /// 0209 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0210 /// \param[in] partitionKeys The partition keys to use for the lookup. 0211 /// 0212 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding 0213 /// entries, an empty map is returned. 0214 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>> 0215 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs, 0216 const std::vector<PartitionKey_t> &partitionKeys) const; 0217 0218 ///////////////////////////////////////////////////////////////////////////// 0219 /// \brief Get all entry indexes for the given join field value(s) for all partitions. 0220 /// 0221 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0222 /// 0223 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding 0224 /// entries, an empty map is returned. 0225 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>> 0226 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs) const; 0227 }; 0228 } // namespace Internal 0229 } // namespace Experimental 0230 } // namespace ROOT 0231 0232 #endif // ROOT_RNTupleJoinTable
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |