|
|
|||
Warning, file /include/root/ROOT/RNTupleJoinTable.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /// \file ROOT/RNTupleJoinTable.hxx 0002 /// \ingroup NTuple 0003 /// \author Florine de Geus <florine.de.geus@cern.ch> 0004 /// \date 2024-04-02 0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback 0006 /// is welcome! 0007 0008 /************************************************************************* 0009 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0010 * All rights reserved. * 0011 * * 0012 * For the licensing terms see $ROOTSYS/LICENSE. * 0013 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0014 *************************************************************************/ 0015 0016 #ifndef ROOT_RNTupleJoinTable 0017 #define ROOT_RNTupleJoinTable 0018 0019 #include <ROOT/RField.hxx> 0020 0021 #include <memory> 0022 #include <string> 0023 #include <unordered_map> 0024 #include <vector> 0025 0026 namespace ROOT { 0027 namespace Experimental { 0028 namespace Internal { 0029 // clang-format off 0030 /** 0031 \class ROOT::Experimental::Internal::RNTupleJoinTable 0032 \ingroup NTuple 0033 \brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples. 0034 */ 0035 // clang-format on 0036 class RNTupleJoinTable { 0037 public: 0038 using JoinValue_t = std::uint64_t; 0039 using PartitionKey_t = std::uint64_t; 0040 static constexpr PartitionKey_t kDefaultPartitionKey = PartitionKey_t(-1); 0041 0042 private: 0043 // clang-format off 0044 /** 0045 \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping 0046 \ingroup NTuple 0047 \brief Provides a mapping from one or several join field values to an entry index. 0048 */ 0049 // clang-format on 0050 class REntryMapping { 0051 private: 0052 ////////////////////////////////////////////////////////////////////////// 0053 /// Container for the combined hashes of join field values. 0054 struct RCombinedJoinFieldValue { 0055 std::vector<JoinValue_t> fJoinFieldValues; 0056 0057 RCombinedJoinFieldValue(const std::vector<JoinValue_t> &joinFieldValues) : fJoinFieldValues(joinFieldValues) {} 0058 0059 inline bool operator==(const RCombinedJoinFieldValue &other) const 0060 { 0061 return other.fJoinFieldValues == fJoinFieldValues; 0062 } 0063 }; 0064 0065 ///////////////////////////////////////////////////////////////////////////// 0066 /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation 0067 /// from `boost::hash_combine`. See 0068 /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more 0069 /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently 0070 /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle 0071 /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon 0072 /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function." 0073 /// 0074 /// \note 0075 /// \parblock 0076 /// Copyright 2005-2014 Daniel James. 0077 /// Copyright 2021, 2022 Peter Dimov. 0078 /// Distributed under the Boost Software License, Version 1.0. 0079 /// https://www.boost.org/LICENSE_1_0.txt 0080 /// 0081 /// Based on Peter Dimov's proposal 0082 /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf 0083 /// issue 6.18. 0084 /// 0085 /// Boost Software License - Version 1.0 - August 17th, 2003 0086 /// 0087 /// Permission is hereby granted, free of charge, to any person or organization 0088 /// obtaining a copy of the software and accompanying documentation covered by 0089 /// this license (the "Software") to use, reproduce, display, distribute, 0090 /// execute, and transmit the Software, and to prepare derivative works of the 0091 /// Software, and to permit third-parties to whom the Software is furnished to 0092 /// do so, all subject to the following: 0093 /// 0094 /// The copyright notices in the Software and this entire statement, including 0095 /// the above license grant, this restriction and the following disclaimer, 0096 /// must be included in all copies of the Software, in whole or in part, and 0097 /// all derivative works of the Software, unless such copies or derivative 0098 /// works are solely in the form of machine-executable object code generated by 0099 /// a source language processor. 0100 /// 0101 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 0102 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 0103 /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 0104 /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 0105 /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 0106 /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 0107 /// DEALINGS IN THE SOFTWARE. 0108 /// \endparblock 0109 struct RCombinedJoinFieldValueHash { 0110 inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const 0111 { 0112 std::size_t seed = 0; 0113 for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) { 0114 seed ^= mix(seed + 0x9e3779b9 + fieldVal); 0115 } 0116 return seed; 0117 } 0118 0119 inline std::size_t mix(std::size_t init) const 0120 { 0121 #ifdef R__B64 0122 init ^= init >> 32; 0123 init *= 0xe9846af9b1a615d; 0124 init ^= init >> 32; 0125 init *= 0xe9846af9b1a615d; 0126 init ^= init >> 28; 0127 #else 0128 init ^= init >> 16; 0129 init *= 0x21f0aaad; 0130 init ^= init >> 15; 0131 init *= 0x735a2d97; 0132 init ^= init >> 15; 0133 #endif 0134 return init; 0135 } 0136 }; 0137 0138 /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple 0139 /// fields) to their respective entry numbers. 0140 std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash> 0141 fMapping; 0142 0143 /// Names of the join fields used for the mapping to their respective entry indexes. 0144 std::vector<std::string> fJoinFieldNames; 0145 0146 /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be 0147 /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`. 0148 std::vector<std::size_t> fJoinFieldValueSizes; 0149 0150 public: 0151 ////////////////////////////////////////////////////////////////////////// 0152 /// \brief Get the entry indexes for this entry mapping. 0153 const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const; 0154 0155 ////////////////////////////////////////////////////////////////////////// 0156 /// \brief Create a new entry mapping. 0157 /// 0158 /// \param[in] pageSource The page source of the RNTuple with the entries to map. 0159 /// \param[in] joinFieldNames Names of the join fields to use in the mapping. 0160 /// \param[in] entryOffset Offset to add to each entry index in the mapping. This can can be used when the 0161 /// RNTuple represented by the provided page source is part of a chain of RNTuples. 0162 REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames, 0163 ROOT::NTupleSize_t entryOffset = 0); 0164 }; 0165 /// Names of the join fields used for the mapping to their respective entry indexes. 0166 std::vector<std::string> fJoinFieldNames; 0167 0168 /// Partitions of one or multiple entry mappings. 0169 std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions; 0170 0171 ///////////////////////////////////////////////////////////////////////////// 0172 /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source. 0173 /// 0174 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are 0175 /// allowed. 0176 RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {} 0177 0178 public: 0179 RNTupleJoinTable(const RNTupleJoinTable &other) = delete; 0180 RNTupleJoinTable &operator=(const RNTupleJoinTable &other) = delete; 0181 RNTupleJoinTable(RNTupleJoinTable &&other) = delete; 0182 RNTupleJoinTable &operator=(RNTupleJoinTable &&other) = delete; 0183 ~RNTupleJoinTable() = default; 0184 0185 ///////////////////////////////////////////////////////////////////////////// 0186 /// \brief Create an RNTupleJoinTable from an existing RNTuple. 0187 /// 0188 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are 0189 /// allowed. 0190 /// 0191 /// \return A pointer to the newly-created join table. 0192 static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames); 0193 0194 ///////////////////////////////////////////////////////////////////////////// 0195 /// \brief Add an entry mapping to the join table. 0196 /// 0197 /// 0198 /// \param[in] pageSource The page source of the RNTuple with the entries to map. 0199 /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default 0200 /// partition. 0201 /// \param[in] entryOffset Offset to add to each entry index in the mapping. This can can be used when the 0202 /// RNTuple represented by the provided page source is part of a chain of RNTuples. 0203 /// 0204 /// \return A reference to the updated join table. 0205 RNTupleJoinTable &Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey = kDefaultPartitionKey, 0206 ROOT::NTupleSize_t entryOffset = 0); 0207 0208 ///////////////////////////////////////////////////////////////////////////// 0209 /// \brief Get an entry index (if it exists) for the given join field value(s), from any partition. 0210 /// 0211 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0212 /// 0213 /// \note If one or more corresponding entries exist for the given value(s), the first entry index found in the join 0214 /// table is returned. To get *all* the entry indexes, use GetEntryIndexes. 0215 /// 0216 /// \return An entry number that corresponds to `valuePtrs`. When there are no corresponding entries, 0217 /// `kInvalidNTupleIndex` is returned. 0218 ROOT::NTupleSize_t GetEntryIndex(const std::vector<void *> &valuePtrs) const; 0219 0220 ///////////////////////////////////////////////////////////////////////////// 0221 /// \brief Get all entry indexes for the given join field value(s) within a partition. 0222 /// 0223 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0224 /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default 0225 /// partition key. 0226 /// 0227 /// \return The entry numbers that correspond to `valuePtrs`. When there are no corresponding entries, an empty 0228 /// vector is returned. 0229 std::vector<ROOT::NTupleSize_t> 0230 GetEntryIndexes(const std::vector<void *> &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const; 0231 0232 ///////////////////////////////////////////////////////////////////////////// 0233 /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions. 0234 /// 0235 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0236 /// \param[in] partitionKeys The partition keys to use for the lookup. 0237 /// 0238 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding 0239 /// entries, an empty map is returned. 0240 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>> 0241 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs, 0242 const std::vector<PartitionKey_t> &partitionKeys) const; 0243 0244 ///////////////////////////////////////////////////////////////////////////// 0245 /// \brief Get all entry indexes for the given join field value(s) for all partitions. 0246 /// 0247 /// \param[in] valuePtrs A vector of pointers to the join field values to look up. 0248 /// 0249 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding 0250 /// entries, an empty map is returned. 0251 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>> 0252 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs) const; 0253 }; 0254 } // namespace Internal 0255 } // namespace Experimental 0256 } // namespace ROOT 0257 0258 #endif // ROOT_RNTupleJoinTable
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|