Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:30:01

0001 /// \file ROOT/RNTupleWriteOptions.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2024-02-22
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleWriteOptions
0015 #define ROOT_RNTupleWriteOptions
0016 
0017 #include <Compression.h>
0018 
0019 #include <cstdint>
0020 #include <cstddef>
0021 #include <memory>
0022 
0023 namespace ROOT {
0024 
0025 class RNTupleWriteOptions;
0026 
0027 namespace Internal {
0028 
0029 class RNTupleWriteOptionsManip final {
0030 public:
0031    static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
0032 };
0033 
0034 } // namespace Internal
0035 
0036 // clang-format off
0037 /**
0038 \class ROOT::RNTupleWriteOptions
0039 \ingroup NTuple
0040 \brief Common user-tunable settings for storing RNTuples
0041 
0042 All page sink classes need to support the common options.
0043 
0044 <table>
0045 <tr>
0046 <th>Option name</th>
0047 <th>Type</th>
0048 <th>Default</th>
0049 <th>Description</th>
0050 </tr>
0051 
0052 <tr>
0053 <td>`Compression`</td>
0054 <td>`std::uint32_t`</td>
0055 <td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
0056 <td>
0057 The compression settings for this RNTuple
0058 </td>
0059 </tr>
0060 
0061 <tr>
0062 <td>`ApproxZippedClusterSize`</td>
0063 <td>`std::size_t`</td>
0064 <td>128 MiB</td>
0065 <td>
0066 Approximation of the target compressed cluster size
0067 </td>
0068 </tr>
0069 
0070 <tr>
0071 <td>`MaxUnzippedClusterSize`</td>
0072 <td>`std::size_t`</td>
0073 <td>1280 MiB</td>
0074 <td>
0075 Memory limit for committing a cluster: with very high compression ratio, we need a limit
0076 on how large the I/O buffer can grow during writing.
0077 </td>
0078 </tr>
0079 
0080 <tr>
0081 <td>`InitialUnzippedPageSize`</td>
0082 <td>`std::size_t`</td>
0083 <td>256</td>
0084 <td>
0085 Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
0086 of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
0087 given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
0088 The total write buffer limit needs to be large enough to hold the initial pages of all columns.
0089 </td>
0090 </tr>
0091 
0092 <tr>
0093 <td>`MaxUnzippedPageSize`</td>
0094 <td>`std::size_t`</td>
0095 <td>1 MiB</td>
0096 <td>
0097 Pages can grow only to the given limit in bytes.
0098 </td>
0099 </tr>
0100 
0101 <tr>
0102 <td>`PageBufferBudget`</td>
0103 <td>`std::size_t`</td>
0104 <td>0 / auto</td>
0105 <td>
0106 The maximum size that the sum of all page buffers used for writing into a persistent sink are allowed to use.
0107 If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
0108 If set manually, the size needs to be large enough to hold all initial page buffers.
0109 The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
0110 Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
0111 The default values are tuned for a total write memory of around 400 MiB per fill context.
0112 </td>
0113 </tr>
0114 
0115 <tr>
0116 <td>`UseBufferedWrite`</td>
0117 <td>`bool`</td>
0118 <td>`true`</td>
0119 <td>
0120 Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
0121 to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
0122 </td>
0123 </tr>
0124 
0125 <tr>
0126 <td>`UseDirectIO`</td>
0127 <td>`bool`</td>
0128 <td>`false`</td>
0129 <td>
0130 Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
0131 filesystems and platforms.
0132 </td>
0133 </tr>
0134 
0135 <tr>
0136 <td>`WriteBufferSize`</td>
0137 <td>`std::size_t`</td>
0138 <td>4 MiB</td>
0139 <td>
0140 Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
0141 performance (with Direct I/O) at a reasonable memory consumption.
0142 </td>
0143 </tr>
0144 
0145 <tr>
0146 <td>`UseImplicitMT`</td>
0147 <td>EImplicitMT</td>
0148 <td>EImplicitMT::kDefault</td>
0149 <td>
0150 Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
0151 The meaning of EImplicitMT::kDefault depends on the used writer: For the (sequential) RNTupleWriter, it translates
0152 to EImplicitMT::kOn and the user has to manually disable the use of implicit multi-threading if it is not wanted.
0153 For the RNTupleParallelWriter, the implementation defaults to EImplicitMT::kOff in order to avoid interference with
0154 explicit parallelism that might create one RNTupleFillContext per thread. If implicit multi-threading is wanted on
0155 top of this, the user has to explicitly request EImplicitMT::kOn.
0156 </td>
0157 </tr>
0158 
0159 <tr>
0160 <td>`EnablePageChecksums`</td>
0161 <td>`bool`</td>
0162 <td>`true`</td>
0163 <td>
0164 If set, checksums will be calculated and written for every page.
0165 If turned off, will also turn off `EnableSamePageMerging`.
0166 </td>
0167 </tr>
0168 
0169 <tr>
0170 <td>`EnableSamePageMerging`</td>
0171 <td>`bool`</td>
0172 <td>`true`</td>
0173 <td>
0174 If set, identical pages are deduplicated and aliased on disk.
0175 Requires `EnablePageChecksums` and will throw if previously disabled.
0176 </td>
0177 </tr>
0178 
0179 </table>
0180 */
0181 // clang-format on
0182 class RNTupleWriteOptions {
0183 public:
0184    enum class EImplicitMT {
0185       kOff,
0186       kOn,
0187       kDefault,
0188    };
0189 
0190    // clang-format off
0191    static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
0192    // clang-format on
0193 
0194    friend Internal::RNTupleWriteOptionsManip;
0195 
0196 protected:
0197    std::uint32_t fCompression{RCompressionSetting::EDefaults::kUseGeneralPurpose};
0198    std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
0199    std::size_t fMaxUnzippedClusterSize = 10 * fApproxZippedClusterSize;
0200    std::size_t fInitialUnzippedPageSize = 256;
0201    std::size_t fMaxUnzippedPageSize = 1024 * 1024;
0202    std::size_t fPageBufferBudget = 0;
0203    bool fUseBufferedWrite = true;
0204    bool fUseDirectIO = false;
0205    std::size_t fWriteBufferSize = 4 * 1024 * 1024;
0206    EImplicitMT fUseImplicitMT = EImplicitMT::kDefault;
0207    bool fEnablePageChecksums = true;
0208    bool fEnableSamePageMerging = true;
0209    /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
0210    /// any payload whose size exceeds this will be split into multiple keys.
0211    std::uint64_t fMaxKeySize = kDefaultMaxKeySize;
0212 
0213 public:
0214 
0215    virtual ~RNTupleWriteOptions() = default;
0216    virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
0217 
0218    std::uint32_t GetCompression() const { return fCompression; }
0219    void SetCompression(std::uint32_t val) { fCompression = val; }
0220    void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
0221    {
0222       fCompression = CompressionSettings(algorithm, compressionLevel);
0223    }
0224 
0225    std::size_t GetApproxZippedClusterSize() const { return fApproxZippedClusterSize; }
0226    void SetApproxZippedClusterSize(std::size_t val);
0227 
0228    std::size_t GetMaxUnzippedClusterSize() const { return fMaxUnzippedClusterSize; }
0229    void SetMaxUnzippedClusterSize(std::size_t val);
0230 
0231    std::size_t GetInitialUnzippedPageSize() const { return fInitialUnzippedPageSize; }
0232    void SetInitialUnzippedPageSize(std::size_t val);
0233 
0234    std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
0235    void SetMaxUnzippedPageSize(std::size_t val);
0236 
0237    std::size_t GetPageBufferBudget() const;
0238    void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
0239 
0240    bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
0241    void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
0242 
0243    bool GetUseDirectIO() const { return fUseDirectIO; }
0244    void SetUseDirectIO(bool val) { fUseDirectIO = val; }
0245 
0246    std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
0247    void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
0248 
0249    EImplicitMT GetUseImplicitMT() const { return fUseImplicitMT; }
0250    void SetUseImplicitMT(EImplicitMT val) { fUseImplicitMT = val; }
0251 
0252    bool GetEnablePageChecksums() const { return fEnablePageChecksums; }
0253    /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
0254    void SetEnablePageChecksums(bool val)
0255    {
0256       fEnablePageChecksums = val;
0257       if (!fEnablePageChecksums) {
0258          fEnableSamePageMerging = false;
0259       }
0260    }
0261 
0262    bool GetEnableSamePageMerging() const { return fEnableSamePageMerging; }
0263    void SetEnableSamePageMerging(bool val);
0264 
0265    std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
0266 
0267    friend bool operator==(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs)
0268    {
0269       return lhs.fCompression == rhs.fCompression && lhs.fApproxZippedClusterSize == rhs.fApproxZippedClusterSize &&
0270              lhs.fMaxUnzippedClusterSize == rhs.fMaxUnzippedClusterSize &&
0271              lhs.fInitialUnzippedPageSize == rhs.fInitialUnzippedPageSize &&
0272              lhs.fMaxUnzippedPageSize == rhs.fMaxUnzippedPageSize && lhs.fPageBufferBudget == rhs.fPageBufferBudget &&
0273              lhs.fUseBufferedWrite == rhs.fUseBufferedWrite && lhs.fUseDirectIO == rhs.fUseDirectIO &&
0274              lhs.fWriteBufferSize == rhs.fWriteBufferSize && lhs.fUseImplicitMT == rhs.fUseImplicitMT &&
0275              lhs.fEnablePageChecksums == rhs.fEnablePageChecksums &&
0276              lhs.fEnableSamePageMerging == rhs.fEnableSamePageMerging && lhs.fMaxKeySize == rhs.fMaxKeySize;
0277    }
0278 
0279    friend bool operator!=(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs) { return !(lhs == rhs); }
0280 };
0281 
0282 namespace Internal {
0283 inline void RNTupleWriteOptionsManip::SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
0284 {
0285    options.fMaxKeySize = maxKeySize;
0286 }
0287 
0288 } // namespace Internal
0289 } // namespace ROOT
0290 
0291 #endif // ROOT_RNTupleWriteOptions