Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-10-24 09:19:05

0001 /// \file ROOT/RNTupleWriteOptions.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2024-02-22
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleWriteOptions
0015 #define ROOT_RNTupleWriteOptions
0016 
0017 #include <Compression.h>
0018 
0019 #include <cstdint>
0020 #include <cstddef>
0021 #include <memory>
0022 
0023 namespace ROOT {
0024 
0025 class RNTupleWriteOptions;
0026 
0027 namespace Internal {
0028 
0029 class RNTupleWriteOptionsManip final {
0030 public:
0031    static void SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize);
0032 };
0033 
0034 } // namespace Internal
0035 
0036 // clang-format off
0037 /**
0038 \class ROOT::RNTupleWriteOptions
0039 \ingroup NTuple
0040 \brief Common user-tunable settings for storing RNTuples
0041 
0042 All page sink classes need to support the common options.
0043 
0044 <table>
0045 <tr>
0046 <th>Option name</th>
0047 <th>Type</th>
0048 <th>Default</th>
0049 <th>Description</th>
0050 </tr>
0051 
0052 <tr>
0053 <td>`Compression`</td>
0054 <td>`std::uint32_t`</td>
0055 <td>RCompressionSetting::EDefaults::kUseGeneralPurpose</td>
0056 <td>
0057 The compression settings for this RNTuple
0058 </td>
0059 </tr>
0060 
0061 <tr>
0062 <td>`ApproxZippedClusterSize`</td>
0063 <td>`std::size_t`</td>
0064 <td>128 MiB</td>
0065 <td>
0066 Approximation of the target compressed cluster size
0067 </td>
0068 </tr>
0069 
0070 <tr>
0071 <td>`MaxUnzippedClusterSize`</td>
0072 <td>`std::size_t`</td>
0073 <td>1280 MiB</td>
0074 <td>
0075 Memory limit for committing a cluster: with very high compression ratio, we need a limit
0076 on how large the I/O buffer can grow during writing.
0077 </td>
0078 </tr>
0079 
0080 <tr>
0081 <td>`InitialUnzippedPageSize`</td>
0082 <td>`std::size_t`</td>
0083 <td>256</td>
0084 <td>
0085 Initially, columns start with a page of this size. The default value is chosen to accommodate at least 32 elements
0086 of 64 bits, or 64 elements of 32 bits. If more elements are needed, pages are increased up until the byte limit
0087 given by the option `MaxUnzippedPageSize` or until the total page buffer limit is reached (as a sum of all page buffers).
0088 The total write buffer limit needs to be large enough to hold the initial pages of all columns.
0089 </td>
0090 </tr>
0091 
0092 <tr>
0093 <td>`MaxUnzippedPageSize`</td>
0094 <td>`std::size_t`</td>
0095 <td>1 MiB</td>
0096 <td>
0097 Pages can grow only to the given limit in bytes.
0098 </td>
0099 </tr>
0100 
0101 <tr>
0102 <td>`PageBufferBudget`</td>
0103 <td>`std::size_t`</td>
0104 <td>0 / auto</td>
0105 <td>
0106 The maximum size that the sum of all page buffers used for writing into a persistent sink are allowed to use.
0107 If set to zero, RNTuple will auto-adjust the budget based on the value of `ApproxZippedClusterSize`.
0108 If set manually, the size needs to be large enough to hold all initial page buffers.
0109 The total amount of memory for writing is larger, e.g. for the additional compressed buffers etc.
0110 Use RNTupleModel::EstimateWriteMemoryUsage() for the total estimated memory use for writing.
0111 The default values are tuned for a total write memory of around 400 MiB per fill context.
0112 </td>
0113 </tr>
0114 
0115 <tr>
0116 <td>`UseBufferedWrite`</td>
0117 <td>`bool`</td>
0118 <td>`true`</td>
0119 <td>
0120 Whether to use buffered writing (with RPageSinkBuf). This buffers compressed pages in memory, reorders them
0121 to keep pages of the same column adjacent, and coalesces the writes when committing a cluster.
0122 </td>
0123 </tr>
0124 
0125 <tr>
0126 <td>`UseDirectIO`</td>
0127 <td>`bool`</td>
0128 <td>`false`</td>
0129 <td>
0130 Whether to use Direct I/O for writing. Note that this introduces alignment requirements that may vary between
0131 filesystems and platforms.
0132 </td>
0133 </tr>
0134 
0135 <tr>
0136 <td>`WriteBufferSize`</td>
0137 <td>`std::size_t`</td>
0138 <td>4 MiB</td>
0139 <td>
0140 Buffer size to use for writing to files, must be a multiple of 4096 bytes. Testing suggests that 4MiB gives best
0141 performance (with Direct I/O) at a reasonable memory consumption.
0142 </td>
0143 </tr>
0144 
0145 <tr>
0146 <td>`UseImplicitMT`</td>
0147 <td>EImplicitMT</td>
0148 <td>EImplicitMT::kDefault</td>
0149 <td>
0150 Whether to use implicit multi-threading to compress pages. Only has an effect if buffered writing is turned on.
0151 </td>
0152 </tr>
0153 
0154 <tr>
0155 <td>`EnablePageChecksums`</td>
0156 <td>`bool`</td>
0157 <td>`true`</td>
0158 <td>
0159 If set, checksums will be calculated and written for every page.
0160 If turned off, will also turn off `EnableSamePageMerging`.
0161 </td>
0162 </tr>
0163 
0164 <tr>
0165 <td>`EnableSamePageMerging`</td>
0166 <td>`bool`</td>
0167 <td>`true`</td>
0168 <td>
0169 If set, identical pages are deduplicated and aliased on disk.
0170 Requires `EnablePageChecksums` and will throw if previously disabled.
0171 </td>
0172 </tr>
0173 
0174 </table>
0175 */
0176 // clang-format on
0177 class RNTupleWriteOptions {
0178 public:
0179    enum class EImplicitMT {
0180       kOff,
0181       kDefault,
0182    };
0183 
0184    // clang-format off
0185    static constexpr std::uint64_t kDefaultMaxKeySize = 0x4000'0000; // 1 GiB
0186    // clang-format on
0187 
0188    friend Internal::RNTupleWriteOptionsManip;
0189 
0190 protected:
0191    std::uint32_t fCompression{RCompressionSetting::EDefaults::kUseGeneralPurpose};
0192    std::size_t fApproxZippedClusterSize = 128 * 1024 * 1024;
0193    std::size_t fMaxUnzippedClusterSize = 10 * fApproxZippedClusterSize;
0194    std::size_t fInitialUnzippedPageSize = 256;
0195    std::size_t fMaxUnzippedPageSize = 1024 * 1024;
0196    std::size_t fPageBufferBudget = 0;
0197    bool fUseBufferedWrite = true;
0198    bool fUseDirectIO = false;
0199    std::size_t fWriteBufferSize = 4 * 1024 * 1024;
0200    EImplicitMT fUseImplicitMT = EImplicitMT::kDefault;
0201    bool fEnablePageChecksums = true;
0202    bool fEnableSamePageMerging = true;
0203    /// Specifies the max size of a payload storeable into a single TKey. When writing an RNTuple to a ROOT file,
0204    /// any payload whose size exceeds this will be split into multiple keys.
0205    std::uint64_t fMaxKeySize = kDefaultMaxKeySize;
0206 
0207 public:
0208 
0209    virtual ~RNTupleWriteOptions() = default;
0210    virtual std::unique_ptr<RNTupleWriteOptions> Clone() const;
0211 
0212    std::uint32_t GetCompression() const { return fCompression; }
0213    void SetCompression(std::uint32_t val) { fCompression = val; }
0214    void SetCompression(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
0215    {
0216       fCompression = CompressionSettings(algorithm, compressionLevel);
0217    }
0218 
0219    std::size_t GetApproxZippedClusterSize() const { return fApproxZippedClusterSize; }
0220    void SetApproxZippedClusterSize(std::size_t val);
0221 
0222    std::size_t GetMaxUnzippedClusterSize() const { return fMaxUnzippedClusterSize; }
0223    void SetMaxUnzippedClusterSize(std::size_t val);
0224 
0225    std::size_t GetInitialUnzippedPageSize() const { return fInitialUnzippedPageSize; }
0226    void SetInitialUnzippedPageSize(std::size_t val);
0227 
0228    std::size_t GetMaxUnzippedPageSize() const { return fMaxUnzippedPageSize; }
0229    void SetMaxUnzippedPageSize(std::size_t val);
0230 
0231    std::size_t GetPageBufferBudget() const;
0232    void SetPageBufferBudget(std::size_t val) { fPageBufferBudget = val; }
0233 
0234    bool GetUseBufferedWrite() const { return fUseBufferedWrite; }
0235    void SetUseBufferedWrite(bool val) { fUseBufferedWrite = val; }
0236 
0237    bool GetUseDirectIO() const { return fUseDirectIO; }
0238    void SetUseDirectIO(bool val) { fUseDirectIO = val; }
0239 
0240    std::size_t GetWriteBufferSize() const { return fWriteBufferSize; }
0241    void SetWriteBufferSize(std::size_t val) { fWriteBufferSize = val; }
0242 
0243    EImplicitMT GetUseImplicitMT() const { return fUseImplicitMT; }
0244    void SetUseImplicitMT(EImplicitMT val) { fUseImplicitMT = val; }
0245 
0246    bool GetEnablePageChecksums() const { return fEnablePageChecksums; }
0247    /// Note that turning off page checksums will also turn off the same page merging optimization (see tuning.md)
0248    void SetEnablePageChecksums(bool val)
0249    {
0250       fEnablePageChecksums = val;
0251       if (!fEnablePageChecksums) {
0252          fEnableSamePageMerging = false;
0253       }
0254    }
0255 
0256    bool GetEnableSamePageMerging() const { return fEnableSamePageMerging; }
0257    void SetEnableSamePageMerging(bool val);
0258 
0259    std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
0260 };
0261 
0262 namespace Internal {
0263 inline void RNTupleWriteOptionsManip::SetMaxKeySize(RNTupleWriteOptions &options, std::uint64_t maxKeySize)
0264 {
0265    options.fMaxKeySize = maxKeySize;
0266 }
0267 
0268 } // namespace Internal
0269 } // namespace ROOT
0270 
0271 #endif // ROOT_RNTupleWriteOptions