Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:47

0001 /// \file ROOT/RNTupleZip.hxx
0002 /// \ingroup NTuple ROOT7
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-11-21
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT7_RNTupleZip
0017 #define ROOT7_RNTupleZip
0018 
0019 #include <RZip.h>
0020 #include <TError.h>
0021 
0022 #include <algorithm>
0023 #include <array>
0024 #include <cstring>
0025 #include <functional>
0026 #include <memory>
0027 #include <utility>
0028 
0029 namespace ROOT {
0030 namespace Experimental {
0031 namespace Internal {
0032 
0033 // clang-format off
0034 /**
0035 \class ROOT::Experimental::Internal::RNTupleCompressor
0036 \ingroup NTuple
0037 \brief Helper class to compress data blocks in the ROOT compression frame format
0038 */
0039 // clang-format on
0040 class RNTupleCompressor {
0041 private:
0042    using Buffer_t = std::array<unsigned char, kMAXZIPBUF>;
0043    std::unique_ptr<Buffer_t> fZipBuffer;
0044 
0045 public:
0046    /// Data might be overwritten, if a zipped block in the middle of a large input data stream
0047    /// turns out to be uncompressible
0048    using Writer_t = std::function<void(const void *buffer, size_t nbytes, size_t offset)>;
0049    static Writer_t MakeMemCopyWriter(unsigned char *dest)
0050    {
0051       return [=](const void *b, size_t n, size_t o) { memcpy(dest + o, b, n); };
0052    }
0053    static constexpr size_t kMaxSingleBlock = kMAXZIPBUF;
0054 
0055    RNTupleCompressor() : fZipBuffer(std::make_unique<Buffer_t>()) {}
0056    RNTupleCompressor(const RNTupleCompressor &other) = delete;
0057    RNTupleCompressor &operator =(const RNTupleCompressor &other) = delete;
0058    RNTupleCompressor(RNTupleCompressor &&other) = default;
0059    RNTupleCompressor &operator =(RNTupleCompressor &&other) = default;
0060 
0061    /// Returns the size of the compressed data. Data is compressed in 16MB (kMAXZIPBUF) blocks and written
0062    /// piecewise using the provided writer
0063    size_t Zip(const void *from, size_t nbytes, int compression, Writer_t fnWriter) {
0064       R__ASSERT(from != nullptr);
0065 
0066       auto cxLevel = compression % 100;
0067       if (cxLevel == 0) {
0068          fnWriter(from, nbytes, 0);
0069          return nbytes;
0070       }
0071 
0072       auto cxAlgorithm = static_cast<ROOT::RCompressionSetting::EAlgorithm::EValues>(compression / 100);
0073       unsigned int nZipBlocks = 1 + (nbytes - 1) / kMAXZIPBUF;
0074       char *source = const_cast<char *>(static_cast<const char *>(from));
0075       int szTarget = kMAXZIPBUF;
0076       char *target = reinterpret_cast<char *>(fZipBuffer->data());
0077       int szOutBlock = 0;
0078       int szRemaining = nbytes;
0079       size_t szZipData = 0;
0080       for (unsigned int i = 0; i < nZipBlocks; ++i) {
0081          int szSource = std::min(static_cast<int>(kMAXZIPBUF), szRemaining);
0082          R__zipMultipleAlgorithm(cxLevel, &szSource, source, &szTarget, target, &szOutBlock, cxAlgorithm);
0083          R__ASSERT(szOutBlock >= 0);
0084          if ((szOutBlock == 0) || (szOutBlock >= szSource)) {
0085             // Uncompressible block, we have to store the entire input data stream uncompressed
0086             fnWriter(from, nbytes, 0);
0087             return nbytes;
0088          }
0089 
0090          fnWriter(target, szOutBlock, szZipData);
0091          szZipData += szOutBlock;
0092          source += szSource;
0093          szRemaining -= szSource;
0094       }
0095       R__ASSERT(szRemaining == 0);
0096       R__ASSERT(szZipData < nbytes);
0097       return szZipData;
0098    }
0099 
0100    /// Returns the size of the compressed data block. The data is written into the zip buffer.
0101    /// This works only for small input buffer up to 16MB (kMAXZIPBUF)
0102    size_t Zip(const void *from, size_t nbytes, int compression) {
0103       R__ASSERT(from != nullptr);
0104       R__ASSERT(nbytes <= kMAXZIPBUF);
0105 
0106       auto cxLevel = compression % 100;
0107       if (cxLevel == 0) {
0108          memcpy(fZipBuffer->data(), from, nbytes);
0109          return nbytes;
0110       }
0111 
0112       auto cxAlgorithm = static_cast<ROOT::RCompressionSetting::EAlgorithm::EValues>(compression / 100);
0113       int szSource = nbytes;
0114       char *source = const_cast<char *>(static_cast<const char *>(from));
0115       int szTarget = nbytes;
0116       char *target = reinterpret_cast<char *>(fZipBuffer->data());
0117       int szOut = 0;
0118       R__zipMultipleAlgorithm(cxLevel, &szSource, source, &szTarget, target, &szOut, cxAlgorithm);
0119       R__ASSERT(szOut >= 0);
0120       if ((szOut > 0) && (static_cast<unsigned int>(szOut) < nbytes))
0121          return szOut;
0122 
0123       memcpy(fZipBuffer->data(), from, nbytes);
0124       return nbytes;
0125    }
0126 
0127    /// Returns the size of the compressed data, written into the provided output buffer.
0128    static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to) {
0129       R__ASSERT(from != nullptr);
0130       R__ASSERT(to != nullptr);
0131       auto cxLevel = compression % 100;
0132       if (cxLevel == 0) {
0133          memcpy(to, from, nbytes);
0134          return nbytes;
0135       }
0136 
0137       auto cxAlgorithm = static_cast<ROOT::RCompressionSetting::EAlgorithm::EValues>(compression / 100);
0138       unsigned int nZipBlocks = 1 + (nbytes - 1) / kMAXZIPBUF;
0139       char *source = const_cast<char *>(static_cast<const char *>(from));
0140       int szTarget = nbytes;
0141       char *target = reinterpret_cast<char *>(to);
0142       int szOutBlock = 0;
0143       int szRemaining = nbytes;
0144       size_t szZipData = 0;
0145       for (unsigned int i = 0; i < nZipBlocks; ++i) {
0146          int szSource = std::min(static_cast<int>(kMAXZIPBUF), szRemaining);
0147          R__zipMultipleAlgorithm(cxLevel, &szSource, source, &szTarget, target, &szOutBlock, cxAlgorithm);
0148          R__ASSERT(szOutBlock >= 0);
0149          if ((szOutBlock == 0) || (szOutBlock >= szSource)) {
0150             // Uncompressible block, we have to store the entire input data stream uncompressed
0151             memcpy(to, from, nbytes);
0152             return nbytes;
0153          }
0154 
0155          szZipData += szOutBlock;
0156          source += szSource;
0157          target += szOutBlock;
0158          szRemaining -= szSource;
0159       }
0160       R__ASSERT(szRemaining == 0);
0161       R__ASSERT(szZipData < nbytes);
0162       return szZipData;
0163    }
0164 
0165    void *GetZipBuffer() { return fZipBuffer->data(); }
0166 };
0167 
0168 
0169 // clang-format off
0170 /**
0171 \class ROOT::Experimental::Internal::RNTupleDecompressor
0172 \ingroup NTuple
0173 \brief Helper class to uncompress data blocks in the ROOT compression frame format
0174 */
0175 // clang-format on
0176 class RNTupleDecompressor {
0177 private:
0178    using Buffer_t = std::array<unsigned char, kMAXZIPBUF>;
0179    std::unique_ptr<Buffer_t> fUnzipBuffer;
0180 
0181 public:
0182    RNTupleDecompressor() : fUnzipBuffer(std::make_unique<Buffer_t>()) {}
0183    RNTupleDecompressor(const RNTupleDecompressor &other) = delete;
0184    RNTupleDecompressor &operator =(const RNTupleDecompressor &other) = delete;
0185    RNTupleDecompressor(RNTupleDecompressor &&other) = default;
0186    RNTupleDecompressor &operator =(RNTupleDecompressor &&other) = default;
0187 
0188    /**
0189     * The nbytes parameter provides the size ls of the from buffer. The dataLen gives the size of the uncompressed data.
0190     * The block is uncompressed iff nbytes == dataLen.
0191     */
0192    void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to) {
0193       if (dataLen == nbytes) {
0194          memcpy(to, from, nbytes);
0195          return;
0196       }
0197       R__ASSERT(dataLen > nbytes);
0198 
0199       unsigned char *source = const_cast<unsigned char *>(static_cast<const unsigned char *>(from));
0200       unsigned char *target = static_cast<unsigned char *>(to);
0201       int szRemaining = dataLen;
0202       do {
0203          int szSource;
0204          int szTarget;
0205          int retval = R__unzip_header(&szSource, source, &szTarget);
0206          R__ASSERT(retval == 0);
0207          R__ASSERT(szSource > 0);
0208          R__ASSERT(szTarget > szSource);
0209          R__ASSERT(static_cast<unsigned int>(szSource) <= nbytes);
0210          R__ASSERT(static_cast<unsigned int>(szTarget) <= dataLen);
0211 
0212          int unzipBytes = 0;
0213          R__unzip(&szSource, source, &szTarget, target, &unzipBytes);
0214          R__ASSERT(unzipBytes == szTarget);
0215 
0216          target += szTarget;
0217          source += szSource;
0218          szRemaining -= unzipBytes;
0219       } while (szRemaining > 0);
0220       R__ASSERT(szRemaining == 0);
0221    }
0222 
0223    /**
0224     * In-place decompression via unzip buffer
0225     */
0226    void Unzip(void *fromto, size_t nbytes, size_t dataLen) {
0227       R__ASSERT(dataLen <= kMAXZIPBUF);
0228       Unzip(fromto, nbytes, dataLen, fUnzipBuffer->data());
0229       memcpy(fromto, fUnzipBuffer->data(), dataLen);
0230    }
0231 };
0232 
0233 } // namespace Internal
0234 } // namespace Experimental
0235 } // namespace ROOT
0236 
0237 #endif