Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:07

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <limits>
0022 #include <memory>
0023 #include <optional>
0024 #include <string>
0025 
0026 #include "arrow/result.h"
0027 #include "arrow/status.h"
0028 #include "arrow/util/type_fwd.h"
0029 #include "arrow/util/visibility.h"
0030 
0031 namespace arrow {
0032 namespace util {
0033 
/// \brief Sentinel compression level requesting the codec's default level.
///
/// Declared `inline` so that every translation unit including this header
/// refers to one and the same constant instead of a per-TU internal copy.
inline constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
0035 
0036 /// \brief Streaming compressor interface
0037 ///
0038 class ARROW_EXPORT Compressor {
0039  public:
0040   virtual ~Compressor() = default;
0041 
0042   struct CompressResult {
0043     int64_t bytes_read;
0044     int64_t bytes_written;
0045   };
0046   struct FlushResult {
0047     int64_t bytes_written;
0048     bool should_retry;
0049   };
0050   struct EndResult {
0051     int64_t bytes_written;
0052     bool should_retry;
0053   };
0054 
0055   /// \brief Compress some input.
0056   ///
0057   /// If bytes_read is 0 on return, then a larger output buffer should be supplied.
0058   virtual Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
0059                                           int64_t output_len, uint8_t* output) = 0;
0060 
0061   /// \brief Flush part of the compressed output.
0062   ///
0063   /// If should_retry is true on return, Flush() should be called again
0064   /// with a larger buffer.
0065   virtual Result<FlushResult> Flush(int64_t output_len, uint8_t* output) = 0;
0066 
0067   /// \brief End compressing, doing whatever is necessary to end the stream.
0068   ///
0069   /// If should_retry is true on return, End() should be called again
0070   /// with a larger buffer.  Otherwise, the Compressor should not be used anymore.
0071   ///
0072   /// End() implies Flush().
0073   virtual Result<EndResult> End(int64_t output_len, uint8_t* output) = 0;
0074 
0075   // XXX add methods for buffer size heuristics?
0076 };
0077 
0078 /// \brief Streaming decompressor interface
0079 ///
0080 class ARROW_EXPORT Decompressor {
0081  public:
0082   virtual ~Decompressor() = default;
0083 
0084   struct DecompressResult {
0085     // XXX is need_more_output necessary? (Brotli?)
0086     int64_t bytes_read;
0087     int64_t bytes_written;
0088     bool need_more_output;
0089   };
0090 
0091   /// \brief Decompress some input.
0092   ///
0093   /// If need_more_output is true on return, a larger output buffer needs
0094   /// to be supplied.
0095   virtual Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
0096                                               int64_t output_len, uint8_t* output) = 0;
0097 
0098   /// \brief Return whether the compressed stream is finished.
0099   ///
0100   /// This is a heuristic.  If true is returned, then it is guaranteed
0101   /// that the stream is finished.  If false is returned, however, it may
0102   /// simply be that the underlying library isn't able to provide the information.
0103   virtual bool IsFinished() = 0;
0104 
0105   /// \brief Reinitialize decompressor, making it ready for a new compressed stream.
0106   virtual Status Reset() = 0;
0107 
0108   // XXX add methods for buffer size heuristics?
0109 };
0110 
0111 /// \brief Compression codec options
0112 class ARROW_EXPORT CodecOptions {
0113  public:
0114   explicit CodecOptions(int compression_level = kUseDefaultCompressionLevel)
0115       : compression_level(compression_level) {}
0116 
0117   virtual ~CodecOptions() = default;
0118 
0119   int compression_level;
0120 };
0121 
0122 // ----------------------------------------------------------------------
0123 // GZip codec options implementation
0124 
/// \brief Format selector carried by GZipCodecOptions
///
/// NOTE(review): the enumerators presumably name the zlib, raw deflate and
/// gzip stream framings -- confirm against the GZip codec implementation.
/// Enumerator order is kept as-is because the underlying values (0, 1, 2)
/// may be relied upon elsewhere.
enum class GZipFormat {
  ZLIB,
  DEFLATE,
  GZIP,
};
0130 
0131 class ARROW_EXPORT GZipCodecOptions : public CodecOptions {
0132  public:
0133   GZipFormat gzip_format = GZipFormat::GZIP;
0134   std::optional<int> window_bits;
0135 };
0136 
0137 // ----------------------------------------------------------------------
0138 // brotli codec options implementation
0139 
0140 class ARROW_EXPORT BrotliCodecOptions : public CodecOptions {
0141  public:
0142   std::optional<int> window_bits;
0143 };
0144 
0145 /// \brief Compression codec
0146 class ARROW_EXPORT Codec {
0147  public:
0148   virtual ~Codec() = default;
0149 
0150   /// \brief Return special value to indicate that a codec implementation
0151   /// should use its default compression level
0152   static int UseDefaultCompressionLevel();
0153 
0154   /// \brief Return a string name for compression type
0155   static const std::string& GetCodecAsString(Compression::type t);
0156 
0157   /// \brief Return compression type for name (all lower case)
0158   static Result<Compression::type> GetCompressionType(const std::string& name);
0159 
0160   /// \brief Create a codec for the given compression algorithm with CodecOptions
0161   static Result<std::unique_ptr<Codec>> Create(
0162       Compression::type codec, const CodecOptions& codec_options = CodecOptions{});
0163 
0164   /// \brief Create a codec for the given compression algorithm
0165   static Result<std::unique_ptr<Codec>> Create(Compression::type codec,
0166                                                int compression_level);
0167 
0168   /// \brief Return true if support for indicated codec has been enabled
0169   static bool IsAvailable(Compression::type codec);
0170 
0171   /// \brief Return true if indicated codec supports setting a compression level
0172   static bool SupportsCompressionLevel(Compression::type codec);
0173 
0174   /// \brief Return the smallest supported compression level for the codec
0175   /// Note: This function creates a temporary Codec instance
0176   static Result<int> MinimumCompressionLevel(Compression::type codec);
0177 
0178   /// \brief Return the largest supported compression level for the codec
0179   /// Note: This function creates a temporary Codec instance
0180   static Result<int> MaximumCompressionLevel(Compression::type codec);
0181 
0182   /// \brief Return the default compression level
0183   /// Note: This function creates a temporary Codec instance
0184   static Result<int> DefaultCompressionLevel(Compression::type codec);
0185 
0186   /// \brief Return the smallest supported compression level
0187   virtual int minimum_compression_level() const = 0;
0188 
0189   /// \brief Return the largest supported compression level
0190   virtual int maximum_compression_level() const = 0;
0191 
0192   /// \brief Return the default compression level
0193   virtual int default_compression_level() const = 0;
0194 
0195   /// \brief One-shot decompression function
0196   ///
0197   /// output_buffer_len must be correct and therefore be obtained in advance.
0198   /// The actual decompressed length is returned.
0199   ///
0200   /// \note One-shot decompression is not always compatible with streaming
0201   /// compression.  Depending on the codec (e.g. LZ4), different formats may
0202   /// be used.
0203   virtual Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
0204                                      int64_t output_buffer_len,
0205                                      uint8_t* output_buffer) = 0;
0206 
0207   /// \brief One-shot compression function
0208   ///
0209   /// output_buffer_len must first have been computed using MaxCompressedLen().
0210   /// The actual compressed length is returned.
0211   ///
0212   /// \note One-shot compression is not always compatible with streaming
0213   /// decompression.  Depending on the codec (e.g. LZ4), different formats may
0214   /// be used.
0215   virtual Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
0216                                    int64_t output_buffer_len, uint8_t* output_buffer) = 0;
0217 
0218   virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
0219 
0220   /// \brief Create a streaming compressor instance
0221   virtual Result<std::shared_ptr<Compressor>> MakeCompressor() = 0;
0222 
0223   /// \brief Create a streaming compressor instance
0224   virtual Result<std::shared_ptr<Decompressor>> MakeDecompressor() = 0;
0225 
0226   /// \brief This Codec's compression type
0227   virtual Compression::type compression_type() const = 0;
0228 
0229   /// \brief The name of this Codec's compression type
0230   const std::string& name() const { return GetCodecAsString(compression_type()); }
0231 
0232   /// \brief This Codec's compression level, if applicable
0233   virtual int compression_level() const { return UseDefaultCompressionLevel(); }
0234 
0235  private:
0236   /// \brief Initializes the codec's resources.
0237   virtual Status Init();
0238 };
0239 
0240 }  // namespace util
0241 }  // namespace arrow