Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-17 08:28:53

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
#include <cassert>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "arrow/util/secure_string.h"
#include "parquet/exception.h"
#include "parquet/schema.h"
#include "parquet/types.h"
0030 
0031 namespace parquet {
0032 
/// Cipher that FileEncryptionProperties::Builder starts with until algorithm()
/// is called.
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
    ParquetCipher::AES_GCM_V1;
/// Upper bound on AAD metadata length. Not referenced in this header.
/// NOTE(review): the unit is presumably bytes - confirm where it is enforced.
static constexpr int32_t kMaximalAadMetadataLength = 256;
/// Initial value of the encrypted-footer flag in FileEncryptionProperties::Builder.
static constexpr bool kDefaultEncryptedFooter = true;
/// Initial value of the plaintext-footer integrity check flag in
/// FileDecryptionProperties::Builder.
static constexpr bool kDefaultCheckSignature = true;
/// Initial value of the plaintext-files-allowed flag in
/// FileDecryptionProperties::Builder.
static constexpr bool kDefaultAllowPlaintextFiles = false;
/// Length of the file-unique part of the AAD. Not referenced in this header.
/// NOTE(review): the unit is presumably bytes - confirm at the point of use.
static constexpr int32_t kAadFileUniqueLength = 8;
0040 
class ColumnDecryptionProperties;
/// Shared ColumnDecryptionProperties objects indexed by a string key.
/// NOTE(review): the key is presumably the dot-separated column path returned by
/// ColumnDecryptionProperties::column_path() - confirm in the implementation.
using ColumnPathToDecryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;

class ColumnEncryptionProperties;
/// Shared ColumnEncryptionProperties objects indexed by a string key.
/// NOTE(review): the key is presumably the dot-separated column path returned by
/// ColumnEncryptionProperties::column_path() - confirm in the implementation.
using ColumnPathToEncryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
0048 
0049 class PARQUET_EXPORT DecryptionKeyRetriever {
0050  public:
0051   /// \brief Retrieve a key.
0052   virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0;
0053 
0054   virtual ~DecryptionKeyRetriever() {}
0055 };
0056 
0057 /// Simple integer key retriever
0058 class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
0059  public:
0060   void PutKey(uint32_t key_id, ::arrow::util::SecureString key);
0061 
0062   ::arrow::util::SecureString GetKey(const std::string& key_id_string) override {
0063     // key_id_string is string but for IntegerKeyIdRetriever it encodes
0064     // a native-endian 32 bit unsigned integer key_id
0065     uint32_t key_id;
0066     assert(key_id_string.size() == sizeof(key_id));
0067     memcpy(&key_id, key_id_string.data(), sizeof(key_id));
0068 
0069     return key_map_.at(key_id);
0070   }
0071 
0072  private:
0073   std::map<uint32_t, ::arrow::util::SecureString> key_map_;
0074 };
0075 
/// Simple string key retriever
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
 public:
  /// Defined out of line.
  /// NOTE(review): presumably stores `key` in key_map_ under `key_id` - confirm
  /// in the implementation file.
  void PutKey(std::string key_id, ::arrow::util::SecureString key);
  /// Overrides DecryptionKeyRetriever::GetKey; defined out of line.
  ::arrow::util::SecureString GetKey(const std::string& key_id) override;

 private:
  // Keys indexed by their string identifier.
  std::map<std::string, ::arrow::util::SecureString> key_map_;
};
0085 
/// ParquetException subtype constructed from a column path.
/// NOTE(review): presumably thrown when a column cannot be read because it is
/// encrypted and no key is available - confirm at the throw sites.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
 public:
  /// \param columnPath passed on to ParquetException as a C string.
  explicit HiddenColumnException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
0091 
/// ParquetException subtype constructed from a column path.
/// NOTE(review): presumably thrown when access to a column key is refused -
/// confirm at the throw sites.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
 public:
  /// \param columnPath passed on to ParquetException as a C string.
  explicit KeyAccessDeniedException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
0097 
/// \brief Expose the characters of a string as unsigned bytes.
///
/// No copy is made: the result points into `str`'s own buffer.
/// \return NULLPTR when `str` is empty, otherwise a pointer to its first byte.
inline const uint8_t* str2bytes(const std::string& str) {
  return str.empty() ? NULLPTR : reinterpret_cast<const uint8_t*>(str.data());
}
0104 
0105 inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
0106   if (str.empty()) {
0107     return {};
0108   }
0109 
0110   return {reinterpret_cast<const uint8_t*>(str.data()), str.size()};
0111 }
0112 
0113 class PARQUET_EXPORT ColumnEncryptionProperties {
0114  public:
0115   class PARQUET_EXPORT Builder {
0116    public:
0117     /// Convenience builder for encrypted columns.
0118     explicit Builder(std::string name) : Builder(std::move(name), true) {}
0119 
0120     /// Convenience builder for encrypted columns.
0121     explicit Builder(const schema::ColumnPath& path)
0122         : Builder(path.ToDotString(), true) {}
0123 
0124     /// Set a column-specific key.
0125     /// If key is not set on an encrypted column, the column will
0126     /// be encrypted with the footer key.
0127     /// keyBytes Key length must be either 16, 24 or 32 bytes.
0128     /// Caller is responsible for wiping out the input key array.
0129     Builder* key(::arrow::util::SecureString column_key);
0130 
0131     /// Set a key retrieval metadata.
0132     /// use either key_metadata() or key_id(), not both
0133     Builder* key_metadata(std::string key_metadata);
0134 
0135     /// A convenience function to set key metadata using a string id.
0136     /// Set a key retrieval metadata (converted from String).
0137     /// use either key_metadata() or key_id(), not both
0138     /// key_id will be converted to metadata (UTF-8 array).
0139     Builder* key_id(std::string key_id);
0140 
0141     std::shared_ptr<ColumnEncryptionProperties> build() {
0142       return std::shared_ptr<ColumnEncryptionProperties>(
0143           new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
0144     }
0145 
0146    private:
0147     std::string column_path_;
0148     bool encrypted_;
0149     ::arrow::util::SecureString key_;
0150     std::string key_metadata_;
0151 
0152     Builder(std::string path, bool encrypted)
0153         : column_path_(std::move(path)), encrypted_(encrypted) {}
0154   };
0155 
0156   const std::string& column_path() const { return column_path_; }
0157   bool is_encrypted() const { return encrypted_; }
0158   bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
0159   const ::arrow::util::SecureString& key() const { return key_; }
0160   const std::string& key_metadata() const { return key_metadata_; }
0161 
0162  private:
0163   std::string column_path_;
0164   bool encrypted_;
0165   bool encrypted_with_footer_key_;
0166   ::arrow::util::SecureString key_;
0167   std::string key_metadata_;
0168   explicit ColumnEncryptionProperties(bool encrypted, std::string column_path,
0169                                       ::arrow::util::SecureString key,
0170                                       std::string key_metadata);
0171 };
0172 
0173 class PARQUET_EXPORT ColumnDecryptionProperties {
0174  public:
0175   class PARQUET_EXPORT Builder {
0176    public:
0177     explicit Builder(std::string name) : column_path_(std::move(name)) {}
0178 
0179     explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {}
0180 
0181     /// Set an explicit column key. If applied on a file that contains
0182     /// key metadata for this column the metadata will be ignored,
0183     /// the column will be decrypted with this key.
0184     /// key length must be either 16, 24 or 32 bytes.
0185     Builder* key(::arrow::util::SecureString key);
0186 
0187     std::shared_ptr<ColumnDecryptionProperties> build();
0188 
0189    private:
0190     std::string column_path_;
0191     ::arrow::util::SecureString key_;
0192   };
0193 
0194   const std::string& column_path() const { return column_path_; }
0195   const ::arrow::util::SecureString& key() const { return key_; }
0196 
0197  private:
0198   std::string column_path_;
0199   ::arrow::util::SecureString key_;
0200 
0201   /// This class is only required for setting explicit column decryption keys -
0202   /// to override key retriever (or to provide keys when key metadata and/or
0203   /// key retriever are not available)
0204   explicit ColumnDecryptionProperties(std::string column_path,
0205                                       ::arrow::util::SecureString key);
0206 };
0207 
0208 class PARQUET_EXPORT AADPrefixVerifier {
0209  public:
0210   /// Verifies identity (AAD Prefix) of individual file,
0211   /// or of file collection in a data set.
0212   /// Throws exception if an AAD prefix is wrong.
0213   /// In a data set, AAD Prefixes should be collected,
0214   /// and then checked for missing files.
0215   virtual void Verify(const std::string& aad_prefix) = 0;
0216   virtual ~AADPrefixVerifier() {}
0217 };
0218 
0219 class PARQUET_EXPORT FileDecryptionProperties {
0220  public:
0221   class PARQUET_EXPORT Builder {
0222    public:
0223     Builder() {
0224       check_plaintext_footer_integrity_ = kDefaultCheckSignature;
0225       plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
0226     }
0227 
0228     /// Set an explicit footer key. If applied on a file that contains
0229     /// footer key metadata the metadata will be ignored, the footer
0230     /// will be decrypted/verified with this key.
0231     /// If explicit key is not set, footer key will be fetched from
0232     /// key retriever.
0233     /// With explicit keys or AAD prefix, new encryption properties object must be
0234     /// created for each encrypted file.
0235     /// Explicit encryption keys (footer and column) are cloned.
0236     /// Upon completion of file reading, the cloned encryption keys in the properties
0237     /// will be wiped out (array values set to 0).
0238     /// Caller is responsible for wiping out the input key array.
0239     /// param footerKey Key length must be either 16, 24 or 32 bytes.
0240     Builder* footer_key(::arrow::util::SecureString footer_key);
0241 
0242     /// Set explicit column keys (decryption properties).
0243     /// Its also possible to set a key retriever on this property object.
0244     /// Upon file decryption, availability of explicit keys is checked before
0245     /// invocation of the retriever callback.
0246     /// If an explicit key is available for a footer or a column,
0247     /// its key metadata will be ignored.
0248     Builder* column_keys(
0249         ColumnPathToDecryptionPropertiesMap column_decryption_properties);
0250 
0251     /// Set a key retriever callback. Its also possible to
0252     /// set explicit footer or column keys on this file property object.
0253     /// Upon file decryption, availability of explicit keys is checked before
0254     /// invocation of the retriever callback.
0255     /// If an explicit key is available for a footer or a column,
0256     /// its key metadata will be ignored.
0257     Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever);
0258 
0259     /// Skip integrity verification of plaintext footers.
0260     /// If not called, integrity of plaintext footers will be checked in runtime,
0261     /// and an exception will be thrown in the following situations:
0262     /// - footer signing key is not available
0263     /// (not passed, or not found by key retriever)
0264     /// - footer content and signature don't match
0265     Builder* disable_footer_signature_verification() {
0266       check_plaintext_footer_integrity_ = false;
0267       return this;
0268     }
0269 
0270     /// Explicitly supply the file AAD prefix.
0271     /// A must when a prefix is used for file encryption, but not stored in file.
0272     /// If AAD prefix is stored in file, it will be compared to the explicitly
0273     /// supplied value and an exception will be thrown if they differ.
0274     Builder* aad_prefix(std::string aad_prefix);
0275 
0276     /// Set callback for verification of AAD Prefixes stored in file.
0277     Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
0278 
0279     /// By default, reading plaintext (unencrypted) files is not
0280     /// allowed when using a decryptor
0281     /// - in order to detect files that were not encrypted by mistake.
0282     /// However, the default behavior can be overridden by calling this method.
0283     /// The caller should use then a different method to ensure encryption
0284     /// of files with sensitive data.
0285     Builder* plaintext_files_allowed() {
0286       plaintext_files_allowed_ = true;
0287       return this;
0288     }
0289 
0290     std::shared_ptr<FileDecryptionProperties> build() {
0291       return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
0292           footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
0293           aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
0294     }
0295 
0296    private:
0297     ::arrow::util::SecureString footer_key_;
0298     std::string aad_prefix_;
0299     std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
0300     ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
0301 
0302     std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
0303     bool check_plaintext_footer_integrity_;
0304     bool plaintext_files_allowed_;
0305   };
0306 
0307   const ::arrow::util::SecureString& column_key(const std::string& column_path) const;
0308 
0309   const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
0310 
0311   const std::string& aad_prefix() const { return aad_prefix_; }
0312 
0313   const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
0314     return key_retriever_;
0315   }
0316 
0317   bool check_plaintext_footer_integrity() const {
0318     return check_plaintext_footer_integrity_;
0319   }
0320 
0321   bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
0322 
0323   const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
0324     return aad_prefix_verifier_;
0325   }
0326 
0327  private:
0328   ::arrow::util::SecureString footer_key_;
0329   std::string aad_prefix_;
0330   std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
0331   ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
0332   std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
0333   bool check_plaintext_footer_integrity_;
0334   bool plaintext_files_allowed_;
0335 
0336   FileDecryptionProperties(
0337       ::arrow::util::SecureString footer_key,
0338       std::shared_ptr<DecryptionKeyRetriever> key_retriever,
0339       bool check_plaintext_footer_integrity, std::string aad_prefix,
0340       std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
0341       ColumnPathToDecryptionPropertiesMap column_decryption_properties,
0342       bool plaintext_files_allowed);
0343 };
0344 
0345 class PARQUET_EXPORT FileEncryptionProperties {
0346  public:
0347   class PARQUET_EXPORT Builder {
0348    public:
0349     explicit Builder(::arrow::util::SecureString footer_key)
0350         : parquet_cipher_(kDefaultEncryptionAlgorithm),
0351           encrypted_footer_(kDefaultEncryptedFooter),
0352           footer_key_(std::move(footer_key)) {
0353       store_aad_prefix_in_file_ = false;
0354     }
0355 
0356     /// Create files with plaintext footer.
0357     /// If not called, the files will be created with encrypted footer (default).
0358     Builder* set_plaintext_footer() {
0359       encrypted_footer_ = false;
0360       return this;
0361     }
0362 
0363     /// Set encryption algorithm.
0364     /// If not called, files will be encrypted with AES_GCM_V1 (default).
0365     Builder* algorithm(ParquetCipher::type parquet_cipher) {
0366       parquet_cipher_ = parquet_cipher;
0367       return this;
0368     }
0369 
0370     /// Set a key retrieval metadata (converted from String).
0371     /// use either footer_key_metadata or footer_key_id, not both.
0372     Builder* footer_key_id(std::string key_id);
0373 
0374     /// Set a key retrieval metadata.
0375     /// use either footer_key_metadata or footer_key_id, not both.
0376     Builder* footer_key_metadata(std::string footer_key_metadata);
0377 
0378     /// Set the file AAD Prefix.
0379     Builder* aad_prefix(std::string aad_prefix);
0380 
0381     /// Skip storing AAD Prefix in file.
0382     /// If not called, and if AAD Prefix is set, it will be stored.
0383     Builder* disable_aad_prefix_storage();
0384 
0385     /// Set the list of encrypted columns and their properties (keys etc).
0386     /// If not called, all columns will be encrypted with the footer key.
0387     /// If called, the file columns not in the list will be left unencrypted.
0388     Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns);
0389 
0390     std::shared_ptr<FileEncryptionProperties> build() {
0391       return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
0392           parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
0393           aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
0394     }
0395 
0396    private:
0397     ParquetCipher::type parquet_cipher_;
0398     bool encrypted_footer_;
0399     ::arrow::util::SecureString footer_key_;
0400     std::string footer_key_metadata_;
0401 
0402     std::string aad_prefix_;
0403     bool store_aad_prefix_in_file_;
0404     ColumnPathToEncryptionPropertiesMap encrypted_columns_;
0405   };
0406 
0407   bool encrypted_footer() const { return encrypted_footer_; }
0408 
0409   EncryptionAlgorithm algorithm() const { return algorithm_; }
0410 
0411   const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
0412 
0413   const std::string& footer_key_metadata() const { return footer_key_metadata_; }
0414 
0415   const std::string& file_aad() const { return file_aad_; }
0416 
0417   std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
0418       const std::string& column_path);
0419 
0420   const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const {
0421     return encrypted_columns_;
0422   }
0423 
0424  private:
0425   EncryptionAlgorithm algorithm_;
0426   ::arrow::util::SecureString footer_key_;
0427   std::string footer_key_metadata_;
0428   bool encrypted_footer_;
0429   std::string file_aad_;
0430   std::string aad_prefix_;
0431   bool store_aad_prefix_in_file_;
0432   ColumnPathToEncryptionPropertiesMap encrypted_columns_;
0433 
0434   FileEncryptionProperties(ParquetCipher::type cipher,
0435                            ::arrow::util::SecureString footer_key,
0436                            std::string footer_key_metadata, bool encrypted_footer,
0437                            std::string aad_prefix, bool store_aad_prefix_in_file,
0438                            ColumnPathToEncryptionPropertiesMap encrypted_columns);
0439 };
0440 
0441 }  // namespace parquet