File indexing completed on 2026-04-17 08:28:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <utility>
0023
0024 #include "parquet/metadata.h"
0025 #include "parquet/platform.h"
0026 #include "parquet/properties.h"
0027 #include "parquet/schema.h"
0028
0029 namespace parquet {
0030
// Forward declaration only: the writer interfaces declared below refer to
// ColumnWriter exclusively through pointers.
class ColumnWriter;

// Four-byte marker spelling "PAR1".
// NOTE(review): presumably the magic number framing files whose footer is
// stored in plaintext -- confirm against the writer implementation.
static constexpr uint8_t kParquetMagic[4] = {'P', 'A', 'R', '1'};

// Four-byte marker spelling "PARE".
// NOTE(review): presumably the magic number used when the footer is
// encrypted -- confirm against the writer implementation.
static constexpr uint8_t kParquetEMagic[4] = {'P', 'A', 'R', 'E'};
0036
0037 class PARQUET_EXPORT RowGroupWriter {
0038 public:
0039
0040
0041
0042 struct Contents {
0043 virtual ~Contents() = default;
0044 virtual int num_columns() const = 0;
0045 virtual int64_t num_rows() const = 0;
0046
0047
0048 virtual ColumnWriter* NextColumn() = 0;
0049
0050 virtual ColumnWriter* column(int i) = 0;
0051
0052 virtual int current_column() const = 0;
0053 virtual void Close() = 0;
0054
0055
0056 virtual int64_t total_bytes_written() const = 0;
0057
0058 virtual int64_t total_compressed_bytes() const = 0;
0059
0060 virtual int64_t total_compressed_bytes_written() const = 0;
0061
0062 virtual bool buffered() const = 0;
0063 };
0064
0065 explicit RowGroupWriter(std::unique_ptr<Contents> contents);
0066
0067
0068
0069
0070
0071
0072
0073
0074 ColumnWriter* NextColumn();
0075
0076
0077 int current_column();
0078 void Close();
0079
0080 int num_columns() const;
0081
0082
0083
0084
0085
0086
0087
0088 ColumnWriter* column(int i);
0089
0090
0091
0092
0093 int64_t num_rows() const;
0094
0095
0096 int64_t total_bytes_written() const;
0097
0098
0099 int64_t total_compressed_bytes() const;
0100
0101 int64_t total_compressed_bytes_written() const;
0102
0103
0104
0105 bool buffered() const;
0106
0107 private:
0108
0109 std::unique_ptr<Contents> contents_;
0110 };
0111
0112 PARQUET_EXPORT
0113 void WriteFileMetaData(const FileMetaData& file_metadata,
0114 ::arrow::io::OutputStream* sink);
0115
0116 PARQUET_EXPORT
0117 void WriteMetaDataFile(const FileMetaData& file_metadata,
0118 ::arrow::io::OutputStream* sink);
0119
0120 PARQUET_EXPORT
0121 void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
0122 ArrowOutputStream* sink,
0123 const std::shared_ptr<Encryptor>& encryptor,
0124 bool encrypt_footer);
0125
0126 PARQUET_EXPORT
0127 void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
0128 ::arrow::io::OutputStream* sink,
0129 const std::shared_ptr<Encryptor>& encryptor = NULLPTR,
0130 bool encrypt_footer = false);
0131 PARQUET_EXPORT
0132 void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
0133 ::arrow::io::OutputStream* sink);
0134
0135 class PARQUET_EXPORT ParquetFileWriter {
0136 public:
0137
0138
0139
0140 struct Contents {
0141 Contents(std::shared_ptr<::parquet::schema::GroupNode> schema,
0142 std::shared_ptr<const KeyValueMetadata> key_value_metadata)
0143 : schema_(), key_value_metadata_(std::move(key_value_metadata)) {
0144 schema_.Init(std::move(schema));
0145 }
0146 virtual ~Contents() {}
0147
0148 virtual void Close() = 0;
0149
0150 virtual RowGroupWriter* AppendRowGroup() = 0;
0151 virtual RowGroupWriter* AppendBufferedRowGroup() = 0;
0152
0153 virtual int64_t num_rows() const = 0;
0154 virtual int num_columns() const = 0;
0155 virtual int num_row_groups() const = 0;
0156
0157 virtual const std::shared_ptr<WriterProperties>& properties() const = 0;
0158
0159 const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
0160 return key_value_metadata_;
0161 }
0162
0163 virtual void AddKeyValueMetadata(
0164 const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) = 0;
0165
0166
0167 const SchemaDescriptor* schema() const { return &schema_; }
0168
0169 SchemaDescriptor schema_;
0170
0171
0172 std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
0173
0174 const std::shared_ptr<FileMetaData>& metadata() const { return file_metadata_; }
0175 std::shared_ptr<FileMetaData> file_metadata_;
0176 };
0177
0178 ParquetFileWriter();
0179 ~ParquetFileWriter();
0180
0181 static std::unique_ptr<ParquetFileWriter> Open(
0182 std::shared_ptr<::arrow::io::OutputStream> sink,
0183 std::shared_ptr<schema::GroupNode> schema,
0184 std::shared_ptr<WriterProperties> properties = default_writer_properties(),
0185 std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
0186
0187 void Open(std::unique_ptr<Contents> contents);
0188 void Close();
0189
0190
0191
0192
0193
0194 RowGroupWriter* AppendRowGroup();
0195
0196
0197
0198
0199
0200
0201 RowGroupWriter* AppendBufferedRowGroup();
0202
0203
0204
0205
0206
0207 void AddKeyValueMetadata(
0208 const std::shared_ptr<const KeyValueMetadata>& key_value_metadata);
0209
0210
0211
0212
0213
0214 int num_columns() const;
0215
0216
0217
0218
0219 int64_t num_rows() const;
0220
0221
0222 int num_row_groups() const;
0223
0224
0225 const std::shared_ptr<WriterProperties>& properties() const;
0226
0227
0228 const SchemaDescriptor* schema() const;
0229
0230
0231 const ColumnDescriptor* descr(int i) const;
0232
0233
0234 const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
0235
0236
0237 const std::shared_ptr<FileMetaData> metadata() const;
0238
0239 private:
0240
0241 std::unique_ptr<Contents> contents_;
0242 std::shared_ptr<FileMetaData> file_metadata_;
0243 };
0244
0245 }