File indexing completed on 2026-04-17 08:28:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 #pragma once
0023
0024 #include <cstdint>
0025 #include <memory>
0026 #include <optional>
0027 #include <string>
0028
0029 #include "parquet/size_statistics.h"
0030 #include "parquet/statistics.h"
0031 #include "parquet/types.h"
0032
0033 namespace parquet {
0034
0035
0036
0037
0038
0039
0040
0041
0042 class Page {
0043 public:
0044 Page(const std::shared_ptr<Buffer>& buffer, PageType::type type)
0045 : buffer_(buffer), type_(type) {}
0046
0047 PageType::type type() const { return type_; }
0048
0049 std::shared_ptr<Buffer> buffer() const { return buffer_; }
0050
0051
0052 const uint8_t* data() const { return buffer_->data(); }
0053
0054
0055 int32_t size() const { return static_cast<int32_t>(buffer_->size()); }
0056
0057 private:
0058 std::shared_ptr<Buffer> buffer_;
0059 PageType::type type_;
0060 };
0061
0062
0063 class DataPage : public Page {
0064 public:
0065 int32_t num_values() const { return num_values_; }
0066 Encoding::type encoding() const { return encoding_; }
0067 int64_t uncompressed_size() const { return uncompressed_size_; }
0068 const EncodedStatistics& statistics() const { return statistics_; }
0069
0070
0071
0072 std::optional<int64_t> first_row_index() const { return first_row_index_; }
0073 const SizeStatistics& size_statistics() const { return size_statistics_; }
0074
0075 virtual ~DataPage() = default;
0076
0077 protected:
0078 DataPage(PageType::type type, const std::shared_ptr<Buffer>& buffer, int32_t num_values,
0079 Encoding::type encoding, int64_t uncompressed_size,
0080 EncodedStatistics statistics, std::optional<int64_t> first_row_index,
0081 SizeStatistics size_statistics)
0082 : Page(buffer, type),
0083 num_values_(num_values),
0084 encoding_(encoding),
0085 uncompressed_size_(uncompressed_size),
0086 statistics_(std::move(statistics)),
0087 first_row_index_(std::move(first_row_index)),
0088 size_statistics_(std::move(size_statistics)) {}
0089
0090 int32_t num_values_;
0091 Encoding::type encoding_;
0092 int64_t uncompressed_size_;
0093 EncodedStatistics statistics_;
0094
0095 std::optional<int64_t> first_row_index_;
0096 SizeStatistics size_statistics_;
0097 };
0098
0099 class DataPageV1 : public DataPage {
0100 public:
0101 DataPageV1(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
0102 Encoding::type encoding, Encoding::type definition_level_encoding,
0103 Encoding::type repetition_level_encoding, int64_t uncompressed_size,
0104 EncodedStatistics statistics = EncodedStatistics(),
0105 std::optional<int64_t> first_row_index = std::nullopt,
0106 SizeStatistics size_statistics = SizeStatistics())
0107 : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size,
0108 std::move(statistics), std::move(first_row_index),
0109 std::move(size_statistics)),
0110 definition_level_encoding_(definition_level_encoding),
0111 repetition_level_encoding_(repetition_level_encoding) {}
0112
0113 Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; }
0114
0115 Encoding::type definition_level_encoding() const { return definition_level_encoding_; }
0116
0117 private:
0118 Encoding::type definition_level_encoding_;
0119 Encoding::type repetition_level_encoding_;
0120 };
0121
0122 class DataPageV2 : public DataPage {
0123 public:
0124 DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
0125 int32_t num_rows, Encoding::type encoding,
0126 int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length,
0127 int64_t uncompressed_size, bool is_compressed = false,
0128 EncodedStatistics statistics = EncodedStatistics(),
0129 std::optional<int64_t> first_row_index = std::nullopt,
0130 SizeStatistics size_statistics = SizeStatistics())
0131 : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size,
0132 std::move(statistics), std::move(first_row_index),
0133 std::move(size_statistics)),
0134 num_nulls_(num_nulls),
0135 num_rows_(num_rows),
0136 definition_levels_byte_length_(definition_levels_byte_length),
0137 repetition_levels_byte_length_(repetition_levels_byte_length),
0138 is_compressed_(is_compressed) {}
0139
0140 int32_t num_nulls() const { return num_nulls_; }
0141
0142 int32_t num_rows() const { return num_rows_; }
0143
0144 int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; }
0145
0146 int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; }
0147
0148 bool is_compressed() const { return is_compressed_; }
0149
0150 private:
0151 int32_t num_nulls_;
0152 int32_t num_rows_;
0153 int32_t definition_levels_byte_length_;
0154 int32_t repetition_levels_byte_length_;
0155 bool is_compressed_;
0156 };
0157
0158 class DictionaryPage : public Page {
0159 public:
0160 DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
0161 Encoding::type encoding, bool is_sorted = false)
0162 : Page(buffer, PageType::DICTIONARY_PAGE),
0163 num_values_(num_values),
0164 encoding_(encoding),
0165 is_sorted_(is_sorted) {}
0166
0167 int32_t num_values() const { return num_values_; }
0168
0169 Encoding::type encoding() const { return encoding_; }
0170
0171 bool is_sorted() const { return is_sorted_; }
0172
0173 private:
0174 int32_t num_values_;
0175 Encoding::type encoding_;
0176 bool is_sorted_;
0177 };
0178
0179 }