File indexing completed on 2025-02-21 09:58:12
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #pragma once
0028
0029 #include <algorithm>
0030 #include <array>
0031 #include <fstream>
0032 #include <limits>
0033 #include <sstream>
0034 #include <stdexcept>
0035 #include <string>
0036 #include <tuple>
0037 #include <type_traits>
0038 #include <utility>
0039 #include <vector>
0040
0041 namespace dfe {
0042 namespace io_dsv_impl {
0043
0044
/// Writer for text files whose columns are separated by a fixed delimiter.
///
/// The number of columns is fixed at construction time; every appended line
/// must contain exactly that many columns.
///
/// \tparam Delimiter Character written between adjacent columns
template<char Delimiter>
class DsvWriter {
public:
  /// Open the output file and write the header line.
  ///
  /// \param columns    Column names; they form the first line of the file
  /// \param path       Path of the output file; existing content is truncated
  /// \param precision  Output precision set on the file stream
  ///
  /// \exception std::runtime_error     if the file can not be opened
  /// \exception std::invalid_argument  if `columns` is empty
  DsvWriter(
    const std::vector<std::string>& columns, const std::string& path,
    int precision = std::numeric_limits<double>::max_digits10);

  // A writer always needs a target file, so there is no default state.
  DsvWriter() = delete;
  // The writer is move-only.
  DsvWriter(const DsvWriter&) = delete;
  DsvWriter& operator=(const DsvWriter&) = delete;
  DsvWriter(DsvWriter&&) = default;
  DsvWriter& operator=(DsvWriter&&) = default;
  ~DsvWriter() = default;

  /// Append one line composed of the given arguments.
  ///
  /// Each argument is either a single value (arithmetic or convertible to
  /// `std::string`), which contributes one column, or a `std::vector`, which
  /// contributes one column per element.
  ///
  /// \exception std::invalid_argument  if the total number of columns differs
  ///                                   from the number of header columns
  /// \exception std::runtime_error     if writing to the file fails
  template<typename Arg0, typename... Args>
  void append(Arg0&& arg0, Args&&... args);

private:
  /// Output file stream; also carries the configured precision.
  std::ofstream m_file;
  /// Number of columns that every line has to contain.
  std::size_t m_num_columns;

  /// Write a single value to `os`; returns the number of columns written.
  template<typename T>
  static std::enable_if_t<
    std::is_arithmetic<std::decay_t<T>>::value
      or std::is_convertible<T, std::string>::value,
    unsigned>
  write(T&& x, std::ostream& os);
  /// Write all vector elements to `os`; returns the number of columns written.
  template<typename T, typename Allocator>
  static unsigned write(const std::vector<T, Allocator>& xs, std::ostream& os);
};
0089
0090
/// Line-by-line reader for text files with delimiter-separated columns.
///
/// \tparam Delimiter Character that separates adjacent columns
template<char Delimiter>
class DsvReader {
public:
  /// Open the file at `path` for reading.
  ///
  /// \exception std::runtime_error if the file can not be opened
  DsvReader(const std::string& path);

  // A reader always needs a source file, so there is no default state.
  DsvReader() = delete;
  // The reader is move-only.
  DsvReader(const DsvReader&) = delete;
  DsvReader& operator=(const DsvReader&) = delete;
  DsvReader(DsvReader&&) = default;
  DsvReader& operator=(DsvReader&&) = default;
  ~DsvReader() = default;

  /// Read the next line and split it into its columns.
  ///
  /// \param[out] columns  Replaced by the column strings of the line
  /// \returns true if a line was read, false at the end of the file
  ///
  /// \exception std::runtime_error if the line can not be read
  bool read(std::vector<std::string>& columns);

  /// Number of lines that have been read so far.
  std::size_t num_lines() const { return m_num_lines; }

private:
  /// Input file stream.
  std::ifstream m_file;
  /// Buffer holding the most recently read line.
  std::string m_line;
  /// Counter of the lines read so far.
  std::size_t m_num_lines = 0;
};
0120
0121
0122 template<char Delimiter, typename NamedTuple>
0123 class NamedTupleDsvWriter {
0124 public:
0125 NamedTupleDsvWriter() = delete;
0126 NamedTupleDsvWriter(const NamedTupleDsvWriter&) = delete;
0127 NamedTupleDsvWriter(NamedTupleDsvWriter&&) = default;
0128 ~NamedTupleDsvWriter() = default;
0129 NamedTupleDsvWriter& operator=(const NamedTupleDsvWriter&) = delete;
0130 NamedTupleDsvWriter& operator=(NamedTupleDsvWriter&&) = default;
0131
0132
0133
0134
0135
0136 NamedTupleDsvWriter(
0137 const std::string& path,
0138 int precision = std::numeric_limits<double>::max_digits10)
0139 : m_writer(colum_names(), path, precision) {}
0140
0141
0142 void append(const NamedTuple& record) {
0143 append_impl(
0144 record, std::make_index_sequence<
0145 std::tuple_size<typename NamedTuple::Tuple>::value>{});
0146 }
0147
0148 private:
0149 DsvWriter<Delimiter> m_writer;
0150
0151 static std::vector<std::string> colum_names() {
0152 const auto& from_record = NamedTuple::names();
0153 return {from_record.begin(), from_record.end()};
0154 }
0155 template<std::size_t... I>
0156 void append_impl(const NamedTuple& values, std::index_sequence<I...>) {
0157 using std::get;
0158 m_writer.append(get<I>(values)...);
0159 }
0160 };
0161
0162
0163
/// Convert the text of a single column into a value of type `T`.
///
/// The conversion uses formatted stream extraction (`operator>>`). The stream
/// state is not inspected afterwards, so a failed conversion is not reported
/// to the caller.
///
/// \param[in]  str    Column content
/// \param[out] value  Receives the converted value
template<typename T>
static void
parse(const std::string& str, T& value) {
  std::istringstream is(str);
  is >> value;
}

/// Take over the text of a single column verbatim.
///
/// Stream extraction into a `std::string` stops at the first whitespace and
/// leaves the target untouched for an empty input, so text columns would be
/// truncated or keep a stale value from a previously read record. Copying the
/// column content avoids both problems.
///
/// \param[in]  str    Column content
/// \param[out] value  Receives an exact copy of the column content
inline void
parse(const std::string& str, std::string& value) {
  value = str;
}
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182 template<char Delimiter, typename NamedTuple>
0183 class NamedTupleDsvReader {
0184 public:
0185 NamedTupleDsvReader() = delete;
0186 NamedTupleDsvReader(const NamedTupleDsvReader&) = delete;
0187 NamedTupleDsvReader(NamedTupleDsvReader&&) = default;
0188 ~NamedTupleDsvReader() = default;
0189 NamedTupleDsvReader& operator=(const NamedTupleDsvReader&) = delete;
0190 NamedTupleDsvReader& operator=(NamedTupleDsvReader&&) = default;
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200 NamedTupleDsvReader(
0201 const std::string& path,
0202 const std::vector<std::string>& optional_columns = {},
0203 bool verify_header = true);
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213 bool read(NamedTuple& record);
0214
0215
0216
0217
0218
0219 template<typename T>
0220 bool read(NamedTuple& record, std::vector<T>& extra);
0221
0222
0223 std::size_t num_extra_columns() const { return m_extra_columns.size(); }
0224
0225 std::size_t num_records() const { return m_reader.num_lines() - 1u; }
0226
0227 private:
0228
0229 using Tuple = typename NamedTuple::Tuple;
0230
0231 DsvReader<Delimiter> m_reader;
0232 std::vector<std::string> m_columns;
0233
0234 std::size_t m_num_columns = SIZE_MAX;
0235
0236 std::array<std::size_t, std::tuple_size<Tuple>::value> m_tuple_column_map;
0237
0238 std::vector<std::size_t> m_extra_columns;
0239
0240 void use_default_columns();
0241 void parse_header(const std::vector<std::string>& optional_columns);
0242 template<std::size_t... I>
0243 void parse_record(NamedTuple& record, std::index_sequence<I...>) const {
0244
0245
0246 using Vacuum = int[];
0247 (void)Vacuum{(parse_element<I>(record), 0)...};
0248 }
0249 template<std::size_t I>
0250 void parse_element(NamedTuple& record) const {
0251 using std::get;
0252 if (m_tuple_column_map[I] != SIZE_MAX) {
0253 parse(m_columns[m_tuple_column_map[I]], get<I>(record));
0254 }
0255 }
0256 };
0257
0258
0259
0260 template<char Delimiter>
0261 inline DsvWriter<Delimiter>::DsvWriter(
0262 const std::vector<std::string>& columns, const std::string& path,
0263 int precision)
0264 : m_file(
0265 path, std::ios_base::binary | std::ios_base::out | std::ios_base::trunc)
0266 , m_num_columns(columns.size()) {
0267 if (not m_file.is_open() or m_file.fail()) {
0268 throw std::runtime_error("Could not open file '" + path + "'");
0269 }
0270 m_file.precision(precision);
0271 if (m_num_columns == 0) {
0272 throw std::invalid_argument("No columns were specified");
0273 }
0274
0275 append(columns);
0276 }
0277
0278 template<char Delimiter>
0279 template<typename Arg0, typename... Args>
0280 inline void
0281 DsvWriter<Delimiter>::append(Arg0&& arg0, Args&&... args) {
0282
0283
0284 std::stringstream line;
0285
0286 line.precision(m_file.precision());
0287 unsigned written_columns[] = {
0288
0289 write(std::forward<Arg0>(arg0), line),
0290
0291
0292
0293
0294
0295 (line << Delimiter, write(std::forward<Args>(args), line))...,
0296 };
0297 line << '\n';
0298
0299 unsigned total_columns = 0;
0300 for (auto nc : written_columns) {
0301 total_columns += nc;
0302 }
0303 if (total_columns < m_num_columns) {
0304 throw std::invalid_argument("Not enough columns");
0305 }
0306 if (m_num_columns < total_columns) {
0307 throw std::invalid_argument("Too many columns");
0308 }
0309
0310 m_file << line.rdbuf();
0311 if (not m_file.good()) {
0312 throw std::runtime_error("Could not write data to file");
0313 }
0314 }
0315
0316 template<char Delimiter>
0317 template<typename T>
0318 inline std::enable_if_t<
0319 std::is_arithmetic<std::decay_t<T>>::value
0320 or std::is_convertible<T, std::string>::value,
0321 unsigned>
0322 DsvWriter<Delimiter>::write(T&& x, std::ostream& os) {
0323 os << x;
0324 return 1u;
0325 }
0326
0327 template<char Delimiter>
0328 template<typename T, typename Allocator>
0329 inline unsigned
0330 DsvWriter<Delimiter>::write(
0331 const std::vector<T, Allocator>& xs, std::ostream& os) {
0332 unsigned n = 0;
0333 for (const auto& x : xs) {
0334 if (0 < n) {
0335 os << Delimiter;
0336 }
0337 os << x;
0338 n += 1;
0339 }
0340 return n;
0341 }
0342
0343
0344
0345 template<char Delimiter>
0346 inline DsvReader<Delimiter>::DsvReader(const std::string& path)
0347 : m_file(path, std::ios_base::binary | std::ios_base::in) {
0348 if (not m_file.is_open() or m_file.fail()) {
0349 throw std::runtime_error("Could not open file '" + path + "'");
0350 }
0351 }
0352
0353 template<char Delimiter>
0354 inline bool
0355 DsvReader<Delimiter>::read(std::vector<std::string>& columns) {
0356
0357 std::getline(m_file, m_line);
0358 if (m_file.eof()) {
0359 return false;
0360 }
0361 if (m_file.fail()) {
0362 throw std::runtime_error(
0363 "Could not read line " + std::to_string(m_num_lines));
0364 }
0365 m_num_lines += 1;
0366
0367
0368 columns.clear();
0369 for (std::string::size_type pos = 0; pos < m_line.size();) {
0370 auto del = m_line.find_first_of(Delimiter, pos);
0371 if (del == std::string::npos) {
0372
0373 columns.emplace_back(m_line, pos);
0374 break;
0375 } else {
0376 columns.emplace_back(m_line, pos, del - pos);
0377
0378 pos = del + 1;
0379 }
0380 }
0381 return true;
0382 }
0383
0384
0385
0386 template<char Delimiter, typename NamedTuple>
0387 inline NamedTupleDsvReader<Delimiter, NamedTuple>::NamedTupleDsvReader(
0388 const std::string& path, const std::vector<std::string>& optional_columns,
0389 bool verify_header)
0390 : m_reader(path) {
0391
0392 if ((not optional_columns.empty()) and (not verify_header)) {
0393 throw std::runtime_error(
0394 "Optional columns can not be used without header verification");
0395 }
0396
0397 if (not m_reader.read(m_columns)) {
0398 throw std::runtime_error("Could not read header from '" + path + "'");
0399 }
0400 if (verify_header) {
0401 parse_header(optional_columns);
0402 } else {
0403 use_default_columns();
0404 }
0405 }
0406
0407 template<char Delimiter, typename NamedTuple>
0408 inline bool
0409 NamedTupleDsvReader<Delimiter, NamedTuple>::read(NamedTuple& record) {
0410 if (not m_reader.read(m_columns)) {
0411 return false;
0412 }
0413
0414 if (m_columns.size() < m_num_columns) {
0415 throw std::runtime_error(
0416 "Too few columns in line " + std::to_string(m_reader.num_lines()));
0417 }
0418 if (m_num_columns < m_columns.size()) {
0419 throw std::runtime_error(
0420 "Too many columns in line " + std::to_string(m_reader.num_lines()));
0421 }
0422
0423 parse_record(
0424 record, std::make_index_sequence<std::tuple_size<Tuple>::value>{});
0425 return true;
0426 }
0427
0428 template<char Delimiter, typename NamedTuple>
0429 template<typename T>
0430 inline bool
0431 NamedTupleDsvReader<Delimiter, NamedTuple>::read(
0432 NamedTuple& record, std::vector<T>& extra) {
0433
0434 if (not read(record)) {
0435 return false;
0436 }
0437
0438 extra.resize(m_extra_columns.size());
0439 for (std::size_t i = 0; i < m_extra_columns.size(); ++i) {
0440 parse(m_columns[m_extra_columns[i]], extra[i]);
0441 }
0442 return true;
0443 }
0444
0445 template<char Delimiter, typename NamedTuple>
0446 inline void
0447 NamedTupleDsvReader<Delimiter, NamedTuple>::use_default_columns() {
0448
0449 m_num_columns = std::tuple_size<Tuple>::value;
0450 for (std::size_t i = 0; i < m_tuple_column_map.size(); ++i) {
0451 m_tuple_column_map[i] = i;
0452 }
0453
0454 m_extra_columns.clear();
0455 }
0456
0457 template<char Delimiter, typename NamedTuple>
0458 inline void
0459 NamedTupleDsvReader<Delimiter, NamedTuple>::parse_header(
0460 const std::vector<std::string>& optional_columns) {
0461 const auto& names = NamedTuple::names();
0462
0463
0464 m_num_columns = m_columns.size();
0465
0466
0467 for (const auto& name : names) {
0468
0469 auto o = std::find(optional_columns.begin(), optional_columns.end(), name);
0470 if (o != optional_columns.end()) {
0471 continue;
0472 }
0473
0474 auto c = std::find(m_columns.begin(), m_columns.end(), name);
0475 if (c == m_columns.end()) {
0476 throw std::runtime_error("Missing header column '" + name + "'");
0477 }
0478 }
0479
0480
0481 m_tuple_column_map.fill(SIZE_MAX);
0482
0483
0484 m_extra_columns.clear();
0485 for (std::size_t i = 0; i < m_columns.size(); ++i) {
0486
0487 auto it = std::find(names.begin(), names.end(), m_columns[i]);
0488 if (it != names.end()) {
0489
0490 m_tuple_column_map[std::distance(names.begin(), it)] = i;
0491 } else {
0492
0493 m_extra_columns.push_back(i);
0494 }
0495 }
0496 }
0497
0498 }
0499
0500
/// Writer for arbitrary values with ',' as the column delimiter.
using CsvWriter = io_dsv_impl::DsvWriter<','>;

/// Writer for arbitrary values with a tab character as the column delimiter.
using TsvWriter = io_dsv_impl::DsvWriter<'\t'>;

/// Writer for named tuples of type `T` with ',' as the column delimiter.
template<typename T>
using NamedTupleCsvWriter = io_dsv_impl::NamedTupleDsvWriter<',', T>;

/// Reader for named tuples of type `T` with ',' as the column delimiter.
template<typename T>
using NamedTupleCsvReader = io_dsv_impl::NamedTupleDsvReader<',', T>;

/// Writer for named tuples of type `T` with a tab character as the column
/// delimiter.
template<typename T>
using NamedTupleTsvWriter = io_dsv_impl::NamedTupleDsvWriter<'\t', T>;

/// Reader for named tuples of type `T` with a tab character as the column
/// delimiter.
template<typename T>
using NamedTupleTsvReader = io_dsv_impl::NamedTupleDsvReader<'\t', T>;
0521
0522 }