File indexing completed on 2025-01-18 10:10:37
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef ROOT_RCSVTDS
0012 #define ROOT_RCSVTDS
0013
0014 #include "ROOT/RDataFrame.hxx"
0015 #include "ROOT/RDataSource.hxx"
0016
0017 #include <cstdint>
0018 #include <deque>
0019 #include <list>
0020 #include <unordered_map>
0021 #include <set>
0022 #include <memory>
0023 #include <vector>
0024
0025 #include <TRegexp.h>
0026
0027 namespace ROOT {
0028
0029 namespace Internal {
0030 class RRawFile;
0031 }
0032
0033 namespace RDF {
0034
/// RDataSource implementation for CSV input (as the class name and the FromCSV
/// function declared later in this header suggest). Only declarations appear
/// here; every member function is defined elsewhere.
/// The member functions marked `final` are not declared `virtual` in this
/// class, so they override virtual functions inherited from RDataSource.
0035 class RCsvDS final : public ROOT::RDF::RDataSource {
0036
0037 private:
0038
// One-character code identifying the type of a column.
0039 using ColType_t = char;
// Maps a type code to a string; presumably the C++ type name reported by
// GetTypeName — confirm in the implementation.
0040 static const std::unordered_map<ColType_t, std::string> fgColTypeMap;
0041
0042
// Regular expressions shared by all instances. Judging by their names they
// recognise integer, floating-point (three patterns) and boolean literals,
// presumably for InferType; the patterns themselves are defined elsewhere.
0043 static const TRegexp fgIntRegex, fgDoubleRegex1, fgDoubleRegex2, fgDoubleRegex3, fgTrueRegex, fgFalseRegex;
0044
// Position within the input, initially 0. Presumably the offset at which the
// data rows start — TODO confirm.
0045 std::uint64_t fDataPos = 0;
// NOTE(review): the in-class default is false while the constructor parameter
// readHeaders defaults to true; the constructor is expected to overwrite this.
0046 bool fReadHeaders = false;
// Slot count, initially 0; presumably assigned by SetNSlots — confirm.
0047 unsigned int fNSlots = 0U;
// Owning handle to the input file. RRawFile is only forward-declared in this
// header, so code needing the complete type (including the destructor, which
// is declared but not defined here) must live where RRawFile is fully defined.
0048 std::unique_ptr<ROOT::Internal::RRawFile> fCsvFile;
// The next two members are const without an in-class initializer, so the
// constructor must initialise them; presumably from its delimiter and
// linesChunkSize arguments.
0049 const char fDelimiter;
0050 const Long64_t fLinesChunkSize;
// Counters starting at zero. By name: how many times entry ranges were
// requested and how many input lines have been consumed — confirm.
0051 ULong64_t fEntryRangesRequested = 0ULL;
0052 ULong64_t fProcessedLines = 0ULL;
// Column names; presumably what GetColumnNames returns.
0053 std::vector<std::string> fHeaders;
// Column name -> type code.
0054 std::unordered_map<std::string, ColType_t> fColTypes;
// Set of column names; by its name, the columns in which an empty cell was
// encountered — confirm.
0055 std::set<std::string> fColContainingEmpty;
// Type codes held in a list; presumably in column order — confirm.
0056 std::list<ColType_t> fColTypesList;
// Type-erased pointers, two levels of nesting; presumably indexed
// [column][slot] — TODO confirm.
0057 std::vector<std::vector<void *>> fColAddresses;
// Record_t is not declared in this header; presumably it is inherited from
// RDataSource.
0058 std::vector<Record_t> fRecords;
// Per-type value buffers with two levels of nesting; the indexing convention
// is not visible from this header.
0059 std::vector<std::vector<double>> fDoubleEvtValues;
0060 std::vector<std::vector<Long64_t>> fLong64EvtValues;
0061 std::vector<std::vector<std::string>> fStringEvtValues;
0062
0063
// The inner container is std::deque<bool>, not std::vector<bool>.
// std::vector<bool> is a packed specialisation whose elements cannot be bound
// to bool& or addressed as bool*; presumably that is the reason — confirm.
0064 std::vector<std::deque<bool>> fBoolEvtValues;
0065
// Private helpers. Only the signatures are visible here and most parameters
// are unnamed, so the notes below are inferred from names — verify against
// the definitions.
// Presumably extracts the column names from a header line.
0066 void FillHeaders(const std::string &);
// Presumably converts one input line into the given record.
0067 void FillRecord(const std::string &, Record_t &);
// Presumably creates default column names when the input has no header line;
// the size_t is likely the number of columns — confirm.
0068 void GenerateHeaders(size_t);
// Overrides a virtual of the base class (marked final). Takes a column name
// and the requested type's std::type_info and returns type-erased pointers.
0069 std::vector<void *> GetColumnReadersImpl(std::string_view, const std::type_info &) final;
// Const member taking a mutable vector of strings; presumably checks the
// column types (fColTypes) against it — confirm.
0070 void ValidateColTypes(std::vector<std::string> &) const;
// Presumably determines the type of every column from the given cell values.
0071 void InferColTypes(std::vector<std::string> &);
// Presumably determines the type of a single column from one cell value; the
// unsigned int is likely the column index — confirm.
0072 void InferType(const std::string &, unsigned int);
// Splits one line into column strings, returned by value.
0073 std::vector<std::string> ParseColumns(const std::string &);
// Presumably parses one value of the line starting at the given position,
// appends it to the vector and returns the position to continue from — confirm.
0074 size_t ParseValue(const std::string &, std::vector<std::string> &, size_t);
// Returns the type code of the column named colName.
0075 ColType_t GetType(std::string_view colName) const;
// Presumably releases whatever fRecords refers to — confirm.
0076 void FreeRecords();
0077
0078 protected:
// Overrides a virtual of the base class (marked final); returns a string
// representation whose content is not visible here.
0079 std::string AsString() final;
0080
0081 public:
// Constructor. Every parameter except fileName has a default.
// \param fileName       Input to read; accepted path/URL forms are not visible here.
// \param readHeaders    Default true; presumably whether the first line holds the column names.
// \param delimiter      Default ','; presumably the field separator.
// \param linesChunkSize Default -1; presumably the number of lines read per chunk, with -1
//                       having a special meaning — confirm in the implementation.
// \param colTypes       Column name -> type code, taken by rvalue reference, so a named map
//                       must be passed with std::move. Defaults to an empty map.
// NOTE(review): the constructor is not marked explicit, so a std::string_view
// converts implicitly to RCsvDS.
0082 RCsvDS(std::string_view fileName, bool readHeaders = true, char delimiter = ',', Long64_t linesChunkSize = -1LL,
0083 std::unordered_map<std::string, char> &&colTypes = {});
// The functions marked final below override virtuals of RDataSource; see that
// interface for the contract each one has to honour.
0084 void Finalize() final;
// Declared here, defined elsewhere (see the note on fCsvFile).
0085 ~RCsvDS();
// Returns a reference to a vector of column names (presumably fHeaders).
0086 const std::vector<std::string> &GetColumnNames() const final;
// Returns pairs of ULong64_t; presumably [begin, end) entry ranges — confirm.
0087 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
// Returns a type name for the column colName, as a string.
0088 std::string GetTypeName(std::string_view colName) const final;
// Tells whether a column called colName exists.
0089 bool HasColumn(std::string_view colName) const final;
// Takes a slot and an entry number; the meaning of the returned bool is
// defined by the base-class contract, not visible here.
0090 bool SetEntry(unsigned int slot, ULong64_t entry) final;
// Receives the number of slots (presumably stored in fNSlots).
0091 void SetNSlots(unsigned int nSlots) final;
// Returns a label string; its content is not visible here.
0092 std::string GetLabel() final;
0093 };
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
/// Free function returning an RDataFrame. Its parameter list (names, types and
/// defaults) mirrors the RCsvDS constructor; presumably it builds an RCsvDS
/// from these arguments and wraps it in the returned RDataFrame — confirm in
/// the implementation, which is not part of this header.
/// \param fileName       Input to read; accepted path/URL forms are not visible here.
/// \param readHeaders    Default true; presumably whether the first line holds the column names.
/// \param delimiter      Default ','; presumably the field separator.
/// \param linesChunkSize Default -1; presumably the number of lines read per chunk, with -1
///                       having a special meaning — confirm.
/// \param colTypes       Column name -> one-character type code, taken by rvalue reference
///                       (a named map must be passed with std::move). Defaults to an empty map.
0105 RDataFrame FromCSV(std::string_view fileName, bool readHeaders = true, char delimiter = ',',
0106 Long64_t linesChunkSize = -1LL, std::unordered_map<std::string, char> &&colTypes = {});
0107
0108 }
0109
0110 }
0111
0112 #endif