Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:48

0001 // Author: Jakob Blomer CERN  07/2018
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RSQLITEDS
0012 #define ROOT_RSQLITEDS
0013 
0014 #include "ROOT/RDataFrame.hxx"
0015 #include "ROOT/RDataSource.hxx"
0016 #include <string_view>
0017 
0018 #include <memory>
0019 #include <string>
0020 #include <vector>
0021 
0022 namespace ROOT {
0023 
0024 namespace RDF {
0025 
0026 namespace Internal {
0027 // Members are defined in RSqliteDS.cxx in order to not pollute this header file with sqlite3.h
0028 struct RSqliteDSDataSet;
0029 }
0030 
0031 // clang-format off
0032 /**
0033 \class ROOT::RDF::RSqliteDS
0034 \ingroup dataframe
0035 \brief RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
0036 
0037 The RSqliteDS is able to feed an RDataFrame with data from a SQLite SELECT query. It can be used like this:
0038 
0039     auto rdf = ROOT::RDF::FromSqlite("/path/to/file.sqlite", "select name from table");
0040     auto h = rdf.Define("lName", "name.length()").Histo1D("lName");
0041 
0042 The data source has to provide column types for all the columns. Determining column types in SQLite is tricky
0043 as it is dynamically typed and in principle each row can have different column types. The following heuristic
0044 is used:
0045 
0046   - If a table column is queried as is ("SELECT colname FROM table"), the default/declared column type is taken.
0047   - For expressions ("SELECT 1+1 FROM table"), the type of the first row of the result set determines the column type.
0048     That can result in a column being thought to be of type NULL even though subsequent rows actually have meaningful values.
0049     The provided SELECT query can be used to avoid such ambiguities.
0050 */
0051 class RSqliteDS final : public ROOT::RDF::RDataSource {
0052 private:
0053    // clang-format off
0054    /// All the types known to SQLite. Changes require changing fgTypeNames, too.
0055    enum class ETypes {
0056       kInteger,
0057       kReal,
0058       kText,
0059       kBlob,
0060       kNull
0061    };
0062    // clang-format on
0063 
0064    /// Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once available.
0065    struct Value_t {
0066       explicit Value_t(ETypes type); ///< Only declared here; the definition lives outside this header.
0067 
0068       ETypes fType; ///< Type tag of this cell, one of the ETypes enumerators.
0069       bool fIsActive; ///< Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
0070       Long64_t fInteger; ///< Integer slot; same type as the "Long64_t" entry of fgTypeNames.
0071       double fReal; ///< Floating-point slot; same type as the "double" entry of fgTypeNames.
0072       std::string fText; ///< Text slot; same type as the "std::string" entry of fgTypeNames.
0073       std::vector<unsigned char> fBlob; ///< Blob slot; same type as the "std::vector<unsigned char>" entry of fgTypeNames.
0074       void *fNull; ///< Slot for the NULL case; same type as the "void *" entry of fgTypeNames.
0075       void *fPtr; ///< Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
0076    };
0077 
0078    void SqliteError(int errcode); ///< NOTE(review): presumably reports/raises for an sqlite3 error code; confirm in RSqliteDS.cxx.
0079 
0080    std::unique_ptr<Internal::RSqliteDSDataSet> fDataSet; ///< Opaque state; its members are defined in RSqliteDS.cxx to keep sqlite3.h out of this header.
0081    unsigned int fNSlots; ///< NOTE(review): presumably the value passed to SetNSlots(); confirm in RSqliteDS.cxx.
0082    ULong64_t fNRow; ///< NOTE(review): presumably a running row counter for the result set; confirm in RSqliteDS.cxx.
0083    std::vector<std::string> fColumnNames; ///< NOTE(review): presumably what GetColumnNames() returns (the types match); confirm.
0084    std::vector<ETypes> fColumnTypes; ///< NOTE(review): presumably one ETypes entry per element of fColumnNames; confirm.
0085    /// The data source is inherently single-threaded and returns only one row at a time. This vector holds the results.
0086    std::vector<Value_t> fValues;
0087 
0088    // clang-format off
0089    /// Corresponds to the types defined in ETypes.
0090    static constexpr char const *fgTypeNames[] = {
0091       "Long64_t",
0092       "double",
0093       "std::string",
0094       "std::vector<unsigned char>",
0095       "void *"
0096    };
0097    // clang-format on
0098 
0099 public:
0100    RSqliteDS(const std::string &fileName, const std::string &query); ///< Takes the sqlite3 file and the SELECT query to run on it (cf. the class description).
0101    ~RSqliteDS();
0102    void SetNSlots(unsigned int nSlots) final;
0103    const std::vector<std::string> &GetColumnNames() const final;
0104    bool HasColumn(std::string_view colName) const final;
0105    std::string GetTypeName(std::string_view colName) const final;
0106    std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
0107    bool SetEntry(unsigned int slot, ULong64_t entry) final;
0108    void Initialize() final;
0109    std::string GetLabel() final;
0110 
0111 protected:
0112    Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final; ///< The second (type_info) parameter is unnamed in this declaration.
0113 };
0114 
0115 RDataFrame FromSqlite(std::string_view fileName, std::string_view query); ///< Returns an RDataFrame for the SELECT `query` on the sqlite3 file `fileName` (see the usage example in the RSqliteDS class description); presumably backed by an RSqliteDS, confirm in RSqliteDS.cxx.
0116 
0117 } // namespace RDF
0118 
0119 } // namespace ROOT
0120 
0121 #endif