Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-11-05 09:55:27

0001 // Author: Jakob Blomer CERN  07/2018
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RSQLITEDS
0012 #define ROOT_RSQLITEDS
0013 
0014 #include "ROOT/RDataFrame.hxx"
0015 #include "ROOT/RDataSource.hxx"
0016 #include <string_view>
0017 
0018 #include <memory>
0019 #include <string>
0020 #include <vector>
0021 
0022 namespace ROOT {
0023 
0024 namespace RDF {
0025 
0026 namespace Internal {
0027 // Members are defined in RSqliteDS.cxx in order not to pollute this header file with sqlite3.h
0028 struct RSqliteDSDataSet;
0029 } // namespace Internal
0030 
0031 // clang-format off
0032 /**
0033 \class ROOT::RDF::RSqliteDS
0034 \ingroup dataframe
0035 \brief RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
0036 
0037 RSqliteDS can feed an RDataFrame with data from an SQLite SELECT query. It can be used like this:
0038 
0039     auto rdf = ROOT::RDF::FromSqlite("/path/to/file.sqlite", "select name from table");
0040     auto h = rdf.Define("lName", "name.length()").Histo1D("lName");
0041 
0042 The data source has to provide column types for all the columns. Determining column types in SQLite is tricky
0043 as it is dynamically typed and in principle each row can have different column types. The following heuristics
0044 are used:
0045 
0046   - If a table column is queried as is ("SELECT colname FROM table"), the default/declared column type is taken.
0047   - For expressions ("SELECT 1+1 FROM table"), the type of the first row of the result set determines the column type.
0048     That can result in a column being thought to be of type NULL even though subsequent rows actually have meaningful values.
0049     The provided SELECT query can be used to avoid such ambiguities.
0050 */
0051 class RSqliteDS final : public ROOT::RDF::RDataSource {
0052 private:
0053    // clang-format off
0054    /// All the types known to SQLite. The enumerator order must match the order of fgTypeNames; change both together.
0055    enum class ETypes {
0056       kInteger, ///< Paired with "Long64_t" in fgTypeNames.
0057       kReal,    ///< Paired with "double" in fgTypeNames.
0058       kText,    ///< Paired with "std::string" in fgTypeNames.
0059       kBlob,    ///< Paired with "std::vector<unsigned char>" in fgTypeNames.
0060       kNull     ///< Paired with "void *" in fgTypeNames.
0061    };
0062    // clang-format on
0063 
0064    /// Holds a single "cell" of the SELECT query's result table. It carries one storage slot per ETypes value;
0064    /// this could be replaced by a std::variant once that is available.
0065    struct Value_t {
0066       explicit Value_t(ETypes type); ///< Defined outside this header; presumably records `type` in fType (confirm).
0067 
0068       ETypes fType; ///< The ETypes value associated with this cell.
0069       bool fIsActive; ///< Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
0070       Long64_t fInteger; ///< Storage slot with the C++ type listed in fgTypeNames for kInteger.
0071       double fReal; ///< Storage slot with the C++ type listed in fgTypeNames for kReal.
0072       std::string fText; ///< Storage slot with the C++ type listed in fgTypeNames for kText.
0073       std::vector<unsigned char> fBlob; ///< Storage slot with the C++ type listed in fgTypeNames for kBlob.
0074       void *fNull; ///< Storage slot with the C++ type listed in fgTypeNames for kNull.
0075       void *fPtr; ///< Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
0076    };
0077 
0078    void SqliteError(int errcode); ///< Error helper taking an SQLite error code; its behavior is defined outside this header.
0079 
0080    std::unique_ptr<Internal::RSqliteDSDataSet> fDataSet; ///< Owning handle to the sqlite3-dependent state; the type is only forward-declared here.
0081    ULong64_t fNRow; ///< Row counter. NOTE(review): presumably the index of the current/next result row -- confirm in the implementation.
0082    std::vector<std::string> fColumnNames; ///< Column names. Presumably one entry per column of the query result -- confirm.
0083    std::vector<ETypes> fColumnTypes; ///< Column types. Presumably parallel to fColumnNames -- confirm.
0084    /// The data source is inherently single-threaded and returns only one row at a time. This vector holds the results.
0085    std::vector<Value_t> fValues;
0086 
0087    // clang-format off
0088    /// C++ type names corresponding, position by position, to the enumerators of ETypes.
0089    static constexpr char const *fgTypeNames[] = {
0090       "Long64_t",
0091       "double",
0092       "std::string",
0093       "std::vector<unsigned char>",
0094       "void *"
0095    };
0096    // clang-format on
0097 
0098 public:
0099    RSqliteDS(const std::string &fileName, const std::string &query); ///< Takes the SQLite file name and the query text (a SELECT query, per the class description).
0100    // Rule of five: copying and moving are disabled; only the destructor is provided.
0101    RSqliteDS(const RSqliteDS &) = delete;
0102    RSqliteDS &operator=(const RSqliteDS &) = delete;
0103    RSqliteDS(RSqliteDS &&) = delete;
0104    RSqliteDS &operator=(RSqliteDS &&) = delete;
0105    ~RSqliteDS() final;
0106 
0107    void SetNSlots(unsigned int nSlots) final; ///< NOTE(review): the data source is described as single-threaded; presumably only one slot is supported -- confirm.
0108    const std::vector<std::string> &GetColumnNames() const final; ///< Presumably returns fColumnNames -- confirm.
0109    bool HasColumn(std::string_view colName) const final;
0110    std::string GetTypeName(std::string_view colName) const final; ///< Presumably yields the fgTypeNames entry for the column's ETypes value -- confirm.
0111    std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
0112    bool SetEntry(unsigned int slot, ULong64_t entry) final;
0113    void Initialize() final;
0114    std::string GetLabel() final;
0115 
0116 protected:
0117    Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final; ///< Per the note on Value_t::fPtr, hands out the address of a Value_t's fPtr.
0118 };
0119 /// Returns an RDataFrame for the SQLite file `fileName` and the SELECT query `query` (see the usage example in the RSqliteDS class description); not defined in this header.
0120 RDataFrame FromSqlite(std::string_view fileName, std::string_view query);
0121 
0122 } // namespace RDF
0123 
0124 } // namespace ROOT
0125 
0126 #endif