Warning, /include/Geant4/tools/rcsv_ntuple is written in an unsupported language. File is not indexed.
0001 // Copyright (C) 2010, Guy Barrand. All rights reserved.
0002 // See the file tools.license for terms.
0003
0004 #ifndef tools_rcsv_ntuple
0005 #define tools_rcsv_ntuple
0006
0007 // A simple ntuple class to read data in the csv format
0008 // (csv = comma separated values).
0009
0010 // This reader can also be used to read files in the hippodraw format,
0011 // which is :
0012 // - one header line for the ntuple title.
0013 // - one csv line for column names.
0014 // - data in csv format.
0015
0016 #include "rntuple"
0017
0018 #include <istream>
0019 #include <sstream>
0020
0021 #include "vfind"
0022 #include "vmanip"
0023 #include "words"
0024 #include "snums"
0025 #include "sto"
0026 #include "s2time"
0027 #include "chars"
0028 #include "strip"
0029 #include "cids"
0030 #include "ntuple_binding"
0031 #include "sout"
0032 #include "num2s"
0033 //#include "srep"
0034
0035 #ifdef TOOLS_MEM
0036 #include "mem"
0037 #endif
0038
0039 #include <utility>
0040
0041 namespace tools {
0042 namespace rcsv {
0043
0044 class ntuple : public virtual read::intuple {
0045 typedef read::intuple parent;
0046 public: //read::intuple
0047 virtual void start() {
0048 m_reader.clear();
0049 m_reader.seekg(0,std::ios::beg);
0050 if(m_hippo) {
0051 skip_line(m_reader,m_sz);
0052 skip_line(m_reader,m_sz);
0053 }
0054 }
0055 virtual bool next() {
0056 if(!m_sep) return false; //not inited.
0057 if(m_reader.tellg()>=m_sz) return false;
0058 // first time we are at bol but else we are at eol.
0059 char c;
0060 m_reader.get(c);
0061 if(c==LF()){
0062 if(m_reader.tellg()>=m_sz) {
0063 //eof. Tell caller to stop looping on ntuple rows.
0064 return false;
0065 }
0066 //eol. Next char read is going to be at bol.
0067 } else {
0068 m_reader.putback(c);
0069 //bol
0070 }
0071 // ready for a new row :
0072
0073 while(skip_comment(m_reader,m_sz)){}
0074 if(m_reader.tellg()>=m_sz) return false;
0075
0076 return _read_line();
0077 }
0078
// Look up a column by name in m_cols (delegates to find_named<read::icol>()).
0079 virtual read::icol* find_icol(const std::string& a_name){
0080 return find_named<read::icol>(m_cols,a_name);
0081 }
0082
// Read-only access to the vector of column pointers held in m_cols.
0083 virtual const std::vector<read::icol*>& columns() const {return m_cols;}
0084
// Title string held in m_title.
0085 virtual const std::string& title() const {return m_title;}
0086
0087 virtual bool number_of_entries(tools::uint64 & a_value) const {
0088 if(!m_sep) {a_value = 0;return false;} //not inited.
0089 ntuple& self = const_cast<ntuple&>(*this);
0090 if(m_rows==(-1)) {
0091 self.m_rows = 0;
0092 self.start();
0093 while(self.next()) {self.m_rows++;}
0094 }
0095 a_value = (uint64)m_rows;
0096 return true;
0097 }
0098 public:
0099 template <class T>
0100 class column : public virtual read::icolumn<T> {
0101 typedef read::icolumn<T> parent;
0102 public:
0103 static cid id_class() {
0104 static const T s_v = T(); //do that for T = std::string.
0105 return 200+_cid(s_v);
0106 }
0107 public: //icol
0108 virtual void* cast(cid a_class) const {
0109 if(void* p = cmp_cast<column>(this,a_class)) {return p;}
0110 return parent::cast(a_class);
0111 }
0112 virtual cid id_cls() const {return id_class();}
0113 public: //icol
0114 virtual const std::string& name() const {return m_name;}
0115 virtual bool fetch_entry() const {
0116 if(m_user_var) *m_user_var = m_tmp;
0117 return true;
0118 }
0119 public: //icolumn<T>
0120 virtual bool get_entry(T& a_v) const {
0121 a_v = m_tmp;
0122 return true;
0123 }
0124 public:
0125 column(const std::string& a_name,T* a_user_var = 0)
0126 :m_name(a_name)
0127 ,m_tmp(T())
0128 ,m_user_var(a_user_var) //not owner
0129 {}
0130 virtual ~column(){}
0131 protected:
0132 column(const column& a_from)
0133 :read::icol(a_from)
0134 ,parent(a_from)
0135 ,m_name(a_from.m_name)
0136 ,m_tmp(a_from.m_tmp)
0137 ,m_user_var(a_from.m_user_var)
0138 {}
0139 column& operator=(const column& a_from){
0140 m_name = a_from.m_name;
0141 m_tmp = a_from.m_tmp;
0142 m_user_var = a_from.m_user_var;
0143 return *this;
0144 }
0145 public:
0146 // should be used in ntuple _read_line only :
0147 void set_value(const T& a_v){m_tmp = a_v;}
0148 protected:
0149 std::string m_name;
0150 T m_tmp;
0151 T* m_user_var;
0152 };
0153
0154 #ifdef TOOLS_MEM
0155 public:
// Name of this class, passed to mem::increment()/mem::decrement().
static const std::string& s_class() {
  static const std::string class_name("tools::rcsv::ntuple");
  return class_name;
}
0160 #endif
0161 public:
// Build a reader on a_reader, which is kept by reference (m_reader).
// m_sep starts at 0, so next() returns false until a separator is set
// (by one of the initialize() methods or by set_sep()).
// The default vector item separator is ';' and m_rows = -1 means
// "number of rows not yet counted".
0162 ntuple(std::istream& a_reader)
0163 :m_reader(a_reader)
0164 ,m_title()
0165 ,m_sep(0)
0166 ,m_vec_sep(';')
0167 ,m_sz(0)
0168 ,m_rows(-1)
0169 ,m_hippo(false)
0170 {
0171 #ifdef TOOLS_MEM
0172 mem::increment(s_class().c_str());
0173 #endif
0174 }
// Release the columns held in m_cols through safe_clear(). The input stream
// is only referenced and is left untouched.
0175 virtual ~ntuple() {
0176 safe_clear<read::icol>(m_cols);
0177 #ifdef TOOLS_MEM
0178 mem::decrement(s_class().c_str());
0179 #endif
0180 }
0181 protected:
// Protected copy constructor. The copy refers to the same input stream as
// a_from and takes its title, separators, size and hippo flag.
// m_cols is not in the initializer list, so the copy starts with no
// columns, and m_rows is reset to -1 (rows not counted).
0182 ntuple(const ntuple& a_from)
0183 :parent(a_from)
0184 ,m_reader(a_from.m_reader)
0185 ,m_title(a_from.m_title)
0186 ,m_sep(a_from.m_sep)
0187 ,m_vec_sep(a_from.m_vec_sep)
0188 ,m_sz(a_from.m_sz)
0189 ,m_rows(-1)
0190 ,m_hippo(a_from.m_hippo)
0191 {
0192 #ifdef TOOLS_MEM
0193 mem::increment(s_class().c_str());
0194 #endif
0195 }
0196 ntuple& operator=(const ntuple& a_from){
0197 m_title = a_from.m_title;
0198 m_sep = a_from.m_sep;
0199 m_vec_sep = a_from.m_vec_sep;
0200 m_hippo = a_from.m_hippo;
0201 m_rows = a_from.m_rows;
0202 return *this;
0203 }
0204 public:
// Set m_vec_sep, the character given to the vector readers as item separator.
0205 void set_vec_sep(char a_c) {m_vec_sep = a_c;}
// Set the field separator m_sep ; next() returns false while it is 0.
0206 void set_sep(char a_c) {m_sep = a_c;}
// Select the hippodraw layout : start() and initialize(a_out,a_sep,...) then
// handle the first two lines as header lines.
0207 void set_hippo(bool a_hippo) {m_hippo = a_hippo;}
0208
// Direct access to the underlying input stream.
0209 std::istream& istrm() {return m_reader;}
0210
0211 /* use file::is_hippo for that.
0212 static bool is_hippo(std::ostream& a_out,std::istream& a_reader) {
0213 // analyse two first data line.
0214
0215 a_reader.clear();
0216 a_reader.seekg(0,std::ios::end);
0217 std::streampos sz = a_reader.tellg();
0218 a_reader.seekg(0,std::ios::beg);
0219 if(!sz) {
0220 a_out << "tools::rcsv::ntuple::is_hippo :"
0221 << " stream is empty."
0222 << std::endl;
0223 return false;
0224 } //file empty.
0225
0226 std::string _title;
0227 if(!read_line(a_reader,sz,_title)) return false;
0228 std::string _s;
0229 if(!read_line(a_reader,sz,_s)) return false;
0230 if(_s.find('\t')==std::string::npos) return false;
0231
0232 //std::vector<std::string> labels;
0233 //words(s,"\t",false,labels);
0234 //return labels.size()?true:false;
0235
0236 return true;
0237 }
0238 */
0239 static bool find_sep(std::ostream& a_out,
0240 std::istream& a_reader,bool a_hippo,
0241 bool a_verbose,
0242 char& a_sep){
0243 // analyse first data line to find the char separator.
0244
0245 a_reader.clear();
0246 a_reader.seekg(0,std::ios::end);
0247 std::streampos sz = a_reader.tellg();
0248 a_reader.seekg(0,std::ios::beg);
0249 if(!sz) {
0250 a_out << "tools::rcsv::ntuple::find_sep :"
0251 << " stream is empty."
0252 << std::endl;
0253 a_sep = 0;
0254 return false;
0255 } //file empty.
0256 if(a_verbose) a_out << "file size " << sz << std::endl;
0257
0258 if(a_hippo) { //skip first two lines :
0259 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;}
0260 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;}
0261 } else {
0262 while(skip_comment(a_reader,sz)){}
0263 }
0264 if(a_reader.tellg()>=sz) {a_sep=0;return false;} //no data line.
0265
0266 // get first data line :
0267 std::string sfirst;
0268 {char c;
0269 while(true) {
0270 if(a_reader.tellg()>=sz) break;
0271 a_reader.get(c);
0272 if((c==CR())||(c==LF())) break;
0273 sfirst += c;
0274 }}
0275 if(sfirst.empty()) {
0276 a_out << "tools::rcsv::ntuple::find_set :"
0277 << " first datat line is empty."
0278 << std::endl;
0279 a_sep = 0;
0280 return false;
0281 }
0282 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl;
0283
0284 //guess sep from first data line :
0285 std::istringstream strm(sfirst.c_str());
0286 double d;
0287 strm >> d;
0288 std::streampos pos = strm.tellg();
0289 if(pos==std::streampos(-1)) {
0290 a_out << "tools::rcsv::ntuple::find_sep :"
0291 << " first line does not start with a number."
0292 << std::endl;
0293 a_sep = 0;
0294 return false;
0295 } //not a number.
0296 if(a_verbose) a_out << "first number " << d
0297 << " ending at pos " << pos << std::endl;
0298 if(pos>=(std::streampos)sfirst.size()) {
0299 a_out << "tools::rcsv::ntuple::find_sep :"
0300 << " no separator found in first line."
0301 << " pos " << pos
0302 << " sfirst.size() " << sfirst.size()
0303 << std::endl;
0304 a_sep = 0;
0305 return false;
0306 } //no sep.
0307
0308 strm.get(a_sep);
0309
0310 return true;
0311 }
0312
0313 public:
0314 bool initialize(std::ostream& a_out,
0315 char a_sep = 0, //guessed
0316 const std::string& a_suffix = "x", //col suffix
0317 bool a_verbose = false) {
0318 safe_clear<read::icol>(m_cols);
0319 m_sep = 0;
0320 m_sz = 0;
0321 m_rows = -1;
0322
0323 if(a_suffix.empty()) {
0324 a_out << "tools::rcsv::ntuple::initialize : expect a column suffix." << std::endl;
0325 return false;
0326 }
0327
0328 m_reader.clear();
0329 m_reader.seekg(0,std::ios::end);
0330 m_sz = m_reader.tellg();
0331 m_reader.seekg(0,std::ios::beg);
0332 if(!m_sz) {
0333 a_out << "tools::rcsv::ntuple::initialize :"
0334 << " stream is empty."
0335 << std::endl;
0336 return false; //file empty.
0337 }
0338 if(a_verbose) a_out << "file size " << m_sz << std::endl;
0339
0340 std::vector<std::string> labels;
0341 if(m_hippo) { //skip first two lines :
0342 std::string _title;
0343 if(!read_line(m_reader,m_sz,_title)) {
0344 a_out << "tools::rcsv::ntuple::initialize : read_line() failed." << std::endl;
0345 m_sz = 0;
0346 m_rows = -1;
0347 return false;
0348 }
0349 std::string _s;
0350 if(!read_line(m_reader,m_sz,_s)) {
0351 a_out << "tools::rcsv::ntuple::initialize : (2) read_line() failed." << std::endl;
0352 m_sz = 0;
0353 m_rows = -1;
0354 return false;
0355 }
0356 words(_s,"\t",false,labels); //false for glast.tnt that has a trailing \t.
0357 } else {
0358 while(skip_comment(m_reader,m_sz)){}
0359 }
0360 if(m_reader.tellg()>=m_sz) {
0361 a_out << "tools::rcsv::ntuple::initialize : tellg() >= sz." << std::endl;
0362 m_sz = 0;
0363 m_rows = -1;
0364 return false;
0365 }
0366
0367 // get first data line :
0368 std::string sfirst;
0369 {{char c;
0370 while(true) {
0371 if(m_reader.tellg()>=m_sz) break;
0372 m_reader.get(c);
0373 if((c==CR())||(c==LF())) break;
0374 sfirst += c;
0375 }}
0376 if(sfirst.empty()) {
0377 a_out << "tools::rcsv::ntuple::initialize :"
0378 << " first datat line is empty."
0379 << std::endl;
0380 m_sz = 0;
0381 m_rows = -1;
0382 return false;
0383 }}
0384 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl;
0385
0386 if(a_sep) {
0387 m_sep = a_sep;
0388 } else {
0389 //guess sep from first data line :
0390 std::istringstream strm(sfirst.c_str());
0391 double d;
0392 strm >> d;
0393 std::streampos pos = strm.tellg();
0394 if(pos==std::streampos(-1)) {
0395 a_out << "tools::rcsv::ntuple::initialize :"
0396 << " first line does not start with a number."
0397 << std::endl;
0398 m_sz = 0;
0399 m_rows = -1;
0400 return false;
0401 }
0402 if(a_verbose) a_out << "first number " << d << " ending at pos " << pos << std::endl;
0403 if(pos>=(std::streampos)sfirst.size()) {
0404 a_out << "tools::rcsv::ntuple::initialize :"
0405 << " no separator found in first line."
0406 << std::endl;
0407 m_sz = 0;
0408 m_rows = -1;
0409 return false;
0410 }
0411 strm.get(m_sep);
0412 }
0413 if(a_verbose) a_out << "sep " << (int)m_sep << std::endl;
0414
0415 // in case sep is ' ', there is an ambiguity with some leading
0416 // space in front of first number.
0417 if(m_sep==' ') strip(sfirst,leading,' ');
0418
0419 std::vector<std::string> ws;
0420 {std::string sep;
0421 sep += m_sep;
0422 words(sfirst,sep,m_hippo?false:true,ws);}
0423
0424 // look if words are numbers :
0425 if(a_verbose) a_out << "words " << ws.size() << std::endl;
0426 unsigned int index = 0;
0427 std::vector<std::string>::iterator it;
0428 for(it=ws.begin();it!=ws.end();++it,index++) {
0429 if(a_verbose) a_out << "word " << sout(*it) << "" << std::endl;
0430 /* with glast.tnt there is trailing \t that will induce an extra empty column.
0431 if((*it).empty()) {
0432 // do not accept :
0433 // <num><sep><num><sep><sep><num>...
0434 // but accept a trailing <sep> (glast.tnt) :
0435 // <num><sep><num>....<sep><num><sep>
0436 if(index==(ws.size()-1)) {
0437 break;
0438 } else {
0439 a_out << "tools::rcsv::ntuple::initialize :"
0440 << " empty word."
0441 << std::endl;
0442 safe_clear<read::icol>(m_cols);
0443 m_sep = 0;
0444 m_sz = 0;
0445 m_rows = -1;
0446 return false;
0447 }
0448 }
0449 */
0450 std::string name = a_suffix;
0451 if(!numas<uint64>(m_cols.size(),name)){}
0452 if(m_hippo) {
0453 if(index>=labels.size()) {
0454 a_out << "tools::rcsv::ntuple::initialize :"
0455 << " warning : not enough labels."
0456 << std::endl;
0457 } else {
0458 name = labels[index];
0459 }
0460 }
0461 double d;
0462 if(to<double>(*it,d)) {
0463 if(a_verbose) a_out << "number " << d << std::endl;
0464 create_column<double>(name);
0465 } else {
0466 time_t time;
0467 if(s2time(*it,time)) {
0468 create_column<csv_time>(name);
0469 } else {
0470 std::vector<double> v;
0471 std::string vec_sep;vec_sep += m_vec_sep;
0472 if(snums<double>(*it,vec_sep,v)&&v.size()) {
0473 create_column< std::vector<double> >(name);
0474 } else {
0475 create_column<std::string>(name);
0476 }
0477 }
0478 }
0479 }
0480 size_t num = m_cols.size();
0481 if(!num) {
0482 a_out << "tools::rcsv::ntuple::initialize :"
0483 << " zero columns."
0484 << std::endl;
0485 m_sep = 0;
0486 m_sz = 0;
0487 m_rows = -1;
0488 return false;
0489 }
0490
0491 return true;
0492 }
0493
0494 static const std::string& s_cid(cid a_id) {
0495
0496 #define TOOLS_RCSV_NTUPLE_IF_CID(a__name,a__type) \
0497 if(a_id==column<a__type>::id_class()) {\
0498 static const std::string s_v(#a__name);\
0499 return s_v;\
0500 }
0501
0502 #define TOOLS_RCSV_NTUPLE_IF_VEC_CID(a__name,a__type) \
0503 if(a_id==column< std::vector<a__type> >::id_class()) {\
0504 static const std::string s_v(#a__name+std::string("[]"));\
0505 return s_v;\
0506 }
0507
0508 TOOLS_RCSV_NTUPLE_IF_CID(char,char)
0509 else TOOLS_RCSV_NTUPLE_IF_CID(short,short)
0510 else TOOLS_RCSV_NTUPLE_IF_CID(int,int)
0511 else TOOLS_RCSV_NTUPLE_IF_CID(int64,int64)
0512
0513 else TOOLS_RCSV_NTUPLE_IF_CID(float,float)
0514 else TOOLS_RCSV_NTUPLE_IF_CID(double,double)
0515
0516 else TOOLS_RCSV_NTUPLE_IF_CID(uchar,uchar)
0517 else TOOLS_RCSV_NTUPLE_IF_CID(ushort,ushort)
0518 else TOOLS_RCSV_NTUPLE_IF_CID(uint,uint32) //WARNING
0519 else TOOLS_RCSV_NTUPLE_IF_CID(uint64,uint64)
0520
0521 else TOOLS_RCSV_NTUPLE_IF_CID(bool,bool)
0522 else if(a_id==column<std::string>::id_class()) {
0523 static const std::string s_v("string");
0524 return s_v;
0525 }
0526
0527 else if(a_id==column<csv_time>::id_class()) {
0528 static const std::string s_v("time");
0529 return s_v;
0530 }
0531
0532 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(char,char)
0533 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(short,short)
0534 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(int,int)
0535 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(int64,int64)
0536
0537 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(float,float)
0538 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(double,double)
0539
0540 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uchar,uchar)
0541 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(ushort,ushort)
0542 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uint,uint32) //WARNING
0543 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uint64,uint64)
0544
0545 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(bool,bool)
0546 else if(a_id==column< std::vector<std::string> >::id_class()) {
0547 static const std::string s_v("string[]");
0548 return s_v;
0549 }
0550
0551 #undef TOOLS_RCSV_NTUPLE_IF_CID
0552 #undef TOOLS_RCSV_NTUPLE_IF_VEC_CID
0553
0554 else {
0555 static const std::string s_v("unknown");
0556 return s_v;
0557 }
0558 }
0559
0560 void dump_columns(std::ostream& a_out) const {
0561 if((m_sep>=32)&&(m_sep<=126)) { //printable
0562 a_out << "separator is '" << m_sep << "'" << std::endl;
0563 } else {
0564 a_out << "separator is " << (unsigned int)m_sep << std::endl;
0565 }
0566 a_out << "number of columns " << m_cols.size() << std::endl;
0567 std::vector<read::icol*>::const_iterator it;
0568 for(it=m_cols.begin();it!=m_cols.end();++it) {
0569 a_out << sout((*it)->name())
0570 << " " << s_cid((*it)->id_cls())
0571 << std::endl;
0572 }
0573 }
0574 public:
0575 typedef std::pair<std::string,std::string> col_desc;
0576
0577 bool initialize(std::ostream& a_out,const ntuple_binding& a_bd = ntuple_binding()) {
0578 // it assumes a "commented header".
0579
0580 safe_clear<read::icol>(m_cols);
0581 m_sep = 0;
0582 m_sz = 0;
0583 m_rows = -1;
0584 m_hippo = false;
0585
0586 m_reader.clear();
0587 m_reader.seekg(0,std::ios::end);
0588 m_sz = m_reader.tellg();
0589 m_reader.seekg(0,std::ios::beg);
0590 if(!m_sz) {
0591 a_out << "tools::rcsv::ntuple::initialize(booking) :"
0592 << " stream is empty."
0593 << std::endl;
0594 return false; //file empty.
0595 }
0596 //if(a_verbose) a_out << "file size " << m_sz << std::endl;
0597
0598 std::string _title;
0599 char _sep,_vec_sep;
0600 std::vector<col_desc> _cols;
0601 if(!read_commented_header(a_out,m_reader,_title,_sep,_vec_sep,_cols)) return false;
0602
0603 m_sep = _sep;
0604 m_title = std::move(_title);
0605
0606 tools_vforcit(col_desc,_cols,it) {
0607 const std::string& type = (*it).first;
0608 const std::string& name = (*it).second;
0609
0610 #define TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(a__name,a__type) \
0611 if(type==(std::string(#a__name)+"[]")) {\
0612 create_column< std::vector<a__type> >(name,a_bd.find_variable< std::vector<a__type> >(name));\
0613 }
0614
0615 // see cid2s() for string types.
0616
0617 if(type=="char") create_column<char>(name,a_bd.find_variable<char>(name));
0618 else if(type=="short") create_column<short>(name,a_bd.find_variable<short>(name));
0619 else if(type=="int") create_column<int>(name,a_bd.find_variable<int>(name));
0620 else if(type=="float") create_column<float>(name,a_bd.find_variable<float>(name));
0621 else if(type=="double") create_column<double>(name,a_bd.find_variable<double>(name));
0622 else if(type=="string") create_column<std::string>(name,a_bd.find_variable<std::string>(name));
0623
0624 else if(type=="uchar") create_column<unsigned char>(name,a_bd.find_variable<unsigned char>(name));
0625 else if(type=="ushort") create_column<unsigned short>(name,a_bd.find_variable<unsigned short>(name));
0626 else if(type=="uint") create_column<uint32>(name,a_bd.find_variable<uint32>(name)); //WARNING
0627 else if(type=="bool") create_column<bool>(name,a_bd.find_variable<bool>(name));
0628 else if(type=="int64") create_column<int64>(name,a_bd.find_variable<int64>(name));
0629 else if(type=="uint64") create_column<uint64>(name,a_bd.find_variable<uint64>(name));
0630
0631 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(char,char)
0632 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(short,short)
0633 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(int,int)
0634 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(float,float)
0635 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(double,double)
0636
0637 else if(type=="string[]") create_column< std::vector<std::string> >(name,a_bd.find_variable< std::vector<std::string> >(name));
0638
0639 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uchar,uchar)
0640 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(ushort,ushort)
0641 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uint,uint32) //WARNING
0642 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(bool,bool)
0643 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(int64,int64)
0644 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uint64,uint64)
0645
0646 else {
0647 a_out << "tools::rcsv::ntuple::initialize(booking) :"
0648 << " unhandled column type " << sout(type)
0649 << std::endl;
0650 safe_clear<read::icol>(m_cols);
0651 m_sep = 0;
0652 m_sz = 0;
0653 m_rows = -1;
0654 m_hippo = false;
0655 return false;
0656 }
0657
0658 #undef TOOLS_RCSV_NTUPLE_CREATE_VEC_COL
0659
0660 }
0661
0662 size_t num = m_cols.size();
0663 if(!num) {
0664 a_out << "tools::rcsv::ntuple::initialize(booking) :"
0665 << " zero columns."
0666 << std::endl;
0667 return false;
0668 }
0669
0670 //a_out << "tools::rroot::ntuple::initialize :"
0671 // << " number of columns " << num << "."
0672 // << std::endl;
0673
0674 return true;
0675 }
0676
0677 bool initialize_from_commented_header(std::ostream& a_out) { // it assumes a "commented header".
0678 std::string _title;
0679 char _sep,_vec_sep;
0680 std::vector<col_desc> _cols;
0681 if(!read_commented_header(a_out,m_reader,_title,_sep,_vec_sep,_cols)) return false;
0682 ntuple_binding nbd;
0683 {tools_vforcit(col_desc,_cols,it) nbd.add_column_no_var((*it).second);} //user_var is 0.
0684 return initialize(a_out,nbd);
0685 }
0686
0687 bool get_row() const {
0688 bool status = true;
0689 tools_vforcit(read::icol*,m_cols,it) {
0690 if(!(*it)->fetch_entry()) status = false;
0691 }
0692 return status;
0693 }
0694
0695 protected:
0696 bool read_commented_header(std::ostream& a_out,std::istream& a_reader,
0697 std::string& a_title,char& a_sep,char& a_vec_sep,std::vector<col_desc>& a_cols) {
0698 // analyse first lines starting with '#'.
0699 a_title.clear();
0700 a_sep = 0;
0701 a_cols.clear();
0702
0703 a_reader.clear();
0704 a_reader.seekg(0,std::ios::end);
0705 std::streampos sz = a_reader.tellg();
0706 a_reader.seekg(0,std::ios::beg);
0707 if(!sz) {
0708 a_out << "tools::rcsv::ntuple::read_commented_header :"
0709 << " stream is empty."
0710 << std::endl;
0711 return false;
0712 } //file empty.
0713
0714
0715 std::string _class;
0716
0717 while(true) {
0718 if(a_reader.tellg()>=sz) break;
0719 //we should be at bol :
0720 char c;
0721 a_reader.get(c);
0722 a_reader.putback(c);
0723 if(c!='#') break; //finished, probably a data line now.
0724 std::string line;
0725 if(!read_line(a_reader,sz,line)) break; //or return false ?
0726
0727 std::vector<std::string> _words;
0728 words(line," ",false,_words);
0729 if(!_words.size()) {
0730 a_out << "tools::rcsv::ntuple::read_commented_header :"
0731 << " syntax error : empty header line."
0732 << std::endl;
0733 return false;
0734 }
0735 if((_words[0]=="#class")) {
0736 if(_words.size()!=2) {
0737 a_out << "tools::rcsv::ntuple::read_commented_header :"
0738 << " syntax error in " << sout(line)
0739 << std::endl;
0740 return false;
0741 }
0742 _class = _words[1];
0743 } else if(_words[0]=="#title") {
0744 if(_words.size()<1) {
0745 a_out << "tools::rcsv::ntuple::read_commented_header :"
0746 << " syntax error in " << sout(line)
0747 << std::endl;
0748 return false;
0749 }
0750 if(_words.size()==1) {
0751 a_title.clear();
0752 } else {
0753 std::string::size_type pos = line.find(_words[0]);
0754 pos += _words[0].size()+1;
0755 a_title = line.substr(pos,line.size()-pos);
0756 }
0757 } else if((_words[0]=="#separator")) {
0758 if(_words.size()!=2) {
0759 a_out << "tools::rcsv::ntuple::read_commented_header :"
0760 << " syntax error in " << sout(line)
0761 << std::endl;
0762 return false;
0763 }
0764 unsigned int uisep;
0765 if(!to(_words[1],uisep)) {
0766 a_out << "tools::rcsv::ntuple::read_commented_header :"
0767 << " syntax error in " << sout(line)
0768 << std::endl;
0769 return false;
0770 }
0771 a_sep = (char)uisep;
0772 } else if((_words[0]=="#vector_separator")) {
0773 if(_words.size()!=2) {
0774 a_out << "tools::rcsv::ntuple::read_commented_header :"
0775 << " syntax error in " << sout(line)
0776 << std::endl;
0777 return false;
0778 }
0779 unsigned int uisep;
0780 if(!to(_words[1],uisep)) {
0781 a_out << "tools::rcsv::ntuple::read_commented_header :"
0782 << " syntax error in " << sout(line)
0783 << std::endl;
0784 return false;
0785 }
0786 a_vec_sep = (char)uisep;
0787 } else if((_words[0]=="#column")) {
0788 if(_words.size()<2) {
0789 a_out << "tools::rcsv::ntuple::read_commented_header :"
0790 << " syntax error in " << sout(line)
0791 << std::endl;
0792 return false;
0793 }
0794 std::string stype = _words[1];
0795 std::string label;
0796 if(_words.size()==2) {
0797 label.clear();
0798 } else {
0799 std::string::size_type pos = line.find(_words[1]);
0800 pos += _words[1].size()+1;
0801 label = line.substr(pos,line.size()-pos);
0802 }
0803 //a_out << "column " << stype << " " << sout(label) << std::endl;
0804 a_cols.push_back(col_desc(stype,label));
0805 } else {
0806 a_out << "tools::rcsv::ntuple::read_commented_header :"
0807 << " syntax error in " << sout(line)
0808 << ", unknown keyword " << sout(_words[0])
0809 << std::endl;
0810 //return false;
0811 }
0812 }
0813
0814 /*
0815 a_out << "class " << _class << std::endl;
0816 a_out << "title " << _title << std::endl;
0817 a_out << "separator " << _separator << std::endl;
0818 */
0819
0820 return true;
0821 }
0822
0823 protected:
0824 template <class T>
0825 column<T>* create_column(const std::string& a_name,T* a_user_var = 0){
0826 if(find_named<read::icol>(m_cols,a_name)) return 0;
0827 column<T>* col = new column<T>(a_name,a_user_var);
0828 if(!col) return 0;
0829 m_cols.push_back(col);
0830 return col;
0831 }
0832
0833 protected:
0834 static bool read_line(std::istream& a_reader,std::streampos a_sz,std::string& a_s){
0835 a_s.clear();
0836 char c;
0837 while(true) {
0838 if(a_reader.tellg()>=a_sz) {a_s.clear();return false;}
0839 a_reader.get(c);
0840 if(c==CR()) continue;
0841 if(c==LF()) break; //eol.
0842 a_s += c;
0843 }
0844 return true;
0845 }
0846
0847 static bool skip_line(std::istream& a_reader,std::streampos a_sz){
0848 char c;
0849 while(true) {
0850 if(a_reader.tellg()>=a_sz) return false;
0851 a_reader.get(c);
0852 if(c==LF()) break;
0853 }
0854 return true;
0855 }
0856
0857 static bool skip_comment(std::istream& a_reader,std::streampos a_sz){
0858 //ret true = we had a commented line, false : a data line or nothing.
0859 if(a_reader.tellg()>=a_sz) return false;
0860 //we should be at bol :
0861 char c;
0862 a_reader.get(c);
0863 if(c=='#') {
0864 return skip_line(a_reader,a_sz);
0865 //eol. Next char should be bol.
0866 } else {
0867 a_reader.putback(c);
0868 return false;
0869 }
0870 }
0871
0872 template <class T>
0873 static bool _read(std::istream& a_reader,std::streampos,char,T& a_v) {
0874 a_reader >> a_v;
0875 if(a_reader.tellg()==std::streampos(-1)) {a_v = 0;return false;}
0876 //std::cout << "debug : _read(double) " << a_v << std::endl;
0877 return true;
0878 }
0879 static bool _read_time(std::istream& a_reader,std::streampos a_sz,char a_sep,time_t& a_v) {
0880 std::string _s;
0881 char c;
0882 while(true){
0883 if(a_reader.tellg()>=a_sz) break;
0884 a_reader.get(c);
0885 if((c==a_sep)||(c==CR())||(c==LF())) {
0886 a_reader.putback(c);
0887 break;
0888 }
0889 _s += c;
0890 }
0891 if(!s2time(_s,a_v)) return false;
0892 return true;
0893 }
0894 static bool _read(std::istream& a_reader,std::streampos a_sz,char a_sep,std::string& a_v) {
0895 a_v.clear();
0896 char c;
0897 while(true){
0898 if(a_reader.tellg()>=a_sz) break;
0899 a_reader.get(c);
0900 if((c==a_sep)||(c==CR())||(c==LF())) {
0901 a_reader.putback(c);
0902 break;
0903 }
0904 a_v += c;
0905 }
0906 return true;
0907 }
0908
0909 static bool _vec_read(std::istream& a_reader,std::streampos a_sz,
0910 std::istringstream&,std::vector<std::string>&,
0911 char a_sep,const std::string& a_vec_sep,
0912 std::vector<std::string>& a_v) {
0913 std::string _s;
0914 if(!_read(a_reader,a_sz,a_sep,_s)) return false;
0915 //replace(_s,"\\"+a_vec_sep,"@@");
0916 words(_s,a_vec_sep,true,a_v);
0917 //tools_vforit(std::string,a_v,it) replace(*it,"@@",a_vec_sep);
0918 return true;
0919 }
0920
0921 template <class T>
0922 static bool _vec_read(std::istream& a_reader,std::streampos a_sz,
0923 std::istringstream& a_iss,std::vector<std::string>& a_tmp,
0924 char a_sep,const std::string& a_vec_sep,
0925 std::vector<T>& a_v) {
0926 std::string _s;
0927 if(!_read(a_reader,a_sz,a_sep,_s)) return false;
0928 if(!snums<T>(_s,a_iss,a_tmp,a_vec_sep,a_v)) return false;
0929 return true;
0930 }
0931
0932 protected:
// Read one row : for each column of m_cols, in order, one field is read from
// m_reader with the reader that matches the concrete type of the column,
// and the value is stored in the column with set_value().
// Returns false as soon as a field can't be read, or if a column is of a
// type not listed below.
0933 bool _read_line() {
0934 // have to loop on all columns !
0935 typedef read::icol icol_t;
0936
// One typedef per handled column type. The names must be col_<type> and
// col_vec_<type> since the macros below build them by token pasting.
0937 typedef ntuple::column<char> col_char;
0938 typedef ntuple::column<short> col_short;
0939 typedef ntuple::column<int> col_int;
0940 typedef ntuple::column<float> col_float;
0941 typedef ntuple::column<double> col_double;
0942 typedef std::string string_t;
0943 typedef ntuple::column<string_t> col_string_t;
0944
0945 typedef ntuple::column<uchar> col_uchar;
0946 typedef ntuple::column<ushort> col_ushort;
0947 typedef ntuple::column<uint32> col_uint32;
0948 typedef ntuple::column<bool> col_bool;
0949 typedef ntuple::column<int64> col_int64;
0950 typedef ntuple::column<uint64> col_uint64;
0951
0952 typedef ntuple::column<csv_time> col_time;
0953
0954 typedef ntuple::column< std::vector<char> > col_vec_char;
0955 typedef ntuple::column< std::vector<short> > col_vec_short;
0956 typedef ntuple::column< std::vector<int32> > col_vec_int;
0957 typedef ntuple::column< std::vector<float> > col_vec_float;
0958 typedef ntuple::column< std::vector<double> > col_vec_double;
0959 typedef ntuple::column< std::vector<std::string> > col_vec_string_t;
0960
0961 typedef ntuple::column< std::vector<uchar> > col_vec_uchar;
0962 typedef ntuple::column< std::vector<ushort> > col_vec_ushort;
0963 typedef ntuple::column< std::vector<uint32> > col_vec_uint32;
0964 typedef ntuple::column< std::vector<bool> > col_vec_bool;
0965 typedef ntuple::column< std::vector<int64> > col_vec_int64;
0966 typedef ntuple::column< std::vector<uint64> > col_vec_uint64;
0967
// vec_sep, iss and tmp are built once and passed to every _vec_read() call.
0968 std::string vec_sep;vec_sep += m_vec_sep;
0969 std::istringstream iss;
0970 std::vector<std::string> tmp;
0971
0972 size_t index = 0;
0973 size_t num = m_cols.size();
0974 std::vector<icol_t*>::const_iterator it;
0975 for(it=m_cols.begin();it!=m_cols.end();++it,index++) {
0976
// Scalar column : if the column is a col_<type>, read one value with _read()
// and store it.
0977 #define TOOLS_RCSV_NTUPLE_IF_COL(a__type) \
0978 if(col_##a__type* _col_##a__type = id_cast<icol_t,col_##a__type>(*(*it))) {\
0979 a__type v;\
0980 if(!_read(m_reader,m_sz,m_sep,v)) return false;\
0981 _col_##a__type->set_value(v);\
0982 }
0983
// Vector column : same logic with _vec_read() and a std::vector<type>.
0984 #define TOOLS_RCSV_NTUPLE_IF_VEC_COL(a__type) \
0985 if(col_vec_##a__type* _col_vec_##a__type = id_cast<icol_t,col_vec_##a__type>(*(*it))) {\
0986 std::vector<a__type> v;\
0987 if(!_vec_read(m_reader,m_sz,iss,tmp,m_sep,vec_sep,v)) return false;\
0988 _col_vec_##a__type->set_value(v);\
0989 }
0990
// Dispatch on the column type : the first id_cast that succeeds reads the field.
0991 TOOLS_RCSV_NTUPLE_IF_COL(char)
0992 else TOOLS_RCSV_NTUPLE_IF_COL(short)
0993 else TOOLS_RCSV_NTUPLE_IF_COL(int)
0994 else TOOLS_RCSV_NTUPLE_IF_COL(float)
0995 else TOOLS_RCSV_NTUPLE_IF_COL(double)
0996 else TOOLS_RCSV_NTUPLE_IF_COL(string_t)
0997
0998 else TOOLS_RCSV_NTUPLE_IF_COL(uchar)
0999 else TOOLS_RCSV_NTUPLE_IF_COL(ushort)
1000 else TOOLS_RCSV_NTUPLE_IF_COL(uint32)
1001 else TOOLS_RCSV_NTUPLE_IF_COL(bool)
1002 else TOOLS_RCSV_NTUPLE_IF_COL(int64)
1003 else TOOLS_RCSV_NTUPLE_IF_COL(uint64)
1004
// Time column : the field is converted to a time_t by _read_time() and
// stored as a long in a csv_time.
1005 else if(col_time* _col_time = id_cast<icol_t,col_time>(*(*it))) {
1006 time_t v;
1007 if(!_read_time(m_reader,m_sz,m_sep,v)) return false;
1008 csv_time ct;ct.m_l = long(v);
1009 _col_time->set_value(ct);
1010 }
1011
1012 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(char)
1013 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(short)
1014 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(int)
1015 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(float)
1016 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(double)
1017 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(string_t)
1018
1019 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uchar)
1020 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(ushort)
1021 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uint32)
1022 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(bool)
1023 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(int64)
1024 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uint64)
1025
1026 #undef TOOLS_RCSV_NTUPLE_IF_COL
1027 #undef TOOLS_RCSV_NTUPLE_IF_VEC_COL
1028
// No cast succeeded : the column type is not handled here.
1029 else {
1030 //std::cout << "column cast failed." << std::endl;
1031 return false;
1032 }
1033
// After the last column the rest of the line is consumed, LF included (or up
// to the end of the stream). After another column one char is consumed : it
// is expected to be the separator but this is not checked.
1034 if(index==(num-1)) { //read up to LF()
1035 char c;
1036 while(true){
1037 if(m_reader.tellg()>=m_sz) break;
1038 m_reader.get(c);
1039 if(c==LF()) break;
1040 }
1041 } else { //read sep :
1042 char sep;
1043 m_reader.get(sep);
1044 }
1045 }
1046 return true;
1047 }
1048 protected:
1049 std::istream& m_reader;
1050 std::string m_title;
1051 char m_sep;
1052 char m_vec_sep;
1053 std::vector<read::icol*> m_cols;
1054 std::streampos m_sz;
1055 int m_rows; //to optimize number_of_entries().
1056 bool m_hippo;
1057 };
1058
1059 }}
1060
1061
1062 #include <fstream>
1063
1064 namespace tools {
1065 namespace rcsv {
1066
1067 class fntuple : public ntuple {
1068 typedef ntuple parent;
1069 public:
1070 static const std::string& s_class() {
1071 static const std::string s_v("tools::rcsv::fntuple");
1072 return s_v;
1073 }
1074 public:
1075 fntuple(const std::string& a_file)
1076 :parent(m_freader)
1077 ,m_file(a_file)
1078 {}
1079 virtual ~fntuple() {m_freader.close();}
1080 protected:
1081 fntuple(const fntuple& a_from)
1082 :read::intuple(a_from)
1083 ,parent(a_from)
1084 ,m_file(a_from.m_file)
1085 {}
1086 fntuple& operator=(const fntuple& a_from){
1087 parent::operator=(a_from);
1088 m_file = a_from.m_file;
1089 return *this;
1090 }
1091 public:
1092 bool open(){
1093 m_freader.open(m_file.c_str());
1094 return m_freader.fail()?false:true;
1095 }
1096 bool initialize(std::ostream& a_out,
1097 char a_sep = 0, //guessed
1098 const std::string& a_suffix = "x", //col suffix
1099 bool a_verbose = false) {
1100 if(!m_freader.is_open()) {
1101 m_freader.open(m_file.c_str());
1102 if(m_freader.fail()) {
1103 a_out << "tools::rcsv::fntuple::initialize :"
1104 << " can't open " << m_file << "."
1105 << std::endl;
1106 return false;
1107 }
1108 }
1109 return parent::initialize(a_out,a_sep,a_suffix,a_verbose);
1110 }
1111 protected:
1112 std::string m_file;
1113 std::ifstream m_freader;
1114 };
1115
1116 }}
1117
1118 #endif