Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:27

0001 #pragma once
0002 /**
0003 NPFold.h : collecting and persisting NP arrays keyed by relative paths
0004 ========================================================================
0005 
0006 Primary Source Location in *np* repo (not *opticks*)
0007 ---------------------------------------------------------
0008 
0009 +---------+------------------------------------------------------------------------+
0010 | Action  |  Command                                                               |
0011 +=========+========================================================================+
0012 | Browse  | open https://github.com/simoncblyth/np/blob/master/NPFold.h            |
0013 +---------+------------------------------------------------------------------------+
0014 | Edit    | vi ~/np/NPFold.h                                                       |
0015 +---------+------------------------------------------------------------------------+
0016 | Test    | cd ~/np/tests ; ./NPFold_..._test.sh                                   |
0017 +---------+------------------------------------------------------------------------+
0018 | Copy    | cd ~/np ; ./cp.sh # when diff copies to  ~/opticks/sysrap/NPFold.h     |
0019 +---------+------------------------------------------------------------------------+
0020 
0021 
0022 Load/Save Modes
0023 ----------------
0024 
0025 There are two load/save modes:
0026 
0027 1. with index txt file "NPFold_index.txt" : the default mode,
0028    in which the ordering of the keys is preserved
0029 
0030 2. without index txt file : the ordering of keys/arrays
0031    follows the sorted order from U::DirList
0032 
0033 
0034 Supporting NPFold within NPFold recursively
0035 ----------------------------------------------
0036 
0037 For example "Materials" NPFold containing sub-NPFold for each material and at higher
0038 level "Properties" NPFold containing "Materials" and "Surfaces" sub-NPFold.
0039 
0040 A sub-NPFold of an NPFold is simply represented by a key in the
0041 index that does not end with ".npy" which gets stored into ff vector.
0042 
0043 Loading txt property files
0044 ----------------------------
0045 
0046 Simple txt property files can also be loaded.
0047 NB these txt property files are input only.
0048 
0049 The NPFold keys still use the .npy extension
0050 and saving the NPFold proceeds normally saving the arrays into
0051 standard binary .npy and sidecars.
0052 
0053 
0054 Former load_fts approach vs current load_dir
0055 -----------------------------------------------
0056 
0057 Former use of fts filesystem traversal with load_fts led
0058 to loading a directory tree of .npy all into a single NPFold.
0059 Now with load_dir each directory is loaded into its own NPFold,
0060 so loading a directory tree creates a corresponding tree of NPFold.
0061 
0062 Hid fts.h usage behind WITH_FTS as getting compilation error on Linux::
0063 
0064     /usr/include/fts.h:41:3: error: #error "<fts.h> cannot be used with -D_FILE_OFFSET_BITS==64"
0065      # error "<fts.h> cannot be used with -D_FILE_OFFSET_BITS==64"
0066        ^~~~~
0067 
0068 **/
0069 
0070 #include <string>
0071 #include <algorithm>
0072 #include <iterator>
0073 #include <vector>
0074 #include <map>
0075 #include <set>
0076 #include <cstdlib>
0077 #include <csignal>
0078 #include <cstdio>
0079 #include <sys/types.h>
0080 
0081 #ifdef WITH_FTS
0082 #include <fts.h>
0083 #endif
0084 
0085 #include <cstring>
0086 #include <errno.h>
0087 #include <sstream>
0088 #include <iomanip>
0089 
0090 #include "NPX.h"
0091 
0092 struct NPFold
0093 {
     // NPFold : collects NP arrays keyed by relative path, together with named
     // sub-NPFold, so that a directory tree of .npy files maps onto a tree of NPFold.
     // Pointers are held raw : see the add_subfold notes regarding what clearing deletes.

0094     // PRIMARY MEMBERS : KEYS, ARRAYS, SUBFOLD
0095 
     // kk and aa are parallel vectors : check_integrity asserts they have equal size
0096     std::vector<std::string> kk ;
0097     std::vector<const NP*>   aa ;
0098 
     // ff and subfold are parallel vectors : check_integrity asserts they have equal size
0099     std::vector<std::string> ff ;  // keys of sub-NPFold
0100     std::vector<NPFold*> subfold ;
0101 
0102 
0103     // METADATA FIELDS
0104     std::string               headline ;
0105     std::string               meta ;
0106     std::vector<std::string>  names ;
0107     const char*               savedir ;   // nullptr from ctor
0108     const char*               loaddir ;   // nullptr from ctor
0109 
0110     // nodata:true used for lightweight access to metadata from many arrays
0111     bool                      nodata ;
0112     bool                      verbose_ ;
0113 
0114     // [TRANSIENT FIELDS : NOT COPIED BY CopyMeta
0115     bool                      allowempty ;
0116     bool                      allowonlymeta ;
0117     bool                      skipdelete ;   // set to true on subfold during trivial concat
0118     NPFold*                   parent ;      // set by add_subfold
0119     // ]TRANSIENT FIELDS
0120 
     // INTKEY_PREFIX : default prefix char used when forming integer subfold keys
     // UNDEF         : returned by the get_*_idx lookups when nothing is found
     // VERBOSE .. PARENT : initial values the ctor gives to the corresponding fields
0121     static constexpr const char INTKEY_PREFIX = 'f' ;
0122     static constexpr const int UNDEF = -1 ;
0123     static constexpr const bool VERBOSE = false ;
0124     static constexpr const bool ALLOWEMPTY = false ;
0125     static constexpr const bool ALLOWONLYMETA = false ;
0126     static constexpr const bool SKIPDELETE = false ;
0127     static constexpr NPFold*    PARENT = nullptr ;
0128 
     // File extensions recognised by the Is* predicates, and names of sidecar files.
     // INDEX is the file that Exists(base) looks for within the base directory.
     // kNP_PROP_BASE is the envvar name read by LoadProp, which falls back to "/tmp".
0129     static constexpr const char* DOT_NPY = ".npy" ;  // formerly EXT
0130     static constexpr const char* DOT_TXT = ".txt" ;
0131     static constexpr const char* DOT_PNG = ".png" ;
0132     static constexpr const char* DOT_JPG = ".jpg" ;
0133     static constexpr const char* TOP = "/" ;
0134     static constexpr const char* INDEX = "NPFold_index.txt" ;
0135     static constexpr const char* META  = "NPFold_meta.txt" ;
0136     static constexpr const char* NAMES = "NPFold_names.txt" ;
0137     static constexpr const char* kNP_PROP_BASE = "NP_PROP_BASE" ;
0138 
0139 
     // Key classification by suffix/prefix : null arguments yield false
0140     static bool IsNPY(const char* k);
0141     static bool IsTXT(const char* k);
0142     static bool IsPNG(const char* k);
0143     static bool IsJPG(const char* k);
0144     static bool HasSuffix( const char* k, const char* s );
0145     static bool HasPrefix( const char* k, const char* p );
0146 
     // BareKey returns a strdup-ed copy of the key : the buffer is not freed by NPFold
0147     static const char* BareKey(const char* k);  // without .npy
0148     static std::string FormKey(const char* k, bool change_txt_to_npy );
0149     static std::string FormKey(const char* k );
0150 
     // Static loaders : Load_/LoadNoData_ return nullptr when given a nullptr base,
     // otherwise a new NPFold that the caller receives as a raw pointer
0151     static bool    IsValid(const NPFold* fold);
0152     static NPFold* LoadIfExists(const char* base);
0153     static bool    Exists(const char* base);
0154     static NPFold* Load_(const char* base );
0155     static NPFold* LoadNoData_(const char* base );
0156 
0157     static const char* Resolve(const char* base_, const char* rel1_=nullptr, const char* rel2_=nullptr);
0158     static NPFold* Load(const char* base);
0159     static NPFold* Load(const char* base, const char* rel );
0160     static NPFold* Load(const char* base, const char* rel1, const char* rel2 );
0161 
0162     static NPFold* LoadNoData(const char* base);
0163     static NPFold* LoadNoData(const char* base, const char* rel );
0164     static NPFold* LoadNoData(const char* base, const char* rel1, const char* rel2 );
0165 
0166 
0167     static NPFold* LoadProp(const char* rel0, const char* rel1=nullptr );
0168 
     // Compare returns -1 on item count mismatch, otherwise the number of key/array mismatches
0169     static int Compare(const NPFold* a, const NPFold* b );
0170     static std::string DescCompare(const NPFold* a, const NPFold* b );
0171 
0172 
0173     // CTOR
0174     NPFold();
0175 
     // Flag setters : the _r forms apply the flag to this fold and all subfold via SetFlag_r
0176     void set_verbose( bool v=true );
0177     void set_skipdelete( bool v=true );
0178     void set_allowempty( bool v=true );
0179     void set_allowonlymeta( bool v=true );
0180 
0181     void set_verbose_r( bool v=true );
0182     void set_skipdelete_r( bool v=true );
0183     void set_allowempty_r( bool v=true );
0184     void set_allowonlymeta_r( bool v=true );
0185 
     // selector values for the flag argument of SetFlag_r, which returns the number of folds visited
0186     enum { SET_VERBOSE, SET_SKIPDELETE, SET_ALLOWEMPTY, SET_ALLOWONLYMETA } ;
0187     static int SetFlag_r(NPFold* nd, int flag, bool v);
0188 
0189 private:
0190     void check_integrity() const ;
0191 public:
0192 
0193     // [subfold handling
0194     NPFold*      add_subfold(char prefix=INTKEY_PREFIX);
0195     void         add_subfold(int ikey     , NPFold* fo, char prefix=INTKEY_PREFIX ); // integer key formatted with prefix
0196 
     // name of envvar read by add_subfold with U::GetEnvInt : non-zero value permits repeated subfold keys
0197     static constexpr const char* _NPFold__add_subfold_ALLOW_DUPLICATE_KEY = "NPFold__add_subfold_ALLOW_DUPLICATE_KEY" ;
0198     void         add_subfold(const char* f, NPFold* fo );
0199 
0200     bool         has_zero_subfold() const ;
0201     int          get_num_subfold() const ;
0202     NPFold*      get_subfold(unsigned idx) const ;
0203 
     // key accessors return nullptr and idx lookups return UNDEF when there is no match
0204     const char*  get_last_subfold_key() const ;
0205     const char*  get_subfold_key(unsigned idx) const ;
0206     int          get_subfold_idx(const char* f) const ;
0207     int          get_subfold_idx(const NPFold* fo) const ;
0208 
0209     int          get_key_idx(const char* k) const ;
0210     int          get_arr_idx(const NP* a) const ;
0211 
0212 
0213     const char*  get_subfold_key_within_parent() const ;
0214 
0215     void         get_treepath_(std::vector<std::string>& elem) const ;
0216     std::string  get_treepath(const char* k=nullptr) const ;
0217     static std::string Treepath(const NPFold* f);
0218 
0219     NPFold*      get_subfold(const char* f) const ;
0220     bool         has_subfold(const char* f) const ;
0221 
0222 
0223     void find_arrays_with_key(  std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key) const ;
0224     void find_arrays_with_key_r(std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key) const ;
0225     static void FindArraysWithKey_r(const NPFold* nd, std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key, int d);
0226     static std::string DescArraysAndPaths( std::vector<const NP*>& rr, std::vector<std::string>& tt );
0227 
0228 
0229     const NP*      find_array(const char* apath) const ;
0230     const NP*      find_array(const char* base, const char* name) const ;
0231 
0232     NPFold*        find_subfold_(const char* fpath) const  ;
0233     const NPFold*  find_subfold(const char* fpath) const  ;
0234 
0235 
     // NOTE(review): the const qualifier on a void return type has no effect, and some
     // compilers warn about it; removing it requires changing the definitions too
0236     const void     find_subfold_with_all_keys(
0237         std::vector<const NPFold*>& subs,
0238         const char* keys,
0239         char delim=',' ) const ;
0240 
0241     const void     find_subfold_with_all_keys(
0242         std::vector<const NPFold*>& subs,
0243         std::vector<std::string>& names,
0244         const char* keys,
0245         char delim=',' ) const ;
0246 
0247 
0248     void get_subfold_with_intkey(std::vector<const NPFold*>& subs, char prefix) const ;
0249     bool all_subfold_with_intkey(char prefix) const ;
0250     void get_all_subfold_unique_keys(std::vector<std::string>& uks) const ;
0251 
0252 
0253 
0254     int concat_strict(std::ostream* out=nullptr);
0255     int concat(std::ostream* out=nullptr);
0256     NP* concat_(const char* k, std::ostream* out=nullptr);
0257     bool can_concat(std::ostream* out=nullptr) const ;
0258 
0259     int maxdepth() const ;
0260     static int MaxDepth_r(const NPFold* nd, int d);
0261 
0262 
0263     static constexpr const int MXD_NOLIMIT = 0 ;
0264     static int Traverse_r(
0265         const NPFold* nd,
0266         std::string nd_path,
0267         std::vector<const NPFold*>& folds,
0268         std::vector<std::string>& paths,
0269         int d,
0270         int mxd=MXD_NOLIMIT );
0271 
0272     static std::string FormSubPath(const char* base, const char* sub, char delim='/' );
0273 
0274     std::string desc_subfold(const char* top=TOP) const ;
0275     void find_subfold_with_prefix(
0276          std::vector<const NPFold*>& subs,
0277          std::vector<std::string>* subpaths,
0278          const char* prefix,
0279          int maxdepth ) const ;
0280 
0281     static std::string DescFoldAndPaths( const std::vector<const NPFold*>& subs, const std::vector<std::string>& subpaths );
0282 
0283     bool is_empty() const ;
0284     int total_items() const ;
0285 
0286     // ]subfold handling
0287 
0288 
     // Array collection : keys are normalised with FormKey (see its notes on the .npy extension)
0289     void add(int ikey, const NP* a, char prefix, int wid=3);
0290     void add( const char* k, const NP* a);
0291     void add_(const char* k, const NP* a);
0292     void set( const char* k, const NP* a);
0293 
0294     static void SplitKeys( std::vector<std::string>& elem , const char* keylist, char delim=',');
0295     static std::string DescKeys( const std::vector<std::string>& elem, char delim=',' );
0296 
0297     void clear();
0298 private:
0299     void clear_(const std::vector<std::string>* keep);
0300     void clear_arrays(const std::vector<std::string>* keep);
0301 public:
0302     void clear_subfold();
0303     void clear_only(  const char* clrlist=nullptr, bool copy=true, char delim=',');
0304     void clear_except(const char* keylist=nullptr, bool copy=true, char delim=',');
0305     void clear_except_(const std::vector<std::string>& keep, bool copy ) ;
0306 
0307 
0308     //OLD API: NPFold* copy( const char* keylist, bool shallow_array_copy, char delim=',' ) const ;
0309     NPFold* deepcopy(const char* keylist=nullptr, char delim=',' ) const ;
0310     NPFold* shallowcopy(const char* keylist=nullptr, char delim=',' ) const ;
0311 private:
0312     // make private to find them all and switch to above form
0313     NPFold* copy(   bool shallow_array_copy, const char* keylist=nullptr, char delim=',' ) const ;
0314 public:
0315     static NPFold* Copy(const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys );
0316     static void CopyMeta( NPFold* b , const NPFold* a );
0317     static void CopyArray(   NPFold* dst , const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys );
0318     static void CopySubfold( NPFold* dst , const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys );
0319 
0320     int count_keys( const std::vector<std::string>* keys ) const ;
0321 
0322 
0323     // single level (non recursive) accessors
0324 
0325     int num_items() const ;
0326     const char* get_key(unsigned idx) const ;
0327     const NP*   get_array(unsigned idx) const ;
0328 
0329     int find(const char* k) const ;
0330     bool has_key(const char* k) const ;
0331     bool has_all_keys(const char* keys, char delim=',') const ;
0332     bool has_all_keys(const std::vector<std::string>& qq) const ;
0333     int  count_keys(const std::vector<std::string>& qq) const ;
0334 
0335     const NP* get(const char* k) const ;
0336     NP*       get_(const char* k);
0337 
0338 
0339     const NP* get_optional(const char* k) const ;
0340     size_t get_num(const char* k) const ;   // number of items in array
0341     void  get_counts( std::vector<std::string>* keys, std::vector<size_t>* counts ) const ;
0342     static std::string DescCounts(const std::vector<std::string>& keys, const std::vector<size_t>& counts );
0343 
0344 
0345 
0346 
     // CAUTION : with T=std::string the default fallback=0 would construct a
     // std::string from a null pointer, hence the instruction to pass "" explicitly
0347     template<typename T> T    get_meta(const char* key, T fallback=0) const ;  // for T=std::string must set fallback to ""
0348     std::string get_meta_string(const char* key) const ;  // empty when not found
0349 
0350     template<typename T> void set_meta(const char* key, T value ) ;
0351 
0352 
0353     int save(const char* base, const char* rel, const char* name) ;
0354     int save(const char* base, const char* rel) ;
0355     int save(const char* base) ;
0356     int save_verbose(const char* base) ;
0357 
0358     int _save_local_item_count() const ;
0359     int _save_local_meta_count() const ;
0360     int _save(const char* base) ;
0361 
0362     int  _save_arrays(const char* base);
0363     void _save_subfold_r(const char* base);
0364 
0365     void load_array(const char* base, const char* relp);
0366     void load_subfold(const char* base, const char* relp);
0367 
0368 #ifdef WITH_FTS
0369     static int FTS_Compare(const FTSENT** one, const FTSENT** two);
0370     int  no_longer_used_load_fts(const char* base) ;
0371 #endif
0372 
     // The *_DUMP constants look like envvar names for debug dumping in the
     // correspondingly named methods - TODO confirm against the method definitions
0373     int  load_dir(const char* base) ;
0374     static constexpr const char* load_dir_DUMP = "NPFold__load_dir_DUMP" ;
0375 
0376     int  load_index(const char* base) ;
0377     static constexpr const char* load_index_DUMP = "NPFold__load_index_DUMP" ;
0378 
0379     int load(const char* base ) ;
0380     static constexpr const char* load_DUMP = "NPFold__load_DUMP" ;
0381 
0382     int load(const char* base, const char* rel0, const char* rel1=nullptr ) ;
0383 
0384 
0385     std::string descKeys() const ;
0386     std::string desc() const ;
0387 
0388     std::string desc_(int depth) const ;
0389     std::string descf_(int depth) const ;
0390     std::string desc(int depth) const ;
0391 
0392     std::string descMetaKVS() const ;
0393     void getMetaKVS(std::vector<std::string>* keys, std::vector<std::string>* vals, std::vector<int64_t>* stamps, bool only_with_stamp ) const ;
0394     int  getMetaNumStamp() const ;
0395 
0396     std::string descMetaKV() const ;
0397     void getMetaKV(std::vector<std::string>* keys, std::vector<std::string>* vals, bool only_with_profile ) const ;
0398     int  getMetaNumProfile() const ;
0399 
0400     void setMetaKV(const std::vector<std::string>& keys, const std::vector<std::string>& vals) ;
0401 
0402     static std::string Indent(int width);
0403 
0404     std::string brief() const ;
0405     std::string stats() const ;
0406     std::string smry() const ;
0407 
0408     // STATIC CONVERTERS
0409 
0410     static void Import_MIMSD(            std::map<int,std::map<std::string,double>>& mimsd, const NPFold* f );
0411     static NPFold* Serialize_MIMSD(const std::map<int,std::map<std::string,double>>& mimsd);
0412     static std::string Desc_MIMSD( const std::map<int,std::map<std::string,double>>& mimsd);
0413 
0414 
0415     // SUMMARIZE FOLD ARRAY COUNTS
0416     NP* subcount( const char* prefix ) const ;
0417     static constexpr const char* subcount_DUMP = "NPFold__subcount_DUMP" ;
0418 
0419     NP* submeta(const char* prefix, const char* column=nullptr ) const ;
0420 
0421     // TIMESTAMP/PROFILE COMPARISON USING SUBFOLD METADATA
0422 
0423     NPFold* substamp(  const char* prefix, const char* keyname) const ;
0424     static constexpr const char* substamp_DUMP = "NPFold__substamp_DUMP" ;
0425 
0426     NPFold* subprofile(const char* prefix, const char* keyname) const ;
0427     static constexpr const char* subprofile_DUMP = "NPFold__subprofile_DUMP" ;
0428 
0429     template<typename ... Args>
0430     NPFold* subfold_summary(const char* method, Args ... args_  ) const  ;
0431     static constexpr const char* subfold_summary_DUMP = "NPFold__subfold_summary_DUMP" ;
0432 
0433     template<typename F, typename T>
0434     NP* compare_subarrays(const char* key, const char* asym="a", const char* bsym="b", std::ostream* out=nullptr  );
0435 
0436     template<typename F, typename T>
0437     std::string compare_subarrays_report(const char* key, const char* asym="a", const char* bsym="b" );
0438 
0439 
0440     static void Subkey(std::vector<std::string>& ukey, const std::vector<const NPFold*>& subs );
0441     static void SubCommonKV(
0442         std::vector<std::string>& okey,
0443         std::vector<std::string>& ckey,
0444         std::vector<std::string>& cval,
0445         const std::vector<const NPFold*>& subs );
0446 
0447     static std::string DescCommonKV(
0448          const std::vector<std::string>& okey,
0449          const std::vector<std::string>& ckey,
0450          const std::vector<std::string>& cval );
0451 
0452 };
0453 
0454 
0455 inline bool NPFold::IsNPY(const char* k) { return HasSuffix(k, DOT_NPY) ; }
0456 inline bool NPFold::IsTXT(const char* k) { return HasSuffix(k, DOT_TXT) ; }
0457 inline bool NPFold::IsPNG(const char* k) { return HasSuffix(k, DOT_PNG) ; }
0458 inline bool NPFold::IsJPG(const char* k) { return HasSuffix(k, DOT_JPG) ; }
0459 
0460 
0461 inline bool NPFold::HasSuffix(const char* k, const char* s )
0462 {
0463     return k && s && strlen(k) >= strlen(s) && strncmp( k + strlen(k) - strlen(s), s, strlen(s)) == 0 ;
0464 }
0465 inline bool NPFold::HasPrefix( const char* k, const char* p )
0466 {
0467     return k && p && strlen(p) <= strlen(k) && strncmp(k, p, strlen(p)) == 0 ;
0468 }
0469 
0470 /**
0471 NPFold::BareKey
0472 ----------------
0473 
0474 For keys ending with DOT_NPY ".npy" or DOT_TXT ".txt"
0475 this returns the key without the extension.
0476 
0477 **/
0478 
0479 inline const char* NPFold::BareKey(const char* k)
0480 {
0481     char* bk = strdup(k);
0482     if(IsNPY(bk) || IsTXT(bk)) bk[strlen(bk)-4] = '\0' ;
0483     return bk ;
0484 }
0485 
0486 
0487 /**
0488 NPFold::FormKey
0489 -----------------
0490 
0491 If added keys do not end with the DOT_NPY ".npy" then the DOT_NPY is added prior to collection.
0492 
0493 Note that even when collecting arrays created from txt files, such as with SProp.h
0494 where files would have no extension (or .txt extension) it is still appropriate
0495 to add the DOT_NPY .npy  to the NPFold in preparation for subsequent saving
0496 and for the simplicity of consistency.
0497 
0498 Empty arrays with argument key ending with .txt are assumed
0499 to be NPX::Holder placeholder arrays that act to carry vectors
0500 of strings in the array names metadata. The extension is
0501 swapped to .npy for more standard handling.
0502 
0503 **/
0504 
0505 
0506 inline std::string NPFold::FormKey(const char* k, bool change_txt_to_npy)
0507 {
0508     bool is_npy = IsNPY(k);
0509     bool is_txt = IsTXT(k);
0510 
0511     std::stringstream ss ;
0512 
0513     if(change_txt_to_npy && is_txt)
0514     {
0515         const char* bk = BareKey(k) ;
0516         ss << bk << DOT_NPY ;
0517     }
0518     else
0519     {
0520         ss << k ;
0521         if(!is_npy) ss << DOT_NPY ;
0522     }
0523 
0524 
0525     std::string str = ss.str();
0526     return str ;
0527 }
0528 
0529 inline std::string NPFold::FormKey(const char* k)
0530 {
0531     bool is_npy = IsNPY(k);
0532     std::stringstream ss ;
0533     ss << k ;
0534     if(!is_npy) ss << DOT_NPY ;
0535     std::string str = ss.str();
0536     return str ;
0537 }
0538 
0539 
0540 inline bool NPFold::IsValid(const NPFold* fold) // static
0541 {
0542     return fold && !fold->is_empty() ;
0543 }
0544 
0545 inline NPFold* NPFold::LoadIfExists(const char* base) // static
0546 {
0547     return Exists(base) ? Load(base) : nullptr ;
0548 }
0549 
0550 inline bool NPFold::Exists(const char* base) // static
0551 {
0552     return NP::Exists(base, INDEX);
0553 }
0554 inline NPFold* NPFold::Load_(const char* base )
0555 {
0556     if(base == nullptr) return nullptr ;
0557     NPFold* nf = new NPFold ;
0558     nf->load(base);
0559     return nf ;
0560 }
0561 
0562 /**
0563 NPFold::LoadNoData_
0564 --------------------
0565 
0566 **/
0567 
0568 inline NPFold* NPFold::LoadNoData_(const char* base_ )
0569 {
0570     if(base_ == nullptr) return nullptr ;
0571     const char* base = NP::PathWithNoDataPrefix(base_);
0572     NPFold* nf = new NPFold ;
0573     nf->load(base);
0574     return nf ;
0575 }
0576 
0577 inline const char* NPFold::Resolve(const char* base_, const char* rel1_, const char* rel2_ )
0578 {
0579     const char* base = U::Resolve(base_, rel1_, rel2_ );
0580     if(base == nullptr) std::cerr
0581         << "NPFold::Resolve"
0582         << " FAILED "
0583         << " base_ " << ( base_ ? base_ : "-" )
0584         << " rel1_ " << ( rel1_ ? rel1_ : "-" )
0585         << " rel2_ " << ( rel2_ ? rel2_ : "-" )
0586         << " POSSIBLY UNDEFINED ENVVAR TOKEN "
0587         << std::endl
0588         ;
0589     return base ;
0590 }
0591 
0592 
0593 inline NPFold* NPFold::Load(const char* base_)
0594 {
0595     const char* base = Resolve(base_);
0596     return Load_(base);
0597 }
0598 inline NPFold* NPFold::Load(const char* base_, const char* rel_)
0599 {
0600     const char* base = Resolve(base_, rel_);
0601     return Load_(base);
0602 }
0603 inline NPFold* NPFold::Load(const char* base_, const char* rel1_, const char* rel2_ )
0604 {
0605     const char* base = Resolve(base_, rel1_, rel2_ );
0606     return Load_(base);
0607 }
0608 
0609 
0610 
0611 inline NPFold* NPFold::LoadNoData(const char* base_)
0612 {
0613     const char* base = Resolve(base_);
0614     return LoadNoData_(base);
0615 }
0616 inline NPFold* NPFold::LoadNoData(const char* base_, const char* rel_)
0617 {
0618     const char* base = Resolve(base_, rel_);
0619     return LoadNoData_(base);
0620 }
0621 inline NPFold* NPFold::LoadNoData(const char* base_, const char* rel1_, const char* rel2_ )
0622 {
0623     const char* base = Resolve(base_, rel1_, rel2_ );
0624     return LoadNoData_(base);
0625 }
0626 
0627 
0628 
0629 
0630 
0631 
0632 
0633 inline NPFold* NPFold::LoadProp(const char* rel0, const char* rel1 )
0634 {
0635     const char* base = getenv(kNP_PROP_BASE) ;
0636     NPFold* nf = new NPFold ;
0637     nf->load(base ? base : "/tmp", rel0, rel1 );
0638     return nf ;
0639 }
0640 
0641 inline int NPFold::Compare(const NPFold* a, const NPFold* b )
0642 {
0643     int na = a->num_items();
0644     int nb = b->num_items();
0645     bool item_match = na == nb ;
0646     if(!item_match ) return -1 ;
0647 
0648     int mismatch = 0 ;
0649     for(int i=0 ; i < na ; i++)
0650     {
0651         const char* a_key = a->get_key(i);
0652         const char* b_key = b->get_key(i);
0653         const NP*   a_arr = a->get_array(i);
0654         const NP*   b_arr = b->get_array(i);
0655 
0656         bool key_match = strcmp(a_key, b_key) == 0 ;
0657         bool arr_match = NP::Memcmp(a_arr, b_arr) == 0 ;
0658 
0659         if(!key_match) mismatch += 1  ;
0660         if(!key_match) std::cout
0661             << "NPFold::Compare KEY_MATCH FAIL"
0662             << " a_key " << std::setw(40) << a_key
0663             << " b_key " << std::setw(40) << b_key
0664             << " key_match " << key_match
0665             << std::endl
0666             ;
0667 
0668         if(!arr_match) mismatch += 1 ;
0669         if(!arr_match) std::cout
0670             << "NPFold::Compare ARR_MATCH FAIL"
0671             << " a_arr " << std::setw(40) << a_arr->sstr()
0672             << " b_arr " << std::setw(40) << b_arr->sstr()
0673             << " arr_match " << arr_match
0674             << std::endl
0675             ;
0676     }
0677     if(mismatch > 0) std::cout << "NPFold::Compare mismatch " << mismatch << std::endl ;
0678     return mismatch ;
0679 }
0680 
0681 
0682 
0683 
0684 inline std::string NPFold::DescCompare(const NPFold* a, const NPFold* b )
0685 {
0686     int na = a ? a->num_items() : -1 ;
0687     int nb = b ? b->num_items() : -1 ;
0688     bool item_match = na == nb ;
0689 
0690     std::stringstream ss ;
0691     ss << "NPFold::DescCompare"
0692        << " a " << ( a ? "Y" : "N" )
0693        << " b " << ( b ? "Y" : "N" )
0694        << std::endl
0695        << " na " << na
0696        << " nb " << nb
0697        << " item_match " << item_match
0698        << std::endl
0699        << " a.desc "
0700        << std::endl
0701        << ( a ? a->desc() : "-" )
0702        << " b.desc "
0703        << std::endl
0704        << ( b ? b->desc() : "-" )
0705        << std::endl
0706        ;
0707     std::string s = ss.str();
0708     return s;
0709 }
0710 
0711 
0712 
0713 // CTOR
0714 
0715 inline NPFold::NPFold()
0716     :
0717     kk(),
0718     aa(),
0719     ff(),
0720     subfold(),
0721     headline(),
0722     meta(),
0723     names(),
0724     savedir(nullptr),
0725     loaddir(nullptr),
0726     nodata(false),
0727     verbose_(VERBOSE),
0728     allowempty(ALLOWEMPTY),
0729     allowonlymeta(ALLOWONLYMETA),
0730     skipdelete(SKIPDELETE),
0731     parent(PARENT)
0732 {
0733     if(verbose_) std::cerr << "NPFold::NPFold" << std::endl ;
0734 }
0735 
0736 
0737 inline void NPFold::set_verbose( bool v )
0738 {
0739      verbose_ = v ;
0740 }
0741 inline void NPFold::set_skipdelete( bool v )
0742 {
0743     skipdelete = v ;
0744 }
0745 inline void NPFold::set_allowempty( bool v )
0746 {
0747     allowempty = v ;
0748 }
0749 inline void NPFold::set_allowonlymeta( bool v )
0750 {
0751     allowonlymeta = v ;
0752 }
0753 
0754 
0755 
0756 inline void NPFold::set_verbose_r( bool v )
0757 {
0758     SetFlag_r(this, SET_VERBOSE, v);
0759 }
0760 inline void NPFold::set_skipdelete_r( bool v )
0761 {
0762     SetFlag_r(this, SET_SKIPDELETE, v);
0763 }
0764 inline void NPFold::set_allowempty_r( bool v )
0765 {
0766     SetFlag_r(this, SET_ALLOWEMPTY, v);
0767 }
0768 inline void NPFold::set_allowonlymeta_r( bool v )
0769 {
0770     SetFlag_r(this, SET_ALLOWONLYMETA, v);
0771 }
0772 
0773 
0774 
0775 inline int NPFold::SetFlag_r(NPFold* nd, int flag, bool v)
0776 {
0777     switch(flag)
0778     {
0779         case SET_VERBOSE   : nd->set_verbose(v)    ; break ;
0780         case SET_SKIPDELETE: nd->set_skipdelete(v) ; break ;
0781         case SET_ALLOWEMPTY: nd->set_allowempty(v) ; break ;
0782         case SET_ALLOWONLYMETA: nd->set_allowonlymeta(v) ; break ;
0783     }
0784 
0785     int tot_fold = 1 ;
0786 
0787     assert( nd->subfold.size() == nd->ff.size() );
0788     int num_sub = nd->subfold.size();
0789     for(int i=0 ; i < num_sub ; i++)
0790     {
0791         NPFold* sub = nd->subfold[i] ;
0792         int num = SetFlag_r( sub, flag, v );
0793         tot_fold += num ;
0794     }
0795     return tot_fold ;
0796 }
0797 
0798 
0799 
0800 
0801 
0802 /**
0803 NPFold::check_integrity
0804 --------------------------
0805 
0806 check_integrity of key and array vectors and similarly for subfold (non-recursive)
0807 
0808 **/
0809 
0810 inline void NPFold::check_integrity() const
0811 {
0812     assert( kk.size() == aa.size() );
0813     assert( ff.size() == subfold.size() );
0814 }
0815 
0816 
0817 
0818 
0819 
0820 // [ subfold handling
0821 
0822 inline NPFold* NPFold::add_subfold(char prefix)
0823 {
0824     int ikey = subfold.size();
0825     NPFold* sub = new NPFold ;
0826     add_subfold(ikey, sub, prefix );
0827     return sub ;
0828 }
0829 
0830 inline void NPFold::add_subfold(int ikey, NPFold* sub, char prefix )
0831 {
0832     int wid = 3 ;
0833     std::string skey = U::FormNameWithPrefix(prefix, ikey, wid);
0834     add_subfold(skey.c_str(), sub );
0835 }
0836 
0837 /**
0838 NPFold::add_subfold
0839 --------------------
0840 
0841 CAUTION : this simply collects keys and NPFold pointers
0842 into vectors, NO COPYING IS DONE.
0843 However, clearing the fold will delete arrays within the fold.
0844 Because of this beware of stale input pointers after clearing.
0845 
0846 * Regard subfold added to an NPFold to belong to the fold
0847 * Do not do silly things like adding the same pointer more than once
0848 
0849 **/
0850 
0851 
0852 inline void NPFold::add_subfold(const char* f, NPFold* fo )
0853 {
0854     if(fo == nullptr) return ;
0855 
0856     bool unique_f  = std::find( ff.begin(), ff.end(), f ) == ff.end() ;
0857     bool unique_fo = std::find( subfold.begin(), subfold.end(), fo ) == subfold.end() ;
0858 
0859 
0860     int ALLOW_DUPLICATE_KEY = U::GetEnvInt(_NPFold__add_subfold_ALLOW_DUPLICATE_KEY, 0 );
0861 
0862     if(!unique_f) std::cerr
0863        << "NPFold::add_subfold"
0864        << " ERROR repeated subfold key f[" << ( f ? f : "-" ) << "]"
0865        << " ff.size " << ff.size()
0866        << "[" << _NPFold__add_subfold_ALLOW_DUPLICATE_KEY << "] " << ALLOW_DUPLICATE_KEY
0867        << "\n"
0868        ;
0869 
0870     if( !unique_f && ALLOW_DUPLICATE_KEY == 0 )
0871     {
0872         assert( unique_f ) ;
0873         std::raise(SIGINT);   // release builds remove assert
0874     }
0875 
0876     if(!unique_fo) std::cerr
0877        << "NPFold::add_subfold"
0878        << " ERROR repeated subfold pointer with key f[" << ( f ? f : "-" ) << "]"
0879        << " subfold.size " << subfold.size()
0880        << "\n"
0881        ;
0882     assert( unique_fo ) ;
0883 
0884 
0885 
0886     ff.push_back(f); // subfold keys
0887     subfold.push_back(fo);
0888 
0889     if(fo->parent != nullptr)
0890     {
0891         std::string fo_treepath = fo->get_treepath();
0892         std::string fo_parent_treepath = fo->parent->get_treepath();
0893         std::string this_treepath = get_treepath();
0894 
0895         std::cerr
0896             << "NPFold::add_subfold "
0897             << " WARNING changing parent of added subfold fo \n"
0898             << " fo.treepath [" << fo_treepath << "]\n"
0899             << " fo.parent.treepath [" << fo_parent_treepath << "]\n"
0900             << " this.treepath [" << this_treepath << "]\n"
0901             << "\n"
0902             ;
0903     }
0904     assert( fo->parent == nullptr );
0905     fo->parent = this ;
0906 }
0907 
0908 
0909 inline bool NPFold::has_zero_subfold() const
0910 {
0911     return 0 == get_num_subfold();
0912 }
0913 inline int NPFold::get_num_subfold() const
0914 {
0915     assert( ff.size() == subfold.size() );
0916     return ff.size();
0917 }
0918 inline NPFold* NPFold::get_subfold(unsigned idx) const
0919 {
0920     return idx < subfold.size() ? subfold[idx] : nullptr ;
0921 }
0922 
0923 
0924 
0925 inline const char* NPFold::get_last_subfold_key() const
0926 {
0927     return ff.size() > 0 ? ff[ff.size()-1].c_str() : nullptr ;
0928 }
0929 
0930 inline const char* NPFold::get_subfold_key(unsigned idx) const
0931 {
0932     return idx < ff.size() ? ff[idx].c_str() : nullptr ;
0933 }
0934 inline int NPFold::get_subfold_idx(const char* f) const
0935 {
0936     size_t idx = std::distance( ff.begin(), std::find( ff.begin(), ff.end(), f )) ;
0937     return idx < ff.size() ? idx : UNDEF ;
0938 }
0939 inline int NPFold::get_subfold_idx(const NPFold* fo) const
0940 {
0941     size_t idx = std::distance( subfold.begin(), std::find( subfold.begin(), subfold.end(), fo )) ;
0942     return idx < subfold.size() ? idx : UNDEF ;
0943 }
0944 
0945 inline int NPFold::get_key_idx(const char* k) const
0946 {
0947     size_t idx = std::distance( kk.begin(), std::find( kk.begin(), kk.end(), k )) ;
0948     return idx < kk.size() ? idx : UNDEF ;
0949 }
0950 inline int NPFold::get_arr_idx(const NP* a) const
0951 {
0952     size_t idx = std::distance( aa.begin(), std::find( aa.begin(), aa.end(), a )) ;
0953     return idx < aa.size() ? idx : UNDEF ;
0954 }
0955 
0956 
0957 
0958 inline const char* NPFold::get_subfold_key_within_parent() const
0959 {
0960     int idx = parent ? parent->get_subfold_idx(this) : -1 ;
0961     return idx == -1 ? nullptr : parent->get_subfold_key(idx) ;
0962 }
0963 
0964 inline void NPFold::get_treepath_(std::vector<std::string>& elem) const
0965 {
0966      const NPFold* n = this ;
0967      while(n)
0968      {
0969          const char* sk = n->get_subfold_key_within_parent() ;
0970          elem.push_back(sk ? sk : "");
0971          n = n->parent ;
0972      }
0973 }
0974 inline std::string NPFold::get_treepath(const char* k) const
0975 {
0976     std::vector<std::string> elem ;
0977     get_treepath_(elem);
0978     std::reverse(elem.begin(), elem.end());
0979     std::stringstream ss ;
0980     int num_elem = elem.size();
0981     for(int i=0 ; i < num_elem ; i++ ) ss << elem[i] << ( i < num_elem - 1 ? "/" : "" ) ;
0982     if(k) ss << "/" << k ;
0983     std::string str = ss.str() ;
0984     return str ;
0985 }
0986 
0987 /**
0988 NPFold::Treepath
0989 -----------------
0990 
0991 Disconnected fold has empty string "" treepath, see::
0992 
0993      TEST=subcopy ~/np/tests/NPFold_copy_test.sh
0994 
0995      NPFold::Treepath(zzz)   : {/z/zz}
0996      NPFold::Treepath(zzz_c) : {}
0997 
0998 
0999 **/
1000 
1001 inline std::string NPFold::Treepath(const NPFold* f)
1002 {
1003     std::stringstream ss ;
1004     ss << "{" << ( f ? f->get_treepath() : "-" ) << "}" ;
1005     std::string str = ss.str() ;
1006     return str ;
1007 }
1008 
1009 
1010 inline NPFold* NPFold::get_subfold(const char* f) const
1011 {
1012     int idx = get_subfold_idx(f) ;
1013     return idx == UNDEF ? nullptr : get_subfold(idx) ;
1014 }
1015 inline bool NPFold::has_subfold(const char* f) const
1016 {
1017     int idx = get_subfold_idx(f) ;
1018     return idx != UNDEF ;
1019 }
1020 
1021 
1022 /**
1023 NPFold::find_arrays_with_key
1024 -----------------------------
1025 
1026 Collect arrays and treepaths within this fold that have the query key.
1027 Would normally expect either zero or one entries.
1028 
1029 The query key has ".npy" appended if not already present.
1030 
1031 **/
1032 
1033 inline void NPFold::find_arrays_with_key(std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key) const
1034 {
1035     std::string q = FormKey(q_key);
1036     for(int i=0 ; i < int(kk.size()) ; i++)
1037     {
1038         const char* k = kk[i].c_str();
1039         const NP* a = aa[i] ;
1040         bool qk_match = strcmp(q.c_str(), k) == 0 ;
1041 
1042         if(qk_match)
1043         {
1044             std::string t = get_treepath(k);
1045             rr.push_back(a);
1046             tt.push_back(t);
1047         }
1048     }
1049 }
1050 
1051 /**
1052 NPFold::find_arrays_with_key_r
1053 --------------------------------
1054 
1055 Recursively collect arrays and treepaths within the entire tree of folders that
1056 have the query key.
1057 
1058 **/
1059 
1060 inline void NPFold::find_arrays_with_key_r(std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key) const
1061 {
1062     FindArraysWithKey_r(this, rr, tt, q_key, 0);
1063 }
1064 
1065 inline void NPFold::FindArraysWithKey_r(const NPFold* nd, std::vector<const NP*>& rr, std::vector<std::string>& tt, const char* q_key, int d)
1066 {
1067     nd->find_arrays_with_key(rr, tt, q_key);
1068     for(int i=0 ; i < int(nd->subfold.size()) ; i++ ) FindArraysWithKey_r(nd->subfold[i], rr, tt, q_key, d+1 );
1069 }
1070 
1071 inline std::string NPFold::DescArraysAndPaths( std::vector<const NP*>& rr, std::vector<std::string>& tt ) // static
1072 {
1073     assert( rr.size() == tt.size() );
1074     int num = rr.size() ;
1075     std::stringstream ss ;
1076     ss << "NPFold::DescArraysAndPaths num " << num << "\n" ;
1077     for(int i=0 ; i < num ; i++ )
1078     {
1079         const NP* r = rr[i] ;
1080         const char* t = tt[i].c_str() ;
1081         ss
1082            << std::setw(10) << r->sstr()
1083            << " : "
1084            << std::setw(30) << ( t ? t : "-" )
1085            << " : "
1086            << r
1087            << "\n"
1088            ;
1089     }
1090     std::string str = ss.str() ;
1091     return str ;
1092 }
1093 
1094 
1095 
1096 
1097 
1098 /**
1099 NPFold::find_array
1100 --------------------
1101 
1102 0. split apath into base and name
1103 1. find the subfold using *base*
1104 2. get the array from the subfold
1105 
1106 **/
1107 
1108 inline const NP* NPFold::find_array(const char* apath) const
1109 {
1110     std::string base = U::DirName(apath);
1111     std::string name = U::BaseName(apath);
1112     return find_array( base.c_str(), name.c_str()) ;
1113 }
1114 
1115 inline const NP* NPFold::find_array(const char* base, const char* name) const
1116 {
1117     const NPFold* fold = find_subfold(base);
1118     return fold ? fold->get(name) : nullptr  ;
1119 }
1120 
1121 inline NPFold* NPFold::find_subfold_(const char* qpath) const
1122 {
1123     const NPFold* f = find_subfold(qpath) ;
1124     return const_cast<NPFold*>(f) ;
1125 }
1126 
1127 /**
1128 NPFold::find_subfold using full subfold qpath, start path is ""
1129 ----------------------------------------------------------------
1130 
1131 0. recursively collects vectors of folds and paths
1132 1. attempts to match the qpath with the vector of paths to get the idx
1133 2. returns the subfold or nullptr if not found
1134 
1135 **/
1136 
1137 inline const NPFold* NPFold::find_subfold(const char* qpath) const
1138 {
1139     std::vector<const NPFold*> folds ;
1140     std::vector<std::string>   paths ;
1141     Traverse_r( this, "",  folds, paths, 0, MXD_NOLIMIT  );
1142     size_t idx = std::distance( paths.begin(), std::find( paths.begin(), paths.end(), qpath ) ) ;
1143 
1144     if(VERBOSE)
1145     {
1146         std::cout
1147             << "NPFold::find_subfold"
1148             << " qpath[" << qpath << "]" << std::endl
1149             << " paths.size " << paths.size() << std::endl
1150             << " folds.size " << folds.size() << std::endl
1151             << " idx " << idx << std::endl
1152             ;
1153 
1154         for(unsigned i=0 ; i < paths.size() ; i++)
1155             std::cout << i << " [" << paths[i].c_str() << "]" << std::endl ;
1156     }
1157     return idx < paths.size() ? folds[idx] : nullptr ;
1158 }
1159 
1160 
1161 
1162 
1163 inline const void NPFold::find_subfold_with_all_keys(
1164     std::vector<const NPFold*>& subs,
1165     const char* keys_,
1166     char delim ) const
1167 {
1168     int num_sub = get_num_subfold();
1169     for(int i=0 ; i < num_sub ; i++)
1170     {
1171         const NPFold* sub = get_subfold(i) ;
1172         if(sub->has_all_keys(keys_, delim)) subs.push_back(sub) ;
1173     }
1174 }
1175 
1176 inline const void NPFold::find_subfold_with_all_keys(
1177     std::vector<const NPFold*>& subs,
1178     std::vector<std::string>&   names,
1179     const char* keys_,
1180     char delim ) const
1181 {
1182     int num_sub = get_num_subfold();
1183     for(int i=0 ; i < num_sub ; i++)
1184     {
1185         const NPFold* sub = get_subfold(i) ;
1186         const char* name = get_subfold_key(i) ;
1187         if(sub->has_all_keys(keys_, delim))
1188         {
1189             subs.push_back(sub) ;
1190             names.push_back(name) ;
1191         }
1192     }
1193 }
1194 
1195 /**
1196 NPFold::get_subfold_with_intkey
1197 ---------------------------------
1198 
1199 Examples of intkey with prefix 'f': f000 f001
1200 
1201 **/
1202 
1203 inline void NPFold::get_subfold_with_intkey(std::vector<const NPFold*>& subs, char prefix) const
1204 {
1205     int num_sub = subfold.size();
1206     for(int i=0 ; i < num_sub ; i++)
1207     {
1208         const NPFold* sub = subfold[i] ;
1209         const std::string& fk = ff[i] ;
1210         const char* fkk = fk.c_str();
1211         if( strlen(fkk) > 1 && fkk[0] == prefix && U::IsIntegerString(fkk+1) ) subs.push_back(sub);
1212     }
1213 }
1214 inline bool NPFold::all_subfold_with_intkey(char prefix) const
1215 {
1216     std::vector<const NPFold*> subs ;
1217     get_subfold_with_intkey(subs, prefix);
1218     return subfold.size() == subs.size();
1219 }
1220 
1221 inline void NPFold::get_all_subfold_unique_keys(std::vector<std::string>& uk) const
1222 {
1223     int num_sub = subfold.size();
1224     for(int i=0 ; i < num_sub ; i++)
1225     {
1226         const NPFold* sub = subfold[i] ;
1227         int num_k = sub->kk.size();
1228         for(int j=0 ; j < num_k ; j++)
1229         {
1230             const std::string& k = sub->kk[j];
1231             if(std::find(uk.begin(), uk.end(), k) == uk.end()) uk.push_back(k);
1232         }
1233     }
1234 }
1235 
1236 
1237 
1238 
1239 
1240 
1241 
1242 /**
1243 NPFold::concat_strict
1244 ----------------------
1245 
1246 Concatenate common arrays from level 1 subfolds
1247 into the top level fold.
1248 
1249 Typical usage::
1250 
1251     fold->concat() ;
1252     fold->clear_subfold();
1253 
1254 Invoking *clear_subfold* after *concat* tidies up
1255 resources used for the sub arrays halving the memory
1256 usage.
1257 
1258 The trivial case of a single subfold is handled by simply
1259 adding the subfold array pointers at top level and
1260 invoking NPFold::set_skipdelete on the subfold to
1261 prevent the active arrays from being deleted by clear_subfold.
1262 
1263 Note that skipdelete is not called at top level, so the resources
1264 will be released when the top level is cleared.
1265 
1266 Future
1267 ~~~~~~~~
1268 
1269 This approach is simple but transiently requires twice the end state memory.
1270 Other more progressive approaches might avoid being so memory expensive.
1271 Example of progressive approach would be to add an NPFold to another
1272 one by one.
1273 
1274 Actually for the application of combining arrays from multiple launches,
1275 there may be little advantage with more involved approaches
1276 as probably the number of launches will normally be 1 and sometimes 2 or 3.
1277 
1278 **/
1279 
1280 inline int NPFold::concat_strict(std::ostream* out)
1281 {
1282     bool zero_sub = has_zero_subfold();
1283     if(out) *out << "NPFold::concat_strict zero_sub " << ( zero_sub ? "YES" : "NO " ) << "\n" ;
1284     if(zero_sub)
1285     {
1286         if(out) *out << "NPFold::concat_strict zero_sub TRIVIAL NOTHING TO CONCAT \n" ;
1287         return 0 ;
1288     }
1289 
1290     bool can = can_concat(out);
1291     if(!can)
1292     {
1293         std::cerr << "NPFold::concat_strict can_concat FAIL : problem with subfold ? return 1 \n";
1294         return 1 ;
1295     }
1296 
1297 
1298     int num_sub = subfold.size();
1299     const NPFold* sub0 = num_sub > 0 ? subfold[0] : nullptr  ;
1300     const std::vector<std::string>* kk0 = sub0 ? &(sub0->kk) : nullptr ;
1301 
1302     int num_k = kk0 ? kk0->size() : 0 ;
1303 
1304     if(out) *out
1305         << "NPFold::concat_strict"
1306         << " num_sub " << num_sub
1307         << " num_k " << num_k
1308         << "\n"
1309         ;
1310 
1311     for(int i=0 ; i < num_k ; i++)
1312     {
1313         const char* k = (*kk0)[i].c_str();
1314         NP* a = concat_(k, out );
1315 
1316         if(out) *out
1317             << "NPFold::concat_strict"
1318             << " k " << ( k ? k : "-" )
1319             << " a " << ( a ? a->sstr() : "-" )
1320             << "\n"
1321             ;
1322 
1323         add(k, a);
1324     }
1325     return 0 ;
1326 }
1327 
1328 inline int NPFold::concat(std::ostream* out)
1329 {
1330     bool zero_sub = has_zero_subfold();
1331     if(out) *out << "NPFold::concat zero_sub " << ( zero_sub ? "YES" : "NO " ) << "\n" ;
1332     if(zero_sub)
1333     {
1334         if(out) *out << "NPFold::concat zero_sub TRIVIAL NOTHING TO CONCAT \n" ;
1335         return 0 ;
1336     }
1337 
1338     std::vector<std::string> uk ;
1339     get_all_subfold_unique_keys(uk);
1340 
1341     int num_uk = uk.size() ;
1342 
1343     if(out) *out
1344         << "NPFold::concat"
1345         << " subfold.size " << subfold.size()
1346         << " num_uk " << num_uk
1347         << "\n"
1348         ;
1349 
1350 
1351     for(int i=0 ; i < num_uk ; i++)
1352     {
1353         const char* k = uk[i].c_str();
1354 
1355         NP* a = concat_(k, out );
1356 
1357         if(out) *out
1358             << "NPFold::concat"
1359             << " k " << ( k ? k : "-" )
1360             << " a " << ( a ? a->sstr() : "-" )
1361             << "\n"
1362             ;
1363 
1364         add(k, a);
1365     }
1366 
1367     return 0 ;
1368 }
1369 
1370 
1371 
1372 
1373 
1374 
1375 /**
1376 NPFold::concat_
1377 ----------------
1378 
1379 Concatenates arrays with key *k* from all immediate subfold.
1380 
1381 When there is only one subfold the concat is trivially
1382 done by adding subfold arrays to this fold.
1383 However in that situation the subfold must be
1384 marked skipdelete to prevent clear_subfold which
1385 is recommended after concat from deleting the
1386 active top level array.
1387 
1388 **/
1389 
1390 inline NP* NPFold::concat_(const char* k, std::ostream* out)
1391 {
1392     int num_sub = subfold.size();
1393     if( num_sub == 0 ) return nullptr ;
1394 
1395     NP* a = nullptr ;
1396     if( num_sub == 1 )
1397     {
1398         NPFold* sub0 = subfold[0] ;
1399         const NP* a0 = sub0->get(k);
1400         a = const_cast<NP*>(a0) ;
1401         sub0->set_skipdelete(true);
1402         if(out) *out << "NPFold::concat_ trivial concat set skipdelete on the single subfold \n" ;
1403     }
1404     else if( num_sub > 1 )
1405     {
1406         std::vector<const NP*> aa ;
1407         for(int i=0 ; i < num_sub ; i++)
1408         {
1409             const NPFold* sub = subfold[i] ;
1410             const NP* asub = sub->get(k);
1411             if(asub) aa.push_back(asub);
1412         }
1413         if(out) *out << "NPFold::concat_ non-trivial concat \n" ;
1414         a = NP::Concatenate(aa);
1415     }
1416     return a ;
1417 }
1418 
1419 
1420 
1421 
1422 
1423 /**
1424 NPFold::can_concat
1425 -------------------
1426 
1427 Require:
1428 
1429 1. two level tree of subfold, ie maxdepth is 1
1430 2. integer string keys with f prefix (for python identifier convenience) giving the concat order
1431 3. common array keys in all subfold::
1432 
1433     f000/[a.npy,b.npy,c.npy]
1434     f001/[a.npy,b.npy,c.npy]
1435     f002/[a.npy,b.npy,c.npy]
1436 
1437 4. all common array keys must not be present within the top level folder
1438 
1439 
1440 The trivial case of a single subfold is handled
1441 simply by adding the subfold pointers to this
1442 fold and calling NPFold::set_skipdelete on the
1443 single subfold.
1444 
1445 **/
1446 
1447 inline bool NPFold::can_concat(std::ostream* out) const
1448 {
1449     int d = maxdepth();
1450     if(out) *out << "NPFold::can_concat maxdepth " << d << "\n" ;
1451     if(d != 1) return false ;
1452 
1453     int num_sub = subfold.size();
1454     if(out) *out << "NPFold::can_concat num_sub " << num_sub << "\n" ;
1455     if(num_sub == 0) return false ;  // ACTUALLY : THATS TRIVIAL CASE
1456 
1457     char prefix = INTKEY_PREFIX ;
1458     bool all_intkey = all_subfold_with_intkey(prefix);
1459     if(out) *out << "NPFold::can_concat all_intkey " << ( all_intkey ? "YES" : "NO " )  << "\n" ;
1460     if(!all_intkey) return false ;
1461 
1462     const NPFold* sub0 = subfold[0] ;
1463     const std::vector<std::string>& kk0 = sub0->kk ;
1464     // reliance on the first sub being complete is not appropriate
1465     // need to collect unique keys from all subs
1466     // and combine as they are available
1467 
1468     int num_top_kk0 = count_keys(kk0);
1469     if(out) *out << "NPFold::can_concat num_top_kk0 " << num_top_kk0 << "\n" ;
1470     if(num_top_kk0 > 0) return false ;
1471 
1472     int sub_with_all_kk0 = 1 ;
1473     for(int i=1 ; i < num_sub ; i++) if(subfold[i]->has_all_keys(kk0)) sub_with_all_kk0 += 1 ;
1474     if(out) *out << "NPFold::can_concat sub_with_all_kk0 " << sub_with_all_kk0 << "\n" ;
1475     // HMM: when really slicing small for debug purposes it can happen that do not
1476     // get any hits from some slices
1477 
1478     bool can = sub_with_all_kk0 == num_sub ;
1479     if(out) *out << "NPFold::can_concat can " << ( can ? "YES" : "NO " )  << "\n" ;
1480     return can ;
1481 }
1482 
1483 /*
1484 NPFold::maxdepth
1485 -----------------
1486 
1487 Maximum depth of the tree of subfold, 0 for a single node tree.
1488 
1489 */
1490 
1491 
1492 inline int NPFold::maxdepth() const
1493 {
1494     return MaxDepth_r(this, 0);
1495 }
1496 
1497 inline int NPFold::MaxDepth_r(const NPFold* nd, int d) // static
1498 {
1499     assert( nd->subfold.size() == nd->ff.size() );
1500     int num_sub = nd->subfold.size();
1501     if(num_sub == 0) return d ;
1502 
1503     int mx = 0 ;
1504     for(int i=0 ; i < num_sub ; i++)
1505     {
1506         const NPFold* sub = nd->subfold[i] ;
1507         mx = std::max(mx, MaxDepth_r(sub, d+1));
1508     }
1509     return mx ;
1510 
1511 }
1512 
1513 
1514 
1515 
1516 
1517 /**
1518 NPFold::Traverse_r
1519 -------------------
1520 
1521 Traverse starting from a single NPFold and proceeding
1522 recursively to visit its subfold and their subfold and so on.
1523 Collects all folds and paths in vectors, where the paths
1524 are concatenated from the keys at each recursion level just
1525 like a file system.
1526 
1527 **/
1528 
1529 inline int NPFold::Traverse_r(
1530      const NPFold* nd,
1531      std::string path,
1532      std::vector<const NPFold*>& folds,
1533      std::vector<std::string>& paths,
1534      int d,
1535      int mxd ) // static
1536 {
1537 
1538     assert( nd->subfold.size() == nd->ff.size() );
1539     unsigned num_sub = nd->subfold.size();
1540 
1541     if(mxd == MXD_NOLIMIT || d <= mxd )
1542     {
1543         folds.push_back(nd);
1544         paths.push_back(path);
1545     }
1546 
1547     int tot_items = nd->num_items() ;
1548 
1549     for(unsigned i=0 ; i < num_sub ; i++)
1550     {
1551         const NPFold* sub = nd->subfold[i] ;
1552         std::string subpath = FormSubPath(path.c_str(), nd->ff[i].c_str(), '/' ) ;
1553 
1554         int num = Traverse_r( sub, subpath,  folds, paths, d+1, mxd );
1555         tot_items += num ;
1556     }
1557     return tot_items ;
1558 }
1559 
1560 
1561 
1562 
1563 
1564 
1565 inline std::string NPFold::FormSubPath(const char* base, const char* sub, char delim ) // static
1566 {
1567     assert(sub) ; // base can be nullptr : needed for root, but sub must always be defined
1568     std::stringstream ss ;
1569     if(base && strlen(base) > 0) ss << base << delim ;
1570     ss << sub ;
1571     std::string s = ss.str();
1572     return s ;
1573 }
1574 
1575 /**
1576 NPFold::desc_subfold
1577 ---------------------
1578 
1579 Provides summary information for the subfold of this fold
1580 acting as an index to the full details that follow for each
1581 subfold and so on recursively.
1582 
1583 **/
1584 
1585 inline std::string NPFold::desc_subfold(const char* top)  const
1586 {
1587     std::vector<const NPFold*> folds ;
1588     std::vector<std::string>   paths ;
1589     assert( folds.size() == paths.size() );
1590 
1591     int tot_items = Traverse_r( this, top,  folds, paths, 0, MXD_NOLIMIT );
1592 
1593     std::stringstream ss ;
1594     ss << " tot_items " << tot_items << std::endl ;
1595     ss << " folds " << folds.size() << std::endl ;
1596     ss << " paths " << paths.size() << std::endl ;
1597     for(int i=0 ; i < int(paths.size()) ; i++)
1598     {
1599         const NPFold* f = folds[i] ;
1600         const std::string& p = paths[i] ;
1601         ss << std::setw(3) << i
1602            << " [" << p << "] "
1603            << f->smry()
1604            << std::endl
1605            ;
1606     }
1607 
1608     if(nodata) ss << " NODATA " ;
1609 
1610     std::string s = ss.str();
1611     return s ;
1612 }
1613 
1614 /**
1615 NPFold::find_subfold_with_prefix
1616 --------------------------------
1617 
1618 **/
1619 
1620 
1621 inline void NPFold::find_subfold_with_prefix(
1622     std::vector<const NPFold*>& subs,
1623     std::vector<std::string>* subpaths,
1624     const char* prefix,
1625     int maxdepth ) const
1626 {
1627     std::vector<const NPFold*> folds ;
1628     std::vector<std::string>   paths ;
1629 
1630 
1631     int tot_items = Traverse_r( this, TOP,  folds, paths, 0, maxdepth );
1632 
1633     assert( folds.size() == paths.size() );
1634     int num_paths = paths.size();
1635 
1636     bool dump = false ;
1637 
1638     if(dump)
1639     {
1640         std::cerr
1641             << "NPFold::find_subfold_with_prefix"
1642             << " prefix " << ( prefix ? prefix : "-" )
1643             << " maxdepth " << maxdepth
1644             << " TOP " << TOP
1645             << " folds.size " << folds.size()
1646             << " paths.size " << paths.size()
1647             << " tot_items " << tot_items
1648             << " nodata " << nodata
1649             << std::endl
1650             ;
1651 
1652         for(int i=0 ; i < num_paths ; i++) std::cerr
1653             << "[" << paths[i] << "]"
1654             << std::endl
1655             ;
1656     }
1657 
1658     if(nodata == false && tot_items == 0) return ;
1659 
1660     for(int i=0 ; i < num_paths ; i++)
1661     {
1662         const NPFold* f = folds[i] ;
1663         const char* p = paths[i].c_str() ;
1664         if(U::StartsWith(p, prefix))
1665         {
1666             subs.push_back(f);
1667             if(subpaths) subpaths->push_back(p);
1668         }
1669     }
1670 }
1671 
1672 inline std::string NPFold::DescFoldAndPaths( const std::vector<const NPFold*>& subs, const std::vector<std::string>& subpaths )
1673 {
1674     assert( subs.size() == subpaths.size() );
1675     std::stringstream ss ;
1676     ss << "[NPFold::DescFoldAndPaths\n" ;
1677     for(int i=0 ; i < int(subs.size()) ; i++ )
1678     {
1679         const NPFold* sub = subs[i];
1680         const char* p = subpaths[i].c_str(); \
1681         ss
1682            << " sub " << ( sub ? "YES" : "NO " )
1683            << "  p  " << p
1684            << "\n"
1685            ;
1686     }
1687     ss << "]NPFold::DescFoldAndPaths\n" ;
1688     std::string str = ss.str() ;
1689     return str ;
1690 }
1691 
1692 
1693 
1694 
1695 inline bool NPFold::is_empty() const
1696 {
1697     return total_items() == 0 ;
1698 }
1699 
1700 /**
1701 NPFold::total_items
1702 ---------------------
1703 
1704 Assuming that a NoData fold with some metadata
1705 will have total_items greater than zero ?
1706 
1707 **/
1708 
1709 inline int NPFold::total_items() const
1710 {
1711     std::vector<const NPFold*> folds ;
1712     std::vector<std::string>   paths ;
1713 
1714     int tot_items = Traverse_r( this, TOP,  folds, paths, 0, MXD_NOLIMIT );
1715     return tot_items ;
1716 }
1717 
1718 
1719 // ] subfold handling
1720 
1721 
1722 inline void NPFold::add(int ikey, const NP* a, char prefix, int wid)
1723 {
1724     std::string skey = U::FormNameWithPrefix(prefix, ikey, wid);
1725     add(skey.c_str(), a );
1726 }
1727 
1728 
1729 /**
1730 NPFold::add
1731 ------------
1732 
1733 CAUTION : NO ARRAY COPYING IS DONE,
1734 this simply collects key and pointer into vectors,
1735 but clearing will delete those arrays.
1736 
1737 * regard everything added to NPFold to belong to the fold.
1738 * input pointers will become stale after clear, dereferencing
1739   them will SIGSEGV (if you are lucky)
1740 
1741 This approach is used as NPFold is intended to work
1742 with very large multi-gigabyte arrays : so users
1743 need to think carefully regarding memory management.
1744 
1745 When *k* ends with ".txt" the key is changed to ".npy"
1746 to simplify handling of empty NPX::Holder arrays.
1747 
1748 Previously only did that for "a->is_empty()" but
1749 as also need the change on find decided its simpler
1750 to always do that.
1751 
1752 **/
1753 
1754 inline void NPFold::add(const char* k, const NP* a)
1755 {
1756     if(a == nullptr) return ;
1757     bool change_txt_to_npy = true ;
1758     std::string key = FormKey(k, change_txt_to_npy );
1759     add_(key.c_str(), a );
1760 }
1761 
1762 /**
1763 NPFold::add_
1764 --------------
1765 
1766 This lower level method does not add DOT_NPY to keys
1767 
1768 NB the assertion that the key is not already present
1769 avoids a potential memory leak if we were to just replace
1770 the pointer, losing connection with that previous allocation
1771 
1772 
1773 **/
1774 inline void NPFold::add_(const char* k, const NP* a)
1775 {
1776     if(verbose_)
1777     {
1778         std::string p = get_treepath(k);
1779         std::cerr << "NPFold::add_ [" << p << "]" << a << " " << a->sstr() << "\n" ;
1780     }
1781 
1782     int k_idx = get_key_idx(k);
1783     bool have_key_already = k_idx != UNDEF ;
1784     if(have_key_already) std::cerr
1785         << "NPFold::add_ FATAL : have_key_already [" << k << "]  k_idx[" << k_idx << "]\n"  << desc_(0) ;
1786     assert( !have_key_already );
1787 
1788 
1789     int a_idx = get_arr_idx(a);
1790     bool have_arr_already = a_idx != UNDEF ;
1791     if(have_arr_already) std::cerr
1792         << "NPFold::add_ FATAL : have_arr_already k[" << k << "] a_idx[" << a_idx << "]\n" << desc_(0) ;
1793     assert( !have_arr_already );
1794 
1795     kk.push_back(k);
1796     aa.push_back(a);
1797 }
1798 
1799 
1800 /**
1801 NPFold::set
1802 -----------
1803 
1804 If an array corresponding to key *k* is not present
1805 then the array is added.
1806 
1807 If an array corresponding to key *k* is already present
1808 then that array is deleted with the argument array replacing it.
1809 
1810 Attempting to set the same array twice silently does nothing.
1811 
1812 **/
1813 
1814 inline void NPFold::set(const char* k, const NP* a)
1815 {
1816     int idx = find(k);
1817     if(idx == UNDEF)
1818     {
1819         add(k, a);
1820     }
1821     else
1822     {
1823         const NP* old_a = aa[idx] ;
1824         if( old_a == a ) return ;
1825         delete old_a ;
1826         aa[idx] = a ;
1827     }
1828 }
1829 
1830 
1831 
1832 
1833 
1834 /**
1835 NPFold::SplitKeys
1836 --------------------
1837 
1838 FormKey which adds .npy if not already present is applied to form the elem
1839 as that is done by NPFold::add
1840 
1841 **/
1842 
1843 inline void NPFold::SplitKeys( std::vector<std::string>& elem , const char* keylist, char delim) // static
1844 {
1845     bool change_txt_to_npy = false ;
1846 
1847     std::stringstream ss;
1848     ss.str(keylist)  ;
1849     std::string s;
1850     while (std::getline(ss, s, delim)) elem.push_back(FormKey(s.c_str(),change_txt_to_npy));
1851 }
1852 
1853 inline std::string NPFold::DescKeys( const std::vector<std::string>& elem, char delim )
1854 {
1855     int num_elem = elem.size();
1856     std::stringstream ss;
1857     for(int i=0 ; i < num_elem; i++)
1858     {
1859         ss << elem[i] ;
1860         if( i < num_elem - 1) ss << delim ;
1861     }
1862     std::string str = ss.str();
1863     return str ;
1864 }
1865 
1866 
1867 
1868 
1869 
1870 
1871 /**
1872 NPFold::clear (clearing this fold and all subfold recursively)
1873 ----------------------------------------------------------------
1874 
1875 **/
1876 inline void NPFold::clear()
1877 {
1878     if(verbose_)
1879     {
1880         std::string p = get_treepath();
1881         std::cerr << "NPFold::clear ALL [" << p << "]" << this << "\n" ;
1882     }
1883     clear_(nullptr);
1884 }
1885 
1886 
1887 /**
1888 NPFold::clear_
1889 ----------------
1890 
1891 NB: std::vector<NP*>::clear destructs the (NP*)
1892 pointers but not the objects (the NP arrays) they point to.
1893 
1894 
1895 This method is private as it must be used in conjunction with
1896 NPFold::clear_except in order to keep (key, array) pairs
1897 of listed keys.
1898 
1899 1. check_integrity (non-recursive)
1900 2. each NP array with corresponding key not in the keep list is deleted
1901 3. clears the kk and aa vectors
1902 4. for each subfold call NPFold::clear on it and clear the subfold and ff vectors
1903 
1904 
1905 HUH: CLEARS ARRAY POINTER VECTOR BUT DOES NOT DELETE
1906 ARRAYS WITH KEYS IN THE KEEP LIST SO IT LOSES
1907 ARRAY POINTERS OF KEPT ARRAYS
1908 
1909 THAT CAN ONLY WORK IF THOSE POINTERS WERE GRABBED PREVIOUSLY
1910 AS DEMONSTRATED BY clear_except
1911 
1912 **/
1913 
1914 inline void NPFold::clear_(const std::vector<std::string>* keep)
1915 {
1916     check_integrity();
1917     clear_arrays(keep);
1918     clear_subfold();
1919 }
1920 
1921 
1922 
1923 
1924 inline void NPFold::clear_arrays(const std::vector<std::string>* keep)
1925 {
1926     for(unsigned i=0 ; i < aa.size() ; i++)
1927     {
1928         const NP* a = aa[i];
1929         const std::string& k = kk[i] ;
1930         bool listed = keep && std::find( keep->begin(), keep->end(), k ) != keep->end() ;
1931         if(!listed && !skipdelete)
1932         {
1933             if(verbose_)
1934             {
1935                 std::string p = get_treepath(k.c_str());
1936                 std::cerr << "NPFold::clear_arrays.delete[" << p << "]" << a << " " << a->sstr() << "\n";
1937             }
1938             delete a ;
1939         }
1940     }
1941     aa.clear();
1942     kk.clear();
1943 }
1944 
1945 
1946 /**
1947 NPFold::clear_subfold
1948 ----------------------
1949 
1950 CAUTION: after doing clear_subfold nullify any pointers to objects that
1951 were added to the subfolds and that are deleted by the clear_subfold.
1952 This avoids SIGSEGV from dereferencing those stale pointers.
1953 
1954 **/
1955 
1956 inline void NPFold::clear_subfold()
1957 {
1958     if(verbose_)
1959     {
1960         std::string p = get_treepath();
1961         std::cerr << "NPFold::clear_subfold[" << p << "]" << this << "\n";
1962     }
1963 
1964     check_integrity();
1965     int num_sub = subfold.size() ;
1966     [[maybe_unused]] int num_ff = ff.size() ;
1967     assert( num_ff == num_sub ) ;
1968 
1969     for(int i=0 ; i < num_sub ; i++)
1970     {
1971         NPFold* sub = const_cast<NPFold*>(subfold[i]) ;
1972         sub->clear();
1973     }
1974     subfold.clear();
1975     ff.clear();       // folder keys
1976 }
1977 
1978 
1979 /**
1980 NPFold::clear_except
1981 -----------------------
1982 
1983 Clears the folder but preserves the (key, array) pairs
1984 listed in the keeplist of keys.
1985 
1986 copy:false
1987     uses the old arrays
1988 
1989 copy:true
1990     creates copies of the arrays that are kept
1991 
1992 
1993 It is not so easy to do partial erase from vector
1994 as the indices keep changing as elements are removed.
1995 So take a simpler approach:
1996 
1997 1. first copy keys and arrays identified by the *keeplist* into tmp_kk, tmp_aa
1998 2. do a normal clear of all elements, which deletes
1999 3. add copied tmp_aa tmp_kk back to the fold
2000 
2001 NB that this means old pointers will be invalidated.
2002 Unsure if that will be a problem.
2003 
2004 
2005 Q: HOW TO USE THIS WITHOUT LEAKING IN COPY:TRUE AND FALSE MODES ?
2006 
2007 HMM: need to delete what is cleared to avoid a leak ? CONFIRM THIS
2008 
2009 
2010 **/
2011 
2012 
2013 inline void NPFold::clear_except_(const std::vector<std::string>& keep, bool copy )
2014 {
2015     check_integrity();
2016 
2017     std::vector<const NP*>   tmp_aa ;
2018     std::vector<std::string> tmp_kk ;
2019 
2020     for(unsigned i=0 ; i < aa.size() ; i++)
2021     {
2022         const NP* a = aa[i];
2023         const std::string& k = kk[i] ;
2024         bool listed = std::find( keep.begin(), keep.end(), k ) != keep.end() ;
2025         if(listed)
2026         {
2027             tmp_aa.push_back(copy ? NP::MakeCopy(a) : a );
2028             tmp_kk.push_back(k);
2029         }
2030     }
2031 
2032     if(copy == true)
2033     {
2034         clear_(nullptr);  // remove all (k,a) pairs
2035     }
2036     else
2037     {
2038         clear_(&keep);    // remove all apart from the keep list, clears all keys
2039     }
2040 
2041 
2042     assert( tmp_aa.size() == tmp_kk.size() );
2043     for(unsigned i=0 ; i < tmp_aa.size() ; i++)
2044     {
2045         const NP* a = tmp_aa[i];
2046         const std::string& k = tmp_kk[i] ;
2047         add_( k.c_str(), a );
2048     }
2049 }
2050 
2051 
2052 inline void NPFold::clear_except(const char* keeplist, bool copy, char delim )
2053 {
2054     if(verbose_) std::cerr
2055          << "NPFold::clear_except("
2056          << " keeplist:" << keeplist
2057          << " copy:" << copy
2058          << " delim:" << delim
2059          << ")"
2060          << std::endl
2061          ;
2062 
2063     std::vector<std::string> keep ;
2064     if(keeplist) SplitKeys(keep, keeplist, delim);
2065 
2066     clear_except_(keep, copy );
2067 }
2068 
2069 /**
2070 NPFold::clear_only
2071 -------------------
2072 
2073 This is an alternative interface to clear_except which
2074 forms a keeplist based on the keys present in the NPFold
2075 and the ones on the clear list.
2076 
2077 HMM: need to delete what is cleared to avoid a leak ? CONFIRM THIS
2078 
2079 **/
2080 
2081 inline void NPFold::clear_only(const char* clearlist, bool copy, char delim )
2082 {
2083     std::vector<std::string> clr ;
2084     if(clearlist) SplitKeys(clr, clearlist, delim);
2085 
2086     std::vector<std::string> keep ;
2087     int num_k = kk.size();
2088     for(int i=0 ; i < num_k ; i++)
2089     {
2090         const char* k = kk[i].c_str();
2091         bool k_listed = std::find( clr.begin(), clr.end(), k ) != clr.end() ;
2092         if(!k_listed) keep.push_back(k) ;
2093     }
2094     clear_except_(keep, copy );
2095 }
2096 
2097 
2098 
2099 inline NPFold* NPFold::deepcopy( const char* keylist, char delim ) const
2100 {
2101     bool shallow_array_copy = false ;
2102     return copy(shallow_array_copy, keylist, delim);
2103 }
2104 inline NPFold* NPFold::shallowcopy( const char* keylist, char delim ) const
2105 {
2106     bool shallow_array_copy = true ;
2107     return copy(shallow_array_copy, keylist, delim);
2108 }
2109 
2110 
2111 /**
2112 NPFold::copy
2113 ---------------
2114 
2115 Formerly returned nullptr when none of this folds keys
2116 are specified in the keylist. However changed this
2117 as sometimes want just the fold metadata.
2118 A new NPFold is created and populated with any keylist
2119 selected arrays from this fold.
2120 
2121 shallow:true
2122     array pointers are copied as is
2123 
2124 shallow:false
2125     arrays are copied and new array pointers are used
2126 
2127 NB the shallow refers to the arrays, not the NPFold, which
2128 are lightweight and whose pointers are never copied as is
2129 
2130 Also the keylist refer to array keys, not folder keys
2131 
2132 **/
2133 
2134 inline NPFold* NPFold::copy(bool shallow_array_copy, const char* keylist, char delim ) const
2135 {
2136     std::vector<std::string> keys ;
2137     if(keylist) SplitKeys(keys, keylist, delim);
2138     // SplitKeys adds .npy to keys if not already present
2139 
2140     int count = count_keys(&keys) ;
2141     if( keylist && count == 0 && VERBOSE) std::cerr
2142         << "NPFold::copy"
2143         << " VERBOSE " << ( VERBOSE ? "YES" : "NO " )
2144         << " NOTE COUNT_KEYS GIVING ZERO "
2145         << " keylist [" << ( keylist ? keylist : "-" ) << "]"
2146         << " keylist.keys [" << DescKeys(keys, delim) << "]"
2147         << " count " << count
2148         << " kk.size " << kk.size()
2149         << " DescKeys(kk) [" << DescKeys(kk,',')  << "]"
2150         << " meta " << ( meta.empty() ? "EMPTY" : meta )
2151         << std::endl
2152         ;
2153 
2154     return NPFold::Copy(this, shallow_array_copy, keylist ? &keys : nullptr );
2155 }
2156 
2157 
2158 inline NPFold* NPFold::Copy(const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys ) // static
2159 {
2160     src->check_integrity();
2161     NPFold* dst = new NPFold ;
2162     CopyMeta(dst, src);
2163     CopyArray(  dst, src, shallow_array_copy, keys );
2164     CopySubfold(dst, src, shallow_array_copy, keys );
2165     dst->check_integrity();
2166     return dst ;
2167 }
2168 
2169 /**
2170 NPFold::CopyMeta
2171 -----------------
2172 
2173 Some members are not copied (verbose_ is copied), namely::
2174 
2175     allowempty
2176     allowonlymeta
2177     skipdelete
2178     parent
2179 
2180 
2181 **/
2182 
2183 inline void NPFold::CopyMeta( NPFold* dst , const NPFold* src ) // static
2184 {
2185     dst->headline = src->headline ;
2186     dst->meta = src->meta ;
2187     dst->names = src->names ;
2188     dst->savedir = src->savedir ? strdup(src->savedir) : nullptr ;
2189     dst->loaddir = src->loaddir ? strdup(src->loaddir) : nullptr ;
2190     dst->nodata  = src->nodata ;
2191     dst->verbose_ = src->verbose_ ;
2192 }
2193 
2194 /**
2195 NPFold::CopyArray
2196 ------------------
2197 
2198 keys:nullptr
2199    signals copy all arrays without selection
2200 
2201 keys:!nullptr
2202    only arrays with listed keys are copied
2203 
2204 **/
2205 
2206 inline void NPFold::CopyArray( NPFold* dst , const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys ) // static
2207 {
2208     for(int i=0 ; i < int(src->aa.size()) ; i++)
2209     {
2210         const NP* a = src->aa[i];
2211         const char* k = src->kk[i].c_str() ;
2212         bool listed = keys != nullptr && std::find( keys->begin(), keys->end(), k ) != keys->end() ;
2213         bool docopy = keys == nullptr || listed ;
2214         const NP* dst_a = docopy ? ( shallow_array_copy ? a : NP::MakeCopy(a) ) : nullptr  ;
2215         if(dst_a) dst->add_( k, dst_a );
2216     }
2217 
2218     if( keys == nullptr )
2219     {
2220         assert( src->aa.size() == dst->aa.size() ) ;
2221     }
2222 }
2223 
2224 inline void NPFold::CopySubfold( NPFold* dst , const NPFold* src, bool shallow_array_copy, std::vector<std::string>* keys ) // static
2225 {
2226     for(int i=0 ; i < int(src->ff.size()) ; i++)
2227     {
2228         const char* k = src->ff[i].c_str() ;
2229         NPFold* fo = Copy(src->subfold[i], shallow_array_copy, keys);
2230         dst->add_subfold( k, fo );
2231     }
2232 }
2233 
2234 
2235 
2236 
2237 
2238 
2239 /**
2240 NPFold::count_keys
2241 ------------------
2242 
2243 Returns a count of immediate keys in the fold that are listed in the keys vector.
2244 
2245 **/
2246 
2247 inline int NPFold::count_keys( const std::vector<std::string>* keys ) const
2248 {
2249     check_integrity();
2250     int count = 0 ;
2251     for(unsigned i=0 ; i < kk.size() ; i++)
2252     {
2253         const char* k = kk[i].c_str() ;
2254         bool listed = keys && std::find( keys->begin(), keys->end(), k ) != keys->end() ;
2255         if(listed) count += 1 ;
2256     }
2257     return count ;
2258 }
2259 
2260 
2261 // single level (non recursive) accessors
2262 
2263 inline int NPFold::num_items() const
2264 {
2265     check_integrity();
2266     return kk.size();
2267 }
2268 
2269 
2270 inline const char* NPFold::get_key(unsigned idx) const
2271 {
2272     return idx < kk.size() ? kk[idx].c_str() : nullptr ;
2273 }
2274 
2275 inline const NP* NPFold::get_array(unsigned idx) const
2276 {
2277     return idx < aa.size() ? aa[idx] : nullptr ;
2278 }
2279 
2280 /**
2281 NPFold::find (non recursive)
2282 -----------------------------
2283 
2284 If the query key *k* does not end with the DOT_NPY ".npy" then that is added before searching.
2285 
2286 std::find returns iterator to the first match
2287 
2288 **/
2289 inline int NPFold::find(const char* k) const
2290 {
2291     bool change_txt_to_npy = true ;
2292     std::string key = FormKey(k, change_txt_to_npy);
2293     size_t idx = std::distance( kk.begin(), std::find( kk.begin(), kk.end(), key.c_str() )) ;
2294     return idx < kk.size() ? idx : UNDEF ;
2295 }
2296 
2297 inline bool NPFold::has_key(const char* k) const
2298 {
2299     int idx = find(k);
2300     return idx != UNDEF  ;
2301 }
2302 inline bool NPFold::has_all_keys(const char* qq_, char delim) const
2303 {
2304     std::vector<std::string> qq ;
2305     U::Split(qq_, delim, qq ) ;
2306     return has_all_keys(qq);
2307 }
2308 
2309 inline bool NPFold::has_all_keys(const std::vector<std::string>& qq) const
2310 {
2311     int num_q = qq.size() ;
2312     int q_count = count_keys( qq );
2313     bool has_all = num_q > 0 && q_count == num_q ;
2314     return has_all ;
2315 }
2316 
2317 inline int NPFold::count_keys(const std::vector<std::string>& qq) const
2318 {
2319     int num_q = qq.size() ;
2320     int q_count = 0 ;
2321     for(int i=0 ; i < num_q ; i++)
2322     {
2323        const char* q = qq[i].c_str() ;
2324        if(has_key(q)) q_count += 1 ;
2325     }
2326     return q_count ;
2327 }
2328 
2329 
2330 inline const NP* NPFold::get(const char* k) const
2331 {
2332     int idx = find(k) ;
2333     return idx == UNDEF ? nullptr : aa[idx] ;
2334 }
2335 
2336 inline NP* NPFold::get_(const char* k)
2337 {
2338     const NP* a = get(k) ;
2339     return const_cast<NP*>(a) ;
2340 }
2341 
2342 
2343 /**
2344 NPFold::get_optional
2345 ---------------------
2346 
2347 For now just the same as NPFold::get but in future
2348 could assert that NPFold::get finds something whereas get_optional
2349 is allowed to return nullptr.
2350 
2351 **/
2352 inline const NP* NPFold::get_optional(const char* k) const
2353 {
2354     return get(k);
2355 }
2356 
2357 
2358 
2359 
2360 
2361 /**
2362 NPFold::get_num
2363 -----------------
2364 
2365 Number of items in the array with key *k*, or 0 if there is no such key.
2366 **/
2367 
2368 inline size_t NPFold::get_num(const char* k) const
2369 {
2370     const NP* a = get(k) ;
2371     return a == nullptr ? 0 : a->shape[0] ;  // UNDEF->0 when changed to size_t
2372 }
2373 
2374 
2375 inline void NPFold::get_counts( std::vector<std::string>* keys, std::vector<size_t>* counts ) const
2376 {
2377     int nkk = kk.size();
2378     for(int i=0 ; i < nkk ; i++)
2379     {
2380         const char* k = kk[i].c_str();
2381         const NP* a = get(k) ;
2382         if(a == nullptr) continue ;
2383         if(keys) keys->push_back(k);
2384         if(counts) counts->push_back(a->shape[0]);
2385     }
2386 }
2387 inline std::string NPFold::DescCounts(const std::vector<std::string>& keys, const std::vector<size_t>& counts )
2388 {
2389     assert( keys.size() == counts.size() );
2390     int num_key = keys.size();
2391     std::stringstream ss ;
2392     ss << "NPFold::DescCounts num_key " << num_key << std::endl ;
2393     for(int i=0 ; i < num_key ; i++ ) ss << std::setw(20) << keys[i] << " : " << counts[i] << std::endl ;
2394     std::string str = ss.str() ;
2395     return str ;
2396 }
2397 
2398 
2399 
2400 
2401 
2402 
2403 
2404 template<typename T> inline T NPFold::get_meta(const char* key, T fallback) const
2405 {
2406     if(meta.empty()) return fallback ;
2407     return NP::GetMeta<T>( meta.c_str(), key, fallback );
2408 }
2409 
2410 template int         NPFold::get_meta<int>(const char*, int ) const ;
2411 template unsigned    NPFold::get_meta<unsigned>(const char*, unsigned ) const  ;
2412 template float       NPFold::get_meta<float>(const char*, float ) const ;
2413 template double      NPFold::get_meta<double>(const char*, double ) const ;
2414 template std::string NPFold::get_meta<std::string>(const char*, std::string ) const ;
2415 
2416 
2417 /**
2418 NPFold::get_meta_string
2419 -------------------------
2420 
2421 If the key is not found returns an empty string
2422 
2423 **/
2424 inline std::string NPFold::get_meta_string(const char* key) const
2425 {
2426     bool meta_empty = meta.empty();
2427     if(meta_empty)
2428     {
2429         std::string tp = get_treepath();
2430         std::cerr
2431             << "NPFold::get_meta_string"
2432             << " meta_empty " << ( meta_empty ? "YES" : "NO " )
2433             << " key " << ( key ? key : "-" )
2434             << " treepath " << tp
2435             << "\n"
2436             ;
2437     }
2438     return meta_empty ? "" : NP::get_meta_string(meta, key);
2439 }
2440 
2441 
2442 
2443 
2444 
2445 template<typename T> inline void NPFold::set_meta(const char* key, T value)
2446 {
2447     NP::SetMeta(meta, key, value);
2448 }
2449 
2450 template void     NPFold::set_meta<int>(const char*, int );
2451 template void     NPFold::set_meta<unsigned>(const char*, unsigned );
2452 template void     NPFold::set_meta<float>(const char*, float );
2453 template void     NPFold::set_meta<double>(const char*, double );
2454 template void     NPFold::set_meta<std::string>(const char*, std::string );
2455 
2456 
2457 
2458 
2459 
2460 
2461 inline int NPFold::save(const char* base_, const char* rel) // not const as sets savedir
2462 {
2463     std::string _base = U::form_path(base_, rel);
2464     const char* base = _base.c_str();
2465     return save(base);
2466 }
2467 
2468 inline int NPFold::save(const char* base_, const char* rel, const char* name) // not const as sets savedir
2469 {
2470     std::string _base = U::form_path(base_, rel, name);
2471     const char* base = _base.c_str();
2472     return save(base);
2473 }
2474 
2475 
2476 
2477 
2478 
2479 
2480 /**
2481 NPFold::save
2482 --------------
2483 
2484 ISSUE : repeated use of save for a fold with no .npy ie with only subfolds
2485 never truncates the index, so it just keeps growing at every save.
2486 
2487 FIXED THIS BY NOT EARLY EXITING NP::WriteNames when kk.size is zero
2488 SO THE INDEX ALWAYS GETS TRUNCATED
2489 
2490 **/
2491 
2492 inline int NPFold::save(const char* base_)  // not const as calls _save
2493 {
2494     const char* base = U::Resolve(base_);
2495 
2496     if(base == nullptr) std::cerr
2497         << "NPFold::save(\"" << ( base_ ? base_ : "-" ) << "\")"
2498         << " did not resolve all tokens in argument "
2499         << std::endl
2500         ;
2501     if(base == nullptr) return 1 ;
2502 
2503     return _save(base) ;
2504 }
2505 
2506 inline int NPFold::save_verbose(const char* base_)  // not const as calls _save
2507 {
2508     const char* base = U::Resolve(base_);
2509     std::cerr
2510         << "NPFold::save(\"" << ( base_ ? base_ : "-" ) << "\")"
2511         << std::endl
2512         << " resolved to  [" << ( base ? base : "ERR-FAILED-TO-RESOLVE-TOKENS" ) << "]"
2513         << std::endl
2514         ;
2515     if(base == nullptr) return 1 ;
2516     return _save(base) ;
2517 }
2518 
2519 
2520 /**
2521 NPFold::_save_local_item_count
2522 --------------------------------
2523 
2524 This is used by SEvt::_save to avoid writing
2525 empty dirs
2526 
2527 TODO: using NP directory making could allow to
2528 encapsulate this within here
2529 
2530 **/
2531 
2532 inline int NPFold::_save_local_item_count() const
2533 {
2534     return kk.size() + ff.size() ;
2535 }
2536 
2537 inline int NPFold::_save_local_meta_count() const
2538 {
2539     bool with_meta = !meta.empty() ;
2540     bool with_names = names.size() > 0 ;
2541     return int(with_meta) + int(with_names) ;
2542 }
2543 
2544 
2545 
2546 /**
2547 NPFold::_save
2548 ---------------
2549 
2550 allowempty
2551     default from ALLOWEMPTY is false
2552     when true proceeds with saving even when no arrays
2553     [HMM: can probably remove this after adding allowonlymeta?]
2554 
2555 allowonlymeta
2556     default from ALLOWONLYMETA is false
2557     when true proceeds with saving when a folder
2558     contains only metadata (ie no arrays or subfold)
2559 
2560 
2561 
2562 onlymeta_proceed
2563     no arrays or subfold but has metadata and allowonlymeta:true
2564 
2565 **/
2566 
2567 inline int NPFold::_save(const char* base)  // not const as sets savedir
2568 {
2569     assert( !nodata );
2570 
2571     int slic = _save_local_item_count();
2572     int slmc = _save_local_meta_count();
2573 
2574     bool slic_proceed = slic > 0 || ( slic == 0 && allowempty == true ) ;
2575     bool onlymeta_proceed = slic == 0 && slmc > 0 && allowonlymeta == true ;
2576     bool proceed = slic_proceed || onlymeta_proceed ;
2577 
2578     if(!proceed) return 1 ;
2579 
2580 
2581     savedir = strdup(base);
2582 
2583     _save_arrays(base);
2584 
2585     NP::WriteNames(base, INDEX, kk );
2586 
2587     NP::WriteNames(base, INDEX, ff, 0, true  ); // append:true : write subfold keys (without .npy ext) to INDEX
2588 
2589     _save_subfold_r(base);
2590 
2591     bool with_meta = !meta.empty() ;
2592 
2593     if(with_meta) U::WriteString(base, META, meta.c_str() );
2594 
2595     NP::WriteNames_Simple(base, NAMES, names) ;
2596 
2597     return 0 ;
2598 }
2599 
2600 
2601 
2602 
2603 inline int NPFold::_save_arrays(const char* base) // using the keys with .npy ext as filenames
2604 {
2605     int count = 0 ;
2606     for(unsigned i=0 ; i < kk.size() ; i++)
2607     {
2608         const char* k = kk[i].c_str() ;
2609         const NP* a = aa[i] ;
2610         if( a == nullptr )
2611         {
2612             if(VERBOSE) std::cerr
2613                 << "NPFold::_save_arrays"
2614                 << " base " << base
2615                 << " k " << k
2616                 << " ERROR MISSING ARRAY FOR KEY "
2617                 << std::endl
2618                 ;
2619         }
2620         else
2621         {
2622             a->save(base, k );
2623             count += 1 ;
2624         }
2625     }
2626     // this motivated adding directory creation to NP::save
2627     return count ;
2628 }
2629 
2630 inline void NPFold::_save_subfold_r(const char* base)  // NB recursively called via NPFold::save
2631 {
2632     assert( subfold.size() == ff.size() );
2633     for(unsigned i=0 ; i < ff.size() ; i++)
2634     {
2635         const char* f = ff[i].c_str() ;
2636         NPFold* sf = subfold[i] ;
2637         sf->save(base, f );
2638     }
2639 }
2640 
2641 
2642 
2643 
2644 /**
2645 NPFold::load_array
2646 --------------------
2647 
2648 0. NP::Load for relp ending .npy otherwise NP::LoadFromTxtFile<double>
2649 1. add the array using relp as the key
2650 
2651 **/
2652 inline void NPFold::load_array(const char* _base, const char* relp)
2653 {
2654     bool is_nodata = NP::IsNoData(_base);
2655     bool is_npy = IsNPY(relp) ;
2656     bool is_txt = IsTXT(relp) ;
2657 
2658 
2659     NP* a = nullptr ;
2660 
2661     if(is_npy)
2662     {
2663         a = NP::Load(_base, relp) ;
2664     }
2665     else if(is_nodata)   // nodata mode only do nodata load of arrays
2666     {
2667         a = nullptr ;
2668     }
2669     else if(is_txt)
2670     {
2671         a = NP::LoadFromTxtFile<double>(_base, relp) ;
2672     }
2673     else
2674     {
2675         a = nullptr ;
2676     }
2677     if(a) add(relp,a ) ;
2678 }
2679 
2680 /**
2681 NPFold::load_subfold
2682 ---------------------
2683 
2684 **/
2685 
2686 inline void NPFold::load_subfold(const char* _base, const char* relp)
2687 {
2688     assert(!IsNPY(relp));
2689     add_subfold(relp,  NPFold::Load(_base, relp) ) ;
2690 }
2691 
2692 
2693 
2694 #ifdef WITH_FTS
2695 inline int NPFold::FTS_Compare(const FTSENT** one, const FTSENT** two)
2696 {
2697     return (strcmp((*one)->fts_name, (*two)->fts_name));
2698 }
2699 
2700 
2701 /**
2702 NPFold::no_longer_used_load_fts
2703 ----------------------------------
2704 
2705 This was formerly called by NPFold::load when the
2706 base directory does not include an INDEX file.
2707 
2708 It has been replaced by NPFold::load_dir
2709 because of the structural difference between
2710 loading with index and loading with fts.
2711 
2712 fts traverses the directory tree and invokes NPFold::load_array when
2713 meeting regular files or symbolic links.
2714 See "man fts" and tests/fts_test.sh for background.
2715 
2716 This fts approach resulted in a single NPFold
2717 with keys containing slash.
2718 
2719 Switching to NPFold::load_dir unifies the structure
2720 with an NPFold for each directory.
2721 
2722 **/
2723 
2724 inline int NPFold::no_longer_used_load_fts(const char* base_)
2725 {
2726     char* base = const_cast<char*>(base_);
2727     char* basepath[2] {base, nullptr};
2728 
2729     // NB fs is file system hierarchy, not just one directory
2730     FTS* fs = fts_open(basepath,FTS_COMFOLLOW|FTS_NOCHDIR,&FTS_Compare);
2731     if(fs == nullptr) return 1 ;
2732 
2733     FTSENT* node = nullptr ;
2734     while((node = fts_read(fs)) != nullptr)
2735     {
2736         switch (node->fts_info)
2737         {
2738             case FTS_D :    // directory being visited in preorder
2739                  break;
2740             case FTS_F  :   // regular file
2741             case FTS_SL :   // symbolic link
2742                 {
2743                     char* relp = node->fts_path+strlen(base)+1 ;
2744                     load_array(base, relp) ;
2745                 }
2746                 break;
2747             default:
2748                 break;
2749         }
2750     }
2751     fts_close(fs);
2752     return 0 ;
2753 }
2754 #endif
2755 
2756 /**
2757 NPFold::load_dir
2758 -----------------
2759 
2760 Loads directory-by-directory into separate NPFold
2761 unlike load_fts that loads entire tree into a single NPFold.
2762 
2763 Excluding files with names ending _meta.txt (eg run_meta.txt) avoids
2764 loading metadata sidecar files as those are loaded whilst loading
2765 the associated array eg run.npy
2766 
2767 **/
2768 
2769 inline int NPFold::load_dir(const char* _base)
2770 {
2771     const char* base = nodata ? _base + 1 : _base ;
2772 
2773     int _DUMP = U::GetEnvInt(load_dir_DUMP , 0);
2774 
2775     if(_DUMP > 0) std::cout << "[" << load_dir_DUMP << " : [" << ( base ? base : "-" )  << "]\n" ;
2776 
2777     std::vector<std::string> names ;
2778     const char* ext = nullptr ;
2779     bool exclude = false ;
2780     bool allow_nonexisting = false ;
2781 
2782     U::DirList(names, base, ext, exclude, allow_nonexisting) ;
2783     if(names.size() == 0) return 1 ;
2784 
2785     for(unsigned i=0 ; i < names.size() ; i++)
2786     {
2787         const char* name = names[i].c_str();
2788         int type = U::PathType(base, name) ;
2789 
2790         if( type == U::FILE_PATH && U::EndsWith(name, "_meta.txt"))
2791         {
2792             if(_DUMP > 0) std::cerr << "-NPFold::load_dir SKIP metadata sidecar " << name << std::endl ;
2793         }
2794         else if( type == U::FILE_PATH )
2795         {
2796             load_array(_base, name) ;
2797         }
2798         else if( type == U::DIR_PATH && U::StartsWith(name, "_"))
2799         {
2800             if(_DUMP > 0) std::cerr << "-NPFold::load_dir SKIP directory starting with _" << name << std::endl ;
2801         }
2802         else if( type == U::DIR_PATH )
2803         {
2804             load_subfold(_base, name);  // instanciates NPFold and add_subfold
2805         }
2806     }
2807 
2808     if(_DUMP > 0) std::cout << "]" << load_dir_DUMP << " : [" << ( base ? base : "-" )  << "]\n" ;
2809 
2810     return 0 ;
2811 }
2812 
2813 
2814 inline int NPFold::load_index(const char* _base)
2815 {
2816     const char* base = nodata ? _base + 1 : _base ;
2817     int _DUMP = U::GetEnvInt(load_index_DUMP,0);
2818     if(_DUMP>0) std::cout << "[" << load_index_DUMP << " : [" << ( base ? base : "-" )  << "]\n" ;
2819 
2820 
2821     std::vector<std::string> keys ;
2822     NP::ReadNames(base, INDEX, keys );
2823     for(unsigned i=0 ; i < keys.size() ; i++)
2824     {
2825         const char* key = keys[i].c_str() ;
2826         if(IsNPY(key))
2827         {
2828             load_array(_base, key );   // invokes *add* appending to kk and aa
2829         }
2830         else
2831         {
2832             load_subfold(_base, key);  // instanciates NPFold and add_subfold
2833         }
2834     }
2835     if(_DUMP>0) std::cout << "]" << load_index_DUMP << " : [" << ( base ? base : "-" )  << "]\n" ;
2836     return 0 ;
2837 }
2838 
2839 /**
2840 NPFold::load
2841 ---------------
2842 
2843 Typical persisted NPFold have index files
2844 so the load_dir is not ordinarily used.
2845 
2846 **/
2847 
2848 inline int NPFold::load(const char* _base)
2849 {
2850     nodata = NP::IsNoData(_base) ;  // _path starting with NP::NODATA_PREFIX eg '@'
2851     const char* base = nodata ? _base + 1 : _base ;
2852 
2853     int _DUMP = U::GetEnvInt(load_DUMP, 0);
2854     if(_DUMP>0) std::cout << "[" << load_DUMP << " " << U::FormatLog() << " : [" << ( base ? base : "-" )  << "]\n" ;
2855 
2856 
2857     bool exists = NP::Exists(base);
2858     if(!exists && _DUMP>0) std::cout << "NPFold::load non-existing base[" << ( base ? base : "-" ) << "]" << std::endl ;
2859     if(!exists) return 1 ;
2860 
2861     loaddir = strdup(base);
2862     bool has_meta = NP::Exists(base, META) ;
2863     if(has_meta) meta = U::ReadString( base, META );
2864 
2865     bool has_names = NP::Exists(base, NAMES) ;
2866     if(has_names) NP::ReadNames( base, NAMES, names );
2867 
2868     bool has_index = NP::Exists(base, INDEX) ;
2869     int rc = has_index ? load_index(_base) : load_dir(_base) ;
2870 
2871     if(_DUMP>0) std::cout << "]" << load_DUMP << " " << U::FormatLog() << " : [" << ( base ? base : "-" ) << " rc " << rc << "]\n" ;
2872     return rc ;
2873 }
2874 inline int NPFold::load(const char* base_, const char* rel0, const char* rel1)
2875 {
2876     std::string base = U::form_path(base_, rel0, rel1);
2877     return load(base.c_str());
2878 }
2879 
2880 inline std::string NPFold::descKeys() const
2881 {
2882     int num_key = kk.size() ;
2883     std::stringstream ss ;
2884     ss << "NPFold::descKeys"
2885        << " kk.size " << num_key
2886        ;
2887     for(int i=0 ; i < num_key ; i++) ss << " [" << kk[i] << "] " ;
2888 
2889     std::string str = ss.str();
2890     return str ;
2891 }
2892 
2893 
2894 inline std::string NPFold::desc() const
2895 {
2896     std::stringstream ss ;
2897     ss << "[NPFold::desc" << std::endl ;
2898     if(!headline.empty()) ss << headline << std::endl ;
2899     ss << "NPFold::desc_subfold"  << std::endl ;
2900     ss << desc_subfold() ;
2901     ss << "NPFold::desc(0) "  << std::endl ;
2902     ss << desc(0) << std::endl ;
2903     ss << "]NPFold::desc" << std::endl ;
2904     std::string str = ss.str();
2905     return str ;
2906 }
2907 
2908 inline std::string NPFold::descMetaKVS() const
2909 {
2910     return NP::DescMetaKVS(meta);
2911 }
2912 
2913 /**
2914 NPFold::getMetaKVS
2915 ---------------------
2916 
2917 Parses the NPFold::meta string into keys, vals and stamps
2918 where stamps are defaulted to zero if the vals do not look
2919 like microsecond timestamps.
2920 
2921 **/
2922 
2923 
2924 inline void NPFold::getMetaKVS(
2925     std::vector<std::string>* keys,
2926     std::vector<std::string>* vals,
2927     std::vector<int64_t>* stamps,
2928     bool only_with_stamp ) const
2929 {
2930     U::GetMetaKVS(meta, keys, vals, stamps, only_with_stamp );
2931 }
2932 
2933 
2934 /**
2935 NPFold::getMetaNumStamp
2936 --------------------------
2937 
2938 **/
2939 
2940 inline int NPFold::getMetaNumStamp() const
2941 {
2942     std::vector<std::string> keys ;
2943     std::vector<int64_t>   stamps ;
2944     bool only_with_stamp = true ;
2945     getMetaKVS(&keys, nullptr, &stamps, only_with_stamp );
2946     assert( keys.size() == stamps.size() );
2947     int count = 0 ;
2948     for(int i=0 ; i < int(keys.size()) ; i++) count += stamps[i] == 0 ? 0 : 1 ;
2949     return count ;
2950 }
2951 
2952 
2953 
2954 
2955 inline std::string NPFold::descMetaKV() const
2956 {
2957     return NP::DescMetaKV(meta);
2958 }
2959 
2960 inline void NPFold::getMetaKV(
2961     std::vector<std::string>* keys,
2962     std::vector<std::string>* vals,
2963     bool only_with_profile ) const
2964 {
2965     NP::GetMetaKV(meta, keys, vals, only_with_profile );
2966 }
2967 
2968 /**
2969 NPFold::getMetaNumProfile
2970 --------------------------
2971 
2972 Returns the number of meta (key,value) entries where the values contain timestamps.
2973 
2974 **/
2975 
2976 inline int NPFold::getMetaNumProfile() const
2977 {
2978     std::vector<std::string> keys ;
2979     std::vector<std::string> vals ;
2980     bool only_with_profile = true ;
2981     getMetaKV(&keys, &vals, only_with_profile );
2982     assert( keys.size() == vals.size() );
2983     int count = keys.size() ;
2984     return count ;
2985 }
2986 
2987 inline void NPFold::setMetaKV(const std::vector<std::string>& keys, const std::vector<std::string>& vals)
2988 {
2989     NP::SetMetaKV_( meta, keys, vals );
2990 }
2991 
2992 inline std::string NPFold::desc_(int depth) const
2993 {
2994     std::stringstream ss ;
2995     ss << "[NPFold::desc_(" << depth << ")\n" ;
2996     ss << brief() << std::endl ;
2997     ss << descMetaKVS() << std::endl ;
2998     for(unsigned i=0 ; i < kk.size() ; i++)
2999     {
3000         const char* k = kk[i].c_str() ;
3001         const NP* a = aa[i] ;
3002         ss
3003            << std::setw(4) << i << " : "
3004            << ( a && a->nodata ? "ND " : "   " )
3005            << Indent(depth*10)
3006            << std::setw(20) << k
3007            << " : " << ( a ? a->sstr() : "-" )
3008            << std::endl
3009            ;
3010     }
3011     ss << "]NPFold::desc_(" << depth << ")\n" ;
3012     std::string str = ss.str();
3013     return str ;
3014 }
3015 inline std::string NPFold::descf_(int depth) const
3016 {
3017     std::stringstream ss ;
3018     ss << "[NPFold::descf_( " << depth << ")\n" ;
3019     for(unsigned i=0 ; i < ff.size() ; i++)
3020     {
3021         const char* f = ff[i].c_str()  ;
3022         ss << std::endl << f << std::endl ;
3023 
3024         NPFold* sf = subfold[i] ;
3025         ss << sf->desc(depth+1) << std::endl ;
3026     }
3027     ss << "]NPFold::descf_( " << depth << ")\n" ;
3028     std::string str = ss.str();
3029     return str ;
3030 }
3031 
3032 
3033 inline std::string NPFold::desc(int depth) const
3034 {
3035     std::stringstream ss ;
3036     ss << desc_(depth);
3037     ss << descf_(depth);
3038     std::string str = ss.str();
3039     return str ;
3040 }
3041 
3042 inline std::string NPFold::Indent(int width)  // static
3043 {
3044     std::string s(width, ' ');
3045     return s ;
3046 }
3047 
3048 inline std::string NPFold::brief() const
3049 {
3050     std::stringstream ss ;
3051     if(loaddir) ss << " loaddir:" << loaddir ;
3052     if(savedir) ss << " savedir:" << savedir ;
3053     ss << stats() ;
3054     std::string str = ss.str();
3055     return str ;
3056 }
3057 
3058 inline std::string NPFold::stats() const
3059 {
3060     std::stringstream ss ;
3061     ss <<  " subfold " << subfold.size() ;
3062     ss << " ff " << ff.size() ;
3063     ss << " kk " << kk.size() ;
3064     ss << " aa " << aa.size() ;
3065     std::string str = ss.str();
3066     return str ;
3067 }
3068 
3069 inline std::string NPFold::smry() const
3070 {
3071     int num_stamp = getMetaNumStamp() ;
3072     std::stringstream ss ;
3073     ss << " stamp:" << num_stamp ;
3074     std::string str = ss.str();
3075     return str ;
3076 }
3077 
3078 
3079 // STATIC CONVERTERS
3080 
3081 
3082 inline void NPFold::Import_MIMSD( std::map<int,std::map<std::string, double>>& mimsd, const NPFold* f ) // static
3083 {
3084     typedef std::map<std::string, double> MSD ;
3085 
3086     int num_items = f->num_items();
3087     for(int idx=0 ; idx < num_items ; idx++)
3088     {
3089         const char* cat = f->get_key(idx);
3090         int icat = U::To<int>(cat);
3091         const NP* a = f->get_array(idx) ;
3092 
3093         MSD& msd = mimsd[icat] ;
3094         NPX::Import_MSD(msd, a );
3095     }
3096 }
3097 
3098 
3099 inline NPFold* NPFold::Serialize_MIMSD(const std::map<int,std::map<std::string, double>>& mimsd ) // static
3100 {
3101     NPFold* f = new NPFold ;
3102 
3103     typedef std::map<std::string, double> MSD ;
3104     typedef std::map<int, MSD> MIMSD ;
3105 
3106     MIMSD::const_iterator it = mimsd.begin();
3107 
3108     for(unsigned i=0 ; i < mimsd.size() ; i++)
3109     {
3110         int icat = it->first ;
3111         const char* cat = U::FormName(icat) ;
3112         const MSD& msd = it->second ;
3113         NP* a = NPX::Serialize_MSD( msd );
3114         f->add(cat, a );
3115 
3116         std::advance(it, 1);
3117     }
3118     return f;
3119 }
3120 
3121 inline std::string NPFold::Desc_MIMSD(const std::map<int, std::map<std::string, double>>& mimsd) // static
3122 {
3123     std::stringstream ss ;
3124     ss << "NPFold::Desc_MIMSD" << std::endl ;
3125 
3126     typedef std::map<std::string, double> MSD ;
3127     typedef std::map<int, MSD> MIMSD ;
3128 
3129     MIMSD::const_iterator it = mimsd.begin();
3130 
3131     for(unsigned i=0 ; i < mimsd.size() ; i++)
3132     {
3133         int cat = it->first ;
3134         const MSD& msd = it->second ;
3135         ss
3136             << " cat " << cat
3137             << " msd.size " << msd.size()
3138             << std::endl
3139             << NPX::Desc_MSD(msd)
3140             << std::endl
3141             ;
3142 
3143         std::advance(it, 1);
3144     }
3145     std::string s = ss.str();
3146     return s ;
3147 }
3148 
3149 
3150 /**
3151 NPFold::subcount
3152 ------------------
3153 
3154 Collects array item counts from multiple subfold
3155 into a single array for easy analysis/plotting etc.
3156 Typical use is for comparing genstep, hit, photon etc
3157 counts between multiple events during test scans.
3158 
3159 
3160 1. find subfold of this fold with the prefix argument, eg prefix "//A" finds A000 A001 A002 ...
3161 2. get unique list of array keys (eg "hit", "photon", "genstep") from all subfold
3162 3. create 2d array of shape (num_sub, num_ukey) with array counts for each sub
3163 4. return the array of array counts in each subfold
3164 
3165 
3166 **/
3167 
3168 inline NP* NPFold::subcount( const char* prefix ) const
3169 {
3170     // 1. find subfold with prefix
3171     std::vector<const NPFold*> subs ;
3172     std::vector<std::string> subpaths ;
3173     int maxdepth = 1 ;  // only one level
3174 
3175     find_subfold_with_prefix(subs, &subpaths,  prefix, maxdepth );
3176     assert( subs.size() == subpaths.size() );
3177     int num_sub = int(subs.size()) ;
3178 
3179     // 2. get unique list of array keys from all subfold
3180     std::set<std::string> s_keys ;
3181     for(int i=0 ; i < num_sub ; i++)
3182     {
3183         std::vector<std::string> keys ;
3184         subs[i]->get_counts(&keys, nullptr);
3185         std::transform(
3186             keys.begin(), keys.end(),
3187             std::inserter(s_keys, s_keys.end()),
3188             [](const std::string& obj) { return obj ; }
3189            );
3190     }
3191     std::vector<std::string> ukey(s_keys.begin(), s_keys.end()) ;
3192     int num_ukey = ukey.size() ;
3193 
3194     int ni = num_sub ;
3195     int nj = num_ukey ;
3196 
3197     // 3. create 2d array of shape (num_sub, num_ukey) with array counts for each sub
3198     NP* a = NP::Make<int>( ni, nj  );   // TODO: int64
3199     a->labels = new std::vector<std::string> ;
3200     a->names = subpaths ;
3201 
3202     int* aa = a->values<int>() ;
3203 
3204     for(int i=0 ; i < num_ukey ; i++)
3205     {
3206         const char* uk  = ukey[i].c_str() ;
3207         const char* _uk = IsNPY(uk) ? BareKey(uk) : uk ;
3208         a->labels->push_back(_uk);
3209     }
3210 
3211     int _DUMP = U::GetEnvInt(subcount_DUMP,0);
3212 
3213     if(_DUMP>0) std::cout << "[" << subcount_DUMP << "\n" ;
3214     if(_DUMP>0) std::cout <<  " num_ukey " << num_ukey << std::endl ;
3215     if(_DUMP>0) for(int i=0 ; i < num_ukey ; i++ ) std::cout << a->names[i] << std::endl ;
3216 
3217     for(int i=0 ; i < ni ; i++)
3218     {
3219         std::vector<std::string> keys ;
3220         std::vector<size_t> counts ;
3221         subs[i]->get_counts(&keys, &counts);
3222         assert( keys.size() == counts.size() );
3223         int num_key = keys.size();
3224 
3225         for(int j=0 ; j < nj ; j++)
3226         {
3227             const char* uk = ukey[j].c_str();
3228             int idx = std::distance( keys.begin(), std::find(keys.begin(), keys.end(), uk ) ) ;
3229             size_t count = idx < num_key ? counts[idx] : -1  ;
3230             aa[i*nj+j] = count ;
3231 
3232             if(_DUMP>0) std::cout
3233                 << std::setw(20) << uk
3234                 << " idx " << idx
3235                 << " count " << count
3236                 << std::endl
3237                 ;
3238         }
3239     }
3240     if(_DUMP>0) std::cout << "]" << subcount_DUMP << "\n" ;
3241     return a ;
3242 }
3243 
3244 
3245 
3246 
3247 /**
3248 NPFold::submeta
3249 ------------------
3250 
3251 1. find subfolders with prefix
3252 2. collect metadata (k,v) pairs with common values for all subs into ckey, cval and other keys into okey
3253 3. form an array of shape (num_sub, 1 when column_key provided OR num_okey when not )
3254 
3255 **/
3256 
3257 inline NP* NPFold::submeta(const char* prefix, const char* column_key ) const
3258 {
3259     // 1. find subfolders with prefix
3260 
3261     std::vector<const NPFold*> subs ;
3262     std::vector<std::string> subpaths ;
3263     int maxdepth = 1 ;  // only look one level down
3264 
3265     find_subfold_with_prefix(subs, &subpaths,  prefix, maxdepth );
3266     assert( subs.size() == subpaths.size() );
3267 
3268     // 2. collect metadata (k,v) pairs with common values for all subs into ckey, cval and other keys into okey
3269     std::vector<std::string> okey ;
3270     std::vector<std::string> ckey ;
3271     std::vector<std::string> cval ;
3272     SubCommonKV(okey, ckey, cval, subs );
3273     assert( ckey.size() == cval.size() );
3274     bool dump_common = false ;
3275     if(dump_common) std::cout << DescCommonKV(okey, ckey, cval);
3276 
3277     int column = std::distance( okey.begin(), std::find( okey.begin(), okey.end(), column_key ? column_key : "-" )) ;
3278     bool found_column = column < int(okey.size()) ;
3279 
3280     int num_subs = subs.size() ;
3281     int num_okey = okey.size() ;
3282     int ni = num_subs ;
3283     int nj = found_column ? 1 : num_okey ;
3284 
3285     NP* a = NP::Make<int64_t>( ni, nj );
3286     int64_t* aa = a->values<int64_t>() ;
3287 
3288     a->names = subpaths ;
3289     a->labels = new std::vector<std::string>( okey.begin(), okey.end() ) ;
3290 
3291     for(int i=0 ; i < ni ; i++)
3292     {
3293         const NPFold* sub = subs[i] ;
3294         for(int j=0 ; j < nj ; j++)
3295         {
3296             const char* ok = found_column ? column_key : okey[j].c_str() ;
3297             int64_t val = sub->get_meta<int64_t>( ok, 0 );
3298             aa[i*nj+j] = val ;
3299         }
3300     }
3301     return a ;
3302 }
3303 
3304 
3305 /**
3306 NPFold::substamp
3307 --------------------
3308 
3309 This provides metadata across multiple events, but as it relies on
3310 saving of arrays it is not useful for production running because
3311 SEvt are not saved because they are too big.
3312 
3313 For metadata in production running the alternative SProf.hh
3314 low resource approach should be used.
3315 
3316 Example arguments:
3317 
3318 * prefix "//A" "//B"
3319 * keyname : "substamp"
3320 
3321 
3322 Primary use of substamp is for comparisons of timestamp difference from begin of event
3323 between multiple events eg A000 A001
3324 
3325 1. finds *subs* vector of subfold of this fold with the path prefix, eg "//A" "//B"
3326 
3327 2. create *t* array shaped (num_sub, num_stamp) containing timestamp values of the common keys,
3328    this is particularly useful when scanning with a sequence of
3329    events with increasing numbers of photons
3330 
3331 3. derive *dt* DeltaColumn array, creating first-timestamp-within-each-event-relative-timestamps
3332 
3333 4. create array of array counts (eg num_hit, num_genstep, num_photon) in each subfold
3334 
3335 5. form *out* NPFold with keys "substamp" "delta_substamp" "subcount" containing the above created arrays
3336 
3337 
3338 
3339 Example of NPFold_meta.txt::
3340 
3341     A[blyth@localhost ALL1_Debug_Philox_ref1]$ cat A000/NPFold_meta.txt
3342 
3343     NumPhotonCollected:1000000
3344     NumGenstepCollected:10
3345     MaxBounce:63
3346 
3347     site:SEvt::endMeta
3348     hitmask:8192
3349     index:0
3350     instance:0
3351     SEvt__beginOfEvent_0:1760707886287045,7316444,1222084
3352     SEvt__beginOfEvent_1:1760707886287165,7316444,1222084
3353     SEvt__endOfEvent_0:1760707886541450,8373000,1334844
3354     t_BeginOfEvent:1760707886287057
3355     t_setGenstep_0:0
3356     t_setGenstep_1:0
3357     t_setGenstep_2:0
3358     t_setGenstep_3:1760707886287216
3359     t_setGenstep_4:1760707886287368
3360     t_setGenstep_5:1760707886287387
3361     t_setGenstep_6:1760707886287407
3362     t_setGenstep_7:1760707886288094
3363     t_setGenstep_8:1760707886288391
3364     t_PreLaunch:1760707886288420
3365     t_PostLaunch:1760707886441593
3366     t_EndOfEvent:1760707886541457
3367     t_Event:254400
3368     t_Launch:0.153154
3369 
3370 
3371 The first few of the above entries are written from SEvt::beginOfEvent with SEvt::setMeta.
3372 Entries from "site:SEvt::endMeta" onwards are written from SEvt::endOfEvent/SEvt::endMeta
3373 with::
3374 
3375     SEvt::setMeta
3376     SEvt::setMetaProf
3377 
3378 Both the above methods append to the SEvt::meta string.
3379 SEvt::meta is assigned to the NPFold by SEvt::gather_metadata
3380 from SEvt::endOfEvent.
3381 
3382 **/
3383 
3384 inline NPFold* NPFold::substamp(const char* prefix, const char* keyname) const
3385 {
3386     // 1. finds *subs* vector of subfold of this fold with the path prefix, eg "//A" "//B"
3387 
3388     std::vector<const NPFold*> subs ;
3389     std::vector<std::string> subpaths ;
3390     int maxdepth = 1 ;  // only one level down
3391     find_subfold_with_prefix(subs, &subpaths,  prefix, maxdepth );
3392     assert( subs.size() == subpaths.size() );
3393     int num_sub = int(subs.size()) ;
3394 
3395     int _DUMP = U::GetEnvInt(substamp_DUMP, 0 );
3396 
3397     const NPFold* sub0 = num_sub > 0 ? subs[0] : nullptr ;
3398 
3399     int num_stamp0 = sub0 ? sub0->getMetaNumStamp() : 0 ;
3400     bool skip = num_sub == 0 || num_stamp0 == 0 ;
3401 
3402     if(_DUMP) std::cout
3403         << "[" << substamp_DUMP
3404         << " find_subfold_with_prefix " << prefix
3405         << " maxdepth " << maxdepth
3406         << " num_sub " << num_sub
3407         << " sub0 " << ( sub0 ? sub0->stats() : "-" )
3408         << " num_stamp0 " << num_stamp0
3409         << " skip " << ( skip ? "YES" : "NO ")
3410         << std::endl
3411         << DescFoldAndPaths(subs, subpaths)
3412         ;
3413 
3414     NPFold* out = nullptr ;
3415     if(skip) return out ;
3416 
3417 
3418     // 2. create *t* array shaped (num_sub, num_stamp) containing timestamp values of the common keys
3419 
3420     int ni = num_sub ;
3421     int nj = num_stamp0 ; // num stamps in the first sub
3422 
3423     NP* t = NP::Make<int64_t>( ni, nj ) ;
3424     int64_t* tt = t->values<int64_t>() ;
3425     t->set_meta<std::string>("creator","NPFold::substamp");
3426     t->set_meta<std::string>("base", loaddir ? loaddir : "-" );
3427     t->set_meta<std::string>("prefix", prefix ? prefix : "-" );
3428     t->set_meta<std::string>("keyname", keyname ? keyname : "-" );
3429 
3430     // collect metadata (k,v) pairs that are the same for all the subs
3431     std::vector<std::string> okey ;
3432     std::vector<std::string> ckey ;
3433     std::vector<std::string> cval ;
3434     SubCommonKV(okey, ckey, cval, subs );
3435     assert( ckey.size() == cval.size() );
3436     t->setMetaKV_(ckey, cval);
3437 
3438 
3439     std::vector<std::string> comkeys ;
3440     for(int i=0 ; i < ni ; i++)
3441     {
3442         const NPFold* sub = subs[i] ;
3443         const char* subpath = subpaths[i].c_str() ;
3444         std::vector<std::string> keys ;
3445         std::vector<int64_t>   stamps ;
3446 
3447 
3448         // grab keys and stamps from the sub meta string
3449         bool only_with_stamps = true ;
3450         sub->getMetaKVS(&keys, nullptr, &stamps, only_with_stamps );
3451 
3452         int num_stamp = stamps.size() ;
3453         bool consistent_num_stamp = num_stamp == nj ;
3454 
3455         if(!consistent_num_stamp) std::cerr
3456             << "NPFold::substamp"
3457             << " i " << i
3458             << " subpath " << ( subpath ? subpath : "-" )
3459             << " consistent_num_stamp " << ( consistent_num_stamp ? "YES" : "NO " )
3460             << " num_stamp " << num_stamp
3461             << " nj " << nj
3462             << std::endl
3463             ;
3464         assert(consistent_num_stamp) ;
3465 
3466         if(i == 0) comkeys = keys ;
3467         bool same_keys = i == 0 ? true : keys == comkeys ;
3468         if(_DUMP>0) std::cout << sub->loaddir << " stamps.size " << stamps.size() << " " << ( same_keys ? "Y" : "N" ) << std::endl;
3469         assert(same_keys);
3470 
3471         for(int j=0 ; j < nj ; j++) tt[i*nj+j] = stamps[j] ;
3472         t->names.push_back(subpath);
3473 
3474     }
3475     t->labels = new std::vector<std::string>(comkeys.begin(), comkeys.end())  ;
3476 
3477 
3478     // 3. derive *dt* DeltaColumn array, creating first-timestamp-within-each-event-relative-timestamps
3479 
3480     NP* dt = NP::DeltaColumn<int64_t>(t);
3481     dt->names = t->names ;
3482     dt->labels = new std::vector<std::string>(comkeys.begin(), comkeys.end())  ;
3483 
3484 
3485     // 4. create array of array counts (eg num_hit, num_genstep, num_photon) in each subfold
3486     NP* count = subcount(prefix); // prefix eg "//A"
3487 
3488 
3489     // 5. form NPFold with keys "substamp" "delta_substamp" "subcount" containing the above created arrays
3490 
3491     const char* delta_keyname = U::FormName("delta_",keyname,nullptr) ; // normally "delta_substamp"
3492     out = new NPFold ;
3493     out->add(keyname      , t );  // "substamp"
3494     out->add(delta_keyname, dt ); // "delta_substamp"
3495     out->add("subcount", count );
3496 
3497     if(_DUMP>0) std::cout
3498         << "]" << substamp_DUMP
3499         << "\n"
3500         ;
3501 
3502     return out ;
3503 }
3504 
3505 
3506 
3507 /**
3508 NPFold::subprofile
3509 --------------------
3510 
3511 Collect profile metadata from subfold matching the prefix
3512 
3513 1. find *subs* vector of subfold of this fold with path prefix, eg "//A" "//B"
3514 2. create *t* array of shape (num_sub, num_prof0, 3) with the profile triplets
3515 3. create *out* NPFold containing "subprofile" keyname with the *t* array
3516 
3517 **/
3518 
3519 inline NPFold* NPFold::subprofile(const char* prefix, const char* keyname) const
3520 {
3521     // 1. find *subs* vector of subfold of this fold with path prefix, eg "//A" "//B"
3522 
3523     std::vector<const NPFold*> subs ;
3524     std::vector<std::string> subpaths ;
3525     int maxdepth = 1 ;  // only one level down
3526     find_subfold_with_prefix(subs, &subpaths,  prefix, maxdepth );
3527     assert( subs.size() == subpaths.size() );
3528     int num_sub = int(subs.size()) ;
3529     int num_prof0 = num_sub > 0 ? subs[0]->getMetaNumProfile() : 0 ;
3530     bool skip = num_sub == 0 || num_prof0 == 0 ;
3531 
3532     int _DUMP = U::GetEnvInt(subprofile_DUMP, 0 );
3533     if(_DUMP>0) std::cout
3534         << "[" << subprofile_DUMP
3535         << " find_subfold_with_prefix " << prefix
3536         << " maxdepth " << maxdepth
3537         << " num_sub " << num_sub
3538         << " num_prof0 " << num_prof0
3539         << " skip " << ( skip ? "YES" : "NO ")
3540         << std::endl
3541         ;
3542 
3543     NPFold* out = nullptr ;
3544     if(skip) return out ;
3545 
3546     // 2. create *t* array of shape (num_sub, num_prof0, 3) with the profile triplets
3547 
3548     int ni = num_sub ;
3549     int nj = num_prof0 ;
3550     int nk = 3 ;
3551 
3552     NP* t = NP::Make<int64_t>( ni, nj, nk ) ;
3553     int64_t* tt = t->values<int64_t>() ;
3554     t->set_meta<std::string>("creator","NPFold::subprofile");
3555     t->set_meta<std::string>("base", loaddir ? loaddir : "-" );
3556     t->set_meta<std::string>("prefix", prefix ? prefix : "-" );
3557     t->set_meta<std::string>("keyname", keyname ? keyname : "-" );
3558 
3559     // collect metadata (k,v) pairs that are the same for all the subs
3560     std::vector<std::string> okey ;
3561     std::vector<std::string> ckey ;
3562     std::vector<std::string> cval ;
3563     SubCommonKV(okey, ckey, cval, subs );
3564     assert( ckey.size() == cval.size() );
3565     t->setMetaKV_(ckey, cval);
3566 
3567     std::vector<std::string> comkeys ;
3568     for(int i=0 ; i < ni ; i++)
3569     {
3570         const NPFold* sub = subs[i] ;
3571         const char* subpath = subpaths[i].c_str() ;
3572 
3573         if(_DUMP>0) std::cout
3574             << subpath
3575             << std::endl
3576             << sub->descMetaKV()
3577             << std::endl
3578             ;
3579 
3580         std::vector<std::string> keys ;
3581         std::vector<std::string> vals ;
3582         bool only_with_profiles = true ;
3583         sub->getMetaKV(&keys, &vals, only_with_profiles );
3584         assert( vals.size() == keys.size() ) ;
3585         assert( int(vals.size()) == nj ) ;
3586 
3587         if(i == 0) comkeys = keys ;
3588         bool same_keys = i == 0 ? true : keys == comkeys ;
3589         if(_DUMP>0) std::cout
3590              << "sub.loaddir " << sub->loaddir
3591              << " keys.size " << keys.size()
3592              << " " << ( same_keys ? "Y" : "N" )
3593              << std::endl
3594              ;
3595         assert(same_keys);
3596 
3597         for(int j=0 ; j < nj ; j++)
3598         {
3599             const char* v = vals[j].c_str();
3600             std::vector<int64_t> elem ;
3601             U::MakeVec<int64_t>( elem, v, ',' );
3602             assert( int(elem.size()) == nk );
3603             for(int k=0 ; k < nk ; k++)  tt[i*nj*nk+j*nk+k] = elem[k] ;
3604         }
3605         t->names.push_back(subpath);
3606     }
3607     t->labels = new std::vector<std::string>(comkeys.begin(), comkeys.end())  ;
3608 
3609     // 3. create *out* NPFold containing "subprofile" keyname with the *t* array
3610 
3611     out = new NPFold ;
3612     out->add(keyname, t );
3613 
3614     if(_DUMP>0) std::cout
3615         << "]" << subprofile_DUMP
3616         << std::endl
3617         ;
3618     return out ;
3619 }
3620 
3621 
3622 
3623 /**
3624 NPFold::subfold_summary
3625 -----------------------
3626 
3627 Applies methods to each subfold found within this NPFold specified by k:v delimited argument values.
3628 This creates a summary subfold or array for each group of subfold specified by the argument paths.
3629 
3630 1. collect args containing ':' delimiter into uargs
3631 2. create NPFold/NP for each uarg using *method* named arg , thats added to (NPFold)spec_ff
3632 3. return (NPFold)spec_ff
3633 
3634 
3635 Supported *method* are:
3636 
3637 substamp
3638 
3639 subprofile
3640 
3641 submeta
3642    forms array of shape (num_sub, num_okey) with entries for each sub
3643    providing all non-common metadata values for each sub
3644 
3645 submeta:some-column
3646    forms array of shape (num_sub, 1) with the some-column values for each sub
3647 
3648 subcount
3649 
3650 
3651 
3652 
3653 Example arguments::
3654 
3655    NPFold* ab = NPFold::subfold_summary("substamp",   "a://A", "b://B" ) ;
3656    NPFold* ab = NPFold::subfold_summary("subprofile", "a://A", "b://B" ) ;
3657    NPFold* ab = NPFold::subfold_summary("submeta",    "a://A", "b://B" ) ;
3658    NPFold* ab = NPFold::subfold_summary("subcount",   "a://A", "b://B" ) ;
3659 
3660 **/
3661 
3662 template<typename ... Args>
3663 inline NPFold* NPFold::subfold_summary(const char* method, Args ... args_  ) const
3664 {
3665     int _DUMP = U::GetEnvInt( subfold_summary_DUMP, 0 );
3666 
3667 
3668     // 1. collect args containing ':' delimiter into uargs
3669 
3670     std::vector<std::string> args = {args_...};
3671     std::vector<std::string> uargs ;
3672     char delim = ':' ;
3673     for(int i=0 ; i < int(args.size()) ; i++)
3674     {
3675         const std::string& arg = args[i] ;
3676         size_t pos = arg.empty() ? std::string::npos : arg.find(delim) ;
3677         if( pos == std::string::npos ) continue ;
3678         uargs.push_back( arg );
3679     }
3680     int num_uargs = uargs.size() ;
3681     if(_DUMP > 0)
3682     {
3683         std::cerr
3684            << "@[" << subfold_summary_DUMP
3685            << " method [" << ( method ? method : "-" ) << "]"
3686            << " args.size " << args.size()
3687            << " uargs.size " << uargs.size()
3688            << " uargs("
3689            ;
3690 
3691         for(int i=0 ; i < num_uargs ; i++) std::cerr << uargs[i] << " " ;
3692         std::cerr << ")\n" ;
3693     }
3694 
3695 
3696     std::stringstream hh ;
3697     hh << "NPFold::subfold_summary(\"" << method << "\"," ;
3698 
3699     // 2. create NPFold/NP for each argument using *method* named argument, thats added to (NPFold)spec_ff
3700 
3701     NPFold* spec_ff = nullptr ;
3702 
3703     for(int i=0 ; i < num_uargs ; i++)
3704     {
3705         const std::string& arg = uargs[i] ;
3706         hh << "\"" << arg << "\"" << ( i < num_uargs - 1 ? "," : " " ) ;
3707 
3708         size_t pos = arg.find(delim) ;
3709         std::string _k = arg.substr(0, pos);
3710         std::string _v = arg.substr(pos+1);
3711         const char* k = _k.c_str();   // "a" OR "b"
3712         const char* v = _v.c_str();   // eg "//A" "//B"
3713 
3714 
3715         NPFold* sub = nullptr ;
3716         NP* arr = nullptr ;
3717 
3718         if(strcmp(method, "substamp")==0)
3719         {
3720             sub = substamp(v, "substamp") ;
3721         }
3722         else if(strcmp(method, "subprofile")==0)
3723         {
3724             sub = subprofile(v, "subprofile") ;
3725         }
3726         else if(strcmp(method, "submeta")==0)
3727         {
3728             arr = submeta(v) ;
3729         }
3730         else if(strcmp(method, "subcount")==0)
3731         {
3732             arr = subcount(v) ;
3733         }
3734         else if(U::StartsWith(method, "submeta:"))
3735         {
3736             arr = submeta(v, method+strlen("submeta:") );
3737         }
3738 
3739         if(sub == nullptr && arr == nullptr)
3740         {
3741             if( _DUMP > 0 ) std::cerr
3742                 << "@-NPFold::subfold_summary"
3743                 << " method [" << ( method ? method : "-" ) << "]"
3744                 << " k [" << k << "]"
3745                 << " v [" << v << "]"
3746                 << " sub " << ( sub ? "YES" : "NO " )
3747                 << " arr " << ( arr ? "YES" : "NO " )
3748                 << std::endl
3749                 ;
3750 
3751             continue ;
3752         }
3753         if(spec_ff == nullptr) spec_ff = new NPFold ;
3754         if(sub) spec_ff->add_subfold(k, sub );
3755         if(arr) spec_ff->add(k, arr) ;
3756         // k does not stomp : as those are different spec_ff
3757         // HUH: looks to be same spec_ff - the k must be different to avoid stomping
3758     }
3759     hh << ")" ;
3760 
3761     if(spec_ff) spec_ff->headline = hh.str();
3762     if(_DUMP > 0) std::cerr
3763         << "@[" << subfold_summary_DUMP
3764         << " method [" << ( method ? method : "-" ) << "]"
3765         << "\n"
3766         ;
3767 
3768     return spec_ff ;
3769 }
3770 
3771 template NPFold* NPFold::subfold_summary( const char*, const char* ) const ;
3772 template NPFold* NPFold::subfold_summary( const char*, const char*, const char* ) const ;
3773 template NPFold* NPFold::subfold_summary( const char*, const char*, const char*, const char* ) const ;
3774 
3775 
3776 /**
3777 NPFold::compare_subarrays
3778 ----------------------------
3779 
3780 1. access *key* array from two subfold (*asym* and *bsym*)
3781    eg A000 and B000 which could be Opticks and Geant4 events
3782 
3783 2. look for "subcount" summary arrays in the two folders,
3784    "subcount" summaries contain array counts from multiple folders
3785 
3786 
3787 **/
3788 
3789 template<typename F, typename T>
3790 NP* NPFold::compare_subarrays(const char* key, const char* asym, const char* bsym,  std::ostream* out  )
3791 {
3792     NPFold* af = find_subfold_(asym) ;
3793     NPFold* bf = find_subfold_(bsym) ;
3794     NP* a = af ? af->get_(key) : nullptr ;
3795     NP* b = bf ? bf->get_(key) : nullptr ;
3796 
3797     const NP* a_subcount = af ? af->get("subcount") : nullptr ;
3798     const NP* b_subcount = bf ? bf->get("subcount") : nullptr ;
3799 
3800     int a_column = -1 ;
3801     int b_column = -1 ;
3802 
3803     NP* boa = NPX::BOA<F,T>( a, b, a_column, b_column, out );
3804 
3805     if(out) *out
3806        << "[NPFold::compare_subarray"
3807        << " key " << key
3808        << " asym " << asym
3809        << " bsym " << bsym
3810        << " af " << ( af ? "YES" : "NO " )
3811        << " bf " << ( bf ? "YES" : "NO " )
3812        << " a " << ( a ? "YES" : "NO " )
3813        << " b " << ( b ? "YES" : "NO " )
3814        << " a_subcount " << ( a_subcount ? "YES" : "NO " )
3815        << " b_subcount " << ( b_subcount ? "YES" : "NO " )
3816        << " boa " << ( boa ? "YES" : "NO " )
3817        << "\n"
3818        << "-[NPFold::compare_subarray.a_subcount" << "\n"
3819        << ( a_subcount ? a_subcount->descTable<int>(8) : "-\n" )
3820        << "-]NPFold::compare_subarray.a_subcount"
3821        << "\n"
3822        << "-[NPFold::compare_subarray.b_subcount" << "\n"
3823        << ( b_subcount ? b_subcount->descTable<int>(8) : "-\n" )
3824        << "-]NPFold::compare_subarray.b_subcount"
3825        << "\n"
3826        << "-[NPFold::compare_subarray." << asym << "\n"
3827        << ( a ? a->descTable<T>(8) : "-\n" )
3828        << "-]NPFold::compare_subarray." << asym
3829        << "\n"
3830        << "-[NPFold::compare_subarray." << bsym << "\n"
3831        << ( b ? b->descTable<T>(8) : "-\n" )
3832        << "-]NPFold::compare_subarray." << bsym
3833        << "\n"
3834        << "-[NPFold::compare_subarray.boa " << "\n"
3835        << ( boa ? boa->descTable<F>(12) : "-\n" )
3836        << "-]NPFold::compare_subarray.boa "
3837        << "\n"
3838        << "]NPFold::compare_subarray"
3839        << "\n"
3840        ;
3841     return boa ;
3842 }
3843 
3844 template<typename F, typename T>
3845 std::string NPFold::compare_subarrays_report(const char* key, const char* asym, const char* bsym )
3846 {
3847     std::stringstream ss ;
3848     compare_subarrays<F, T>(key, asym, bsym, &ss );
3849     std::string str = ss.str();
3850     return str ;
3851 }
3852 
3853 
3854 
3855 /**
3856 NPFold::Subkey
3857 ------------------
3858 
3859 Collect union of all keys from all the subs that are present in the metadata
3860 of all the subfold.
3861 
3862 **/
3863 
3864 inline void NPFold::Subkey(std::vector<std::string>& ukey, const std::vector<const NPFold*>& subs ) // static
3865 {
3866     int num_sub = subs.size();
3867     for(int i=0 ; i < num_sub ; i++)
3868     {
3869         std::vector<std::string> keys ;
3870         bool only_with_profiles = false ;
3871         subs[i]->getMetaKV(&keys, nullptr, only_with_profiles );
3872         int num_keys = keys.size();
3873         for(int j=0 ; j < num_keys ; j++)
3874         {
3875             const char* k = keys[j].c_str();
3876             if(std::find(ukey.begin(), ukey.end(), k ) == ukey.end()) ukey.push_back(k) ;
3877         }
3878     }
3879 }
3880 
3881 /**
3882 NPFold::SubCommonKV
3883 ---------------------
3884 
3885 Return (k,v) pairs that are in common for all the subs
3886 
3887 1. collect union of all keys present in the metadata of all the subfold
3888 2. for each of the union of keys iterate over all the subs and add entries
3889    with the common values into *ckey*, *cval*.
3890    Other keys with varying values are added to *okey*
3891 
3892 **/
3893 
3894 inline void NPFold::SubCommonKV(std::vector<std::string>& okey, std::vector<std::string>& ckey, std::vector<std::string>& cval, const std::vector<const NPFold*>& subs ) // static
3895 {
3896     // 1. collect union of all keys present in the metadata of all the subfold
3897     std::vector<std::string> ukey ;
3898     Subkey( ukey, subs );
3899 
3900     int num_sub = subs.size();
3901     int num_ukey = ukey.size();
3902 
3903     bool dump_ukey = false ;
3904     if(dump_ukey)
3905     {
3906         std::cout << "[NPFold::SubCommonKV num_ukey:" << num_ukey ;
3907         for(int i=0 ; i < num_ukey ; i++ ) std::cout << ukey[i] << "\n" ;
3908         std::cout << "]NPFold::SubCommonKV num_ukey:" << num_ukey ;
3909     }
3910 
3911     ckey.clear();
3912     cval.clear();
3913 
3914     for(int i=0 ; i < num_ukey ; i++)
3915     {
3916         const char* k = ukey[i].c_str();
3917 
3918         int total = 0 ;
3919         int same = 0 ;
3920         std::string v0 ;
3921 
3922         for(int j=0 ; j < num_sub ; j++)
3923         {
3924             std::string v = subs[j]->get_meta_string(k) ;
3925             bool has_key = !v.empty();
3926             if(!has_key) std::cerr
3927                  << "NPFold::SubCommonKV MISSING KEY "
3928                  << " num_sub " << num_sub
3929                  << " num_ukey " << num_ukey
3930                  << " k " << ( k ? k : "-" )
3931                  << " v " << ( v.empty() ? "-" : v )
3932                  << std::endl
3933                  ;
3934             if(!has_key) std::raise(SIGINT) ;
3935             assert(has_key);
3936 
3937             total += 1 ;
3938             if(v0.empty())
3939             {
3940                 v0 = v ;
3941                 same += 1 ;
3942             }
3943             else
3944             {
3945                 bool same_value = strcmp(v0.c_str(), v.c_str())==0 ;
3946                 if(same_value) same += 1 ;
3947             }
3948         }
3949 
3950         bool all_same_value = total == same ;
3951         if(all_same_value)
3952         {
3953             ckey.push_back(k);
3954             cval.push_back(v0);
3955         }
3956         else
3957         {
3958             okey.push_back(k);
3959         }
3960     }
3961 }
3962 
3963 inline std::string NPFold::DescCommonKV(
3964      const std::vector<std::string>& okey,
3965      const std::vector<std::string>& ckey,
3966      const std::vector<std::string>& cval ) // static
3967 {
3968     assert( ckey.size() == cval.size() );
3969     int num_ckey = ckey.size();
3970     int num_okey = okey.size();
3971     std::stringstream ss ;
3972     ss
3973        << "[NPFold::DescCommonKV" << std::endl
3974        << "-num_ckey " << num_ckey << std::endl
3975        ;
3976     for(int i=0 ; i < num_ckey ; i++) ss
3977          << std::setw(25) << ckey[i]
3978          << " : "
3979          << std::setw(25) << cval[i]
3980          << std::endl
3981          ;
3982 
3983     ss << "-num_okey "  << num_okey
3984        << std::endl
3985        ;
3986     for(int i=0 ; i < num_okey ; i++) ss
3987          << std::setw(25) << okey[i]
3988          << std::endl
3989          ;
3990 
3991     ss
3992        << "]NPFold::DescCommonKV"
3993        << std::endl
3994        ;
3995 
3996     std::string str = ss.str();
3997     return str ;
3998 }
3999 
4000