eic-opticks/sysrap/NP.hh

0001 #ifndef NP_HH
0002 #define NP_HH
0003
0004 /**
0005 NP : Header-only Array Creation and persisting as NumPy .npy files
0006 ====================================================================
0007
0008 * TODO: relocate higher level NP.hh functionality up to NPX.h
0009 * TODO: relocate lower level NP.hh functionality down to NPU.hh
0010
0011
0012 Dependencies of the NP family of headers::
0013
0014     NPFold.h : NPX.h
0015
0016     NPX.h : NP.hh
0017
0018     NP.hh : NPU.hh
0019
0020     NPU.hh : system headers only
0021
0022
0023
0024 NPU.hh
0025     underpinnings of NP.hh
0026 NP.hh
0027     core of save/load arrays into .npy NumPy format files
0028 NPX.h
0029     extras such as static converters
0030 NPFold.h
0031     managing and persisting collections of arrays and other NPFold
0032
0033 Primary source is https://github.com/simoncblyth/np/
0034 but the headers are also copied into opticks/sysrap.
0035
0036 **/
0037
0038 #include <iostream>
0039 #include <iomanip>
0040 #include <string>
0041 #include <vector>
0042 #include <cassert>
0043 #include <csignal>
0044 #include <fstream>
0045 #include <cstdint>
0046 #include <limits>
0047 #include <random>
0048 #include <map>
0049 #include <functional>
0050 #include <locale>
0051 #include <optional>
0052
0053 #include "NPU.hh"
0054
0055
/**
NP_slice
---------

Plain (start, stop, step) triplet describing a one dimensional selection.
The sign of *step* selects how the triplet is interpreted:

step > 0
    arange-like : values start, start+step, start+2*step, ... while less than stop
step < 0
    linspace-like : -step is taken as the number of values
step == 0
    neither arange nor linspace, count() gives 0

**/
template<typename T>
struct NP_slice
{
    T start ;
    T stop ;
    T step ;

    bool is_arange() const ;                          // step > 0
    bool is_linspace() const ;                        // step < 0
    bool is_match(const NP_slice<T>& other) const ;   // all three fields compare equal
    std::string desc() const ;                        // "NP_slice(start,stop,step)"
    int count() const ;                               // number of values selected
};

/**
NP_slice::is_arange
--------------------

Returns true when step is positive.

**/
template<typename T>
inline bool NP_slice<T>::is_arange() const
{
   return step > 0 ;
}

/**
NP_slice::is_linspace
----------------------

Returns true when step is negative, in which case -step is the number of values.

**/
template<typename T>
inline bool NP_slice<T>::is_linspace() const
{
   return step < 0 ;
}

/**
NP_slice::is_match
-------------------

Exact field-by-field comparison using operator==, no tolerance is applied.

**/
template<typename T>
inline bool NP_slice<T>::is_match(const NP_slice& other) const
{
    return start == other.start && stop == other.stop && step == other.step ;
}

/**
NP_slice::desc
---------------

Returns a string of form "NP_slice(start,stop,step)" with the fields
formatted by default stream insertion.

**/
template<typename T>
inline std::string NP_slice<T>::desc() const
{
    std::stringstream ss ;
    ss << "NP_slice(" <<  start << "," << stop << "," << step << ")" ;
    return ss.str() ;
}

/**
NP_slice::count
----------------

Returns the number of values described by the slice:

* step < 0  : int(-step), the linspace count
* step == 0 : 0, as a zero step never advances towards stop
  (formerly this case looped forever when start < stop)
* step > 0  : number of values v = start, start+step, ... with v < stop

The arange count is obtained by stepping in type T, so it reflects the
same accumulation that filling the values would perform.

**/
template<typename T>
inline int NP_slice<T>::count() const
{
    if( step < 0 ) return int(-step) ;   // linspace
    if( step == 0 ) return 0 ;           // degenerate : avoid non-terminating loop

    int _count = 0 ;
    for(T v=start ; v < stop ; v += step ) _count++ ;
    return _count ;
}
0109
0110
0111 struct NP
0112 {
0113     typedef std::int64_t INT ;
0114     typedef std::uint64_t UINT ;
0115     static constexpr const INT TEN = 10 ;
0116
0117     static constexpr const char* EXT = ".npy" ;
0118 #ifdef WITH_VERBOSE
0119     static const bool VERBOSE = true ;
0120 #else
0121     static const bool VERBOSE = false ;
0122 #endif
0123
0124     union UIF32
0125     {
0126         std::uint32_t u ;
0127         std::int32_t  i ;
0128         float         f ;
0129     };
0130
0131     union UIF64
0132     {
0133         std::uint64_t  u ;
0134         std::int64_t   i ;
0135         double         f ;
0136     };
0137
0138
0139     // SPECIALIZED MEMBER FUNCTIONS
0140
0141     template<typename T> const T*  cvalues() const  ;
0142     template<typename T> T*       values() ;
0143
0144     template<typename T> void fill(T value);
0145     template<typename T> void _fillIndexFlat(T offset=0);
0146
0147     // BLOCK OF TEMPLATE SPECIALIZATIONS cvalues, values, _fillIndexFlat : IN IMPL BELOW AT THIS POINT
0148
0149
0150     // STATIC CREATION METHODS
0151
0152     template<typename T> static NP* MakeFromValues( const T* vals, INT num_vals );
0153     template<typename T> static INT ALength(  T x0, T x1, T st );
0154     template<typename T> static NP* ARange(   T x0, T x1, T st );
0155
0156     template<typename T> static NP* ARange_FromString( const char* spec );
0157     template<typename T> static NP* ARange_(T start, T stop, T step);
0158
0159
0160     template<typename T> static NP* Linspace( T x0, T x1, unsigned nx, INT npayload=-1 );
0161     template<typename T> static NP* DeltaColumn(const NP* a, INT jcol=0 ) ;
0162
0163     template<typename T> static NP* MinusCosThetaLinearAngle(INT nx=181); // from -1. to 1.
0164     template<typename T> static NP* ThetaRadians(INT nx=181, T theta_max_pi=1. ); // from 0. to theta_max_pi*PI
0165                          static NP* SqrtOneMinusSquare( const NP* a );
0166                          static NP* Cos( const NP* src );
0167                          static NP* MakeWithCosineDomain( const NP* src, bool reverse );
0168                          static NP* Incremented( const NP* a, INT offset  );
0169
0170     template<typename T> static NP* MakeDiv( const NP* src, unsigned mul  );
0171
0172     template<typename T> static NP* Make( INT ni_=-1, INT nj_=-1, INT nk_=-1, INT nl_=-1, INT nm_=-1, INT no_=-1 );
0173     template<typename T, typename ... Args> static NP*  Make_( Args ... shape ) ;  // Make_shape
0174     template<typename T> static NP* MakeFlat(INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 );
0175
0176     static std::string HexDump(const std::string& str );
0177     static std::string HexDump(const char* buffer, size_t size);
0178
0179     static size_t ReadToBufferCallback(char* buffer, size_t size, size_t nitems, void* arr);
0180     void serializeToBuffer( std::vector<char>& buf, size_t size=16, size_t nitems=1 ) ; // not const as need to change *position*
0181     static void SaveBufferToFile(const std::vector<char>& buf, const char* path_ );
0182
0183     void prepareForStreamIn();
0184     static NP* CreateFromBuffer( const std::vector<char>& buf, size_t size=16, size_t nitems=1 );
0185     static size_t WriteToArrayCallback(char* buffer, size_t size, size_t nitems, void* arg);
0186
0187     //  MEMBER FUNCTIONS
0188
0189     char*       bytes();
0190     const char* bytes() const ;
0191
0192     bool hdr_complete() const ;
0193     char hdr_lastchar() const ;
0194     INT hdr_bytes() const ;
0195     UINT uhdr_bytes() const ;
0196
0197     INT num_items() const ;       // shape[0]
0198     INT num_values() const ;      // all values, product of shape[0]*shape[1]*...
0199     INT num_itemvalues() const ;  // values after first dimension
0200     INT arr_bytes() const ;       // formerly num_bytes
0201     UINT uarr_bytes() const ;
0202     UINT serialize_bytes() const ;  // total of hdr and array
0203
0204     INT item_bytes() const ;      // *item* comprises all dimensions beyond the first
0205     INT meta_bytes() const ;
0206     UINT umeta_bytes() const ;
0207
0208     template<typename T> bool is_itemtype() const ;  // size of item matches size of type
0209
0210     void clear() ;
0211
0212     void        update_headers();
0213     std::string make_header() const ;
0214     std::string make_prefix() const ;
0215     std::string make_jsonhdr() const ;
0216
0217     bool        decode_header(bool data_resize) ; // sets shape based on arr header
0218     bool        decode_prefix() ; // also resizes buffers ready for reading in
0219     unsigned    prefix_size(unsigned index) const ;
0220
0221
0222     // CTOR
0223     NP(const char* dtype_, const std::vector<INT>& shape_ );
0224     NP(const char* dtype_="<f4", INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 );
0225
0226     void init();
0227     void set_shape( INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1);
0228     void set_shape( const std::vector<INT>& src_shape );
0229     void get_shape( std::vector<size_t>& sh ) const ;
0230     // CAUTION: DO NOT USE *set_shape* TO CHANGE SHAPE (as it calls *init*) INSTEAD USE *change_shape*
0231     bool has_shape(INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 ) const ;
0232     void change_shape(INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 ) ;   // one dimension entry left at -1 can be auto-set
0233     void _change_shape_ni(INT ni, bool data_resize);
0234
0235     void change_shape_to_3D() ;
0236     void reshape( const std::vector<INT>& new_shape ); // product of shape before and after must be the same
0237
0238
0239     template<int P> void size_2D( INT& width, INT& height ) const ;
0240
0241
0242     void set_dtype(const char* dtype_); // *set_dtype* may change shape and size of array while retaining the same underlying bytes
0243     std::string dtype_name() const ;   // eg float32 uint8
0244
0245
0246
0247
0248     INT index(  INT i,  INT j=0,  INT k=0,  INT l=0, INT m=0, INT o=0) const ;
0249     INT index0( INT i,  INT j=-1, INT k=-1,  INT l=-1, INT m=-1, INT o=-1) const ;
0250
0251     INT dimprod(unsigned q) const ;    // product of dimensions starting from dimension q
0252
0253     template<typename... Args>
0254     INT index_(Args ... idxx ) const ;
0255
0256     template<typename... Args>
0257     INT stride_(Args ... idxx ) const ;
0258
0259     template<typename... Args>
0260     INT offset_(Args ... idxx ) const ;
0261
0262
0263     template<typename T>
0264     static std::string ArrayString(const std::vector<T>& vec, unsigned modulo=10 );
0265
0266     template<typename T, typename... Args>
0267     std::string sliceArrayString(Args ... idxx ) const ;
0268
0269     // use -1 to mark the last dimension to select upon
0270     // eg to select first item use (0, -1)
0271     //
0272     // Think of this as being like NumPy indexing
0273
0274     template<typename T, typename... Args>
0275     void slice(std::vector<T>& out, Args ... idxx ) const ;  // slice_ellipsis
0276
0277
0278     template<typename T>
0279     void slice_(std::vector<T>& out, const std::vector<INT>& idxx ) const ;
0280
0281     template<typename T>
0282     static std::string DescSlice(const std::vector<T>& out, unsigned edge );
0283
0284     template<typename T>
0285     static std::string DescSliceBrief(const std::vector<T>& out);
0286
0287
0288     static std::string DescIdx(const std::vector<INT>& idxx );
0289
0290
0291     INT pickdim__(    const std::vector<INT>& idxx) const ;
0292
0293     INT index__( const std::vector<INT>& idxx) const ;
0294     INT stride__(const std::vector<INT>& idxx) const ;
0295     INT offset__(const std::vector<INT>& idxx) const ;
0296
0297
0298
0299     INT       itemsize_(INT i=-1, INT j=-1, INT k=-1, INT l=-1, INT m=-1, INT o=-1) const ;
0300     void      itembytes_(const char** start,  INT& num_bytes, INT i=-1, INT j=-1, INT k=-1, INT l=-1, INT m=-1, INT o=-1 ) const  ;
0301
0302     template<typename T> T           get( INT i,  INT j=0,  INT k=0,  INT l=0, INT m=0, INT o=0) const ;
0303     template<typename T> void        set( T val, INT i,  INT j=0,  INT k=0,  INT l=0, INT m=0, INT o=0 ) ;
0304
0305     template<typename T> bool is_allzero() const ;
0306     bool is_empty() const ;
0307
0308
0309     std::string descValues() const ;
0310     std::string descSize() const ;
0311
0312
0313
0314     template<typename T>
0315     std::string descTable(int wid=7) const ;
0316
0317     template<typename T>
0318     T findMinimumTimestamp() const ;
0319
0320     template<typename T>
0321     std::string descTable_(
0322        int wid=7,
0323        const std::vector<std::string>* column_labels=nullptr,
0324        const std::vector<std::string>* row_labels=nullptr
0325        ) const ;
0326
0327     static NP* MakeLike(  const NP* src);
0328     static void CopyMeta( NP* b, const NP* a );
0329
0330     static constexpr const char* Preserve_Last_Column_Integer_Annotation = "Preserve_Last_Column_Integer_Annotation" ;
0331     void set_preserve_last_column_integer_annotation();
0332     bool is_preserve_last_column_integer_annotation() const ;
0333     static float PreserveNarrowedDoubleInteger( double f );
0334
0335     // STATIC CONVERSION METHODS
0336
0337     static NP* MakeNarrow(const NP* src);
0338     static NP* MakeWide(  const NP* src);
0339     static NP* MakeCopy(  const NP* src);
0340     static NP* MakeCopy3D(const NP* src);
0341     static NP* ChangeShape3D(NP* src);
0342
0343     static NP* MakeWideIfNarrow(  const NP* src);
0344     static NP* MakeNarrowIfWide(  const NP* src);
0345
0346     template<typename T>
0347     static NP* MakeWithType(const NP* src);
0348
0349
0350     template<typename... Args>
0351     static NP* MakeSelectCopy(  const NP* src, Args ... items );  // MakeSelectCopy_ellipsis
0352
0353     static NP* MakeSelectCopyE_( const NP* src, const char* ekey, const char* fallback=nullptr, char delim=',' );
0354     static NP* MakeSelectCopy_( const NP* src, const char* items );
0355     static NP* MakeSelectCopy_( const NP* src, const std::vector<INT>* items );
0356     static NP* MakeSelectCopy_( const NP* src, const INT* items, INT num_items );
0357
0358     static NP* MakeSelection( const NP* src, const NP* sel );  // sel expected to contain integer indices selecting items in src
0359
0360     static int ParseSliceString(std::vector<INT>& idxx, const char* _sli );
0361
0362     template<typename T>
0363     static int ParseSliceIndexString(T& start, T& stop, T& step, const char* _sli, bool dump=false );
0364     static bool LooksLikeSliceIndexString(const char* _sli );
0365     static bool LooksLikeSliceIndexStringIsEmpty(const char* _sli );
0366     static bool LooksLikeSliceIndexStringSuffix(const char* _sli, char** body, char** suffix );
0367
0368
0369     template<typename T>
0370     void parse_slice( NP_slice<T>& sli, const char* _sli) const ;
0371
0372
0373     template<typename T> static NP* MakeSliceSelection( const NP* src, const char* sel );
0374     template<typename T> NP* makeWhereSelection( const char* _sel ) const ;
0375     static bool LooksLikeWhereSelection(const char* _sel );
0376
0377
0378     static NP* MakeItemCopy(  const NP* src, INT i,INT j=-1,INT k=-1,INT l=-1,INT m=-1, INT o=-1 );
0379     void  item_shape(std::vector<INT>& sub, INT i, INT j=-1, INT k=-1, INT l=-1, INT m=-1, INT o=-1 ) const ;
0380     NP*   spawn_item(  INT i, INT j=-1, INT k=-1, INT l=-1, INT m=-1, INT o=-1  ) const ;
0381
0382     template<typename T> static NP* MakeCDF(  const NP* src );
0383     template<typename T> static NP* MakeICDF(  const NP* src, unsigned nu, unsigned hd_factor, bool dump );
0384     template<typename T> static NP* MakeProperty(const NP* a, unsigned hd_factor );
0385     template<typename T> static NP* MakeLookupSample(const NP* icdf_prop, unsigned ni, unsigned seed=0u, unsigned hd_factor=0u );
0386     template<typename T> static NP* MakeUniform( unsigned ni, unsigned seed=0u );
0387
0388     NP* copy() const ;
0389
0390     template<typename S>                               INT         count_if( std::function<bool(const S*)>) const ;
0391     template<typename T>                               NP*   simple_copy_if( std::function<bool(const T*)>) const ;  // atomic types only
0392     template<typename T, typename S>                   NP*          copy_if( std::function<bool(const S*)>) const ;
0393     template<typename T, typename S, typename... Args> NP* flexible_copy_if( std::function<bool(const S*)>, Args ... itemshape ) const ;
0394
0395
0396     // load array asis
0397     static NP* LoadIfExists(const char* path);
0398     static NP* Load(const char* path);
0399     static NP* LoadSlice(const char* _path, const char* _sli);
0400
0401
0402     template<typename T> static NP* LoadThenSlice( const char* path, const char* _sel );
0403
0404     static bool ExistsArrayFolder(const char* path );
0405
0406     static NP* Load_(const char* path);
0407     static NP* LoadFromBuffer_(const char* buffer, size_t size );
0408
0409
0410     static NP* LoadSlice_(const char* path, const char* sli);
0411
0412     static NP* Load(const char* dir, const char* name);
0413     static NP* Load(const char* dir, const char* reldir, const char* name);
0414
0415     // load float OR double array and if float(4 bytes per element) widens it to double(8 bytes per element)
0416     static NP* LoadWide(const char* dir, const char* reldir, const char* name);
0417     static NP* LoadWide(const char* dir, const char* name);
0418     static NP* LoadWide(const char* path);
0419
0420     // load float OR double array and if double(8 bytes per element) narrows it to float(4 bytes per element)
0421     static NP* LoadNarrow(const char* dir, const char* reldir, const char* name);
0422     static NP* LoadNarrow(const char* dir, const char* name);
0423     static NP* LoadNarrow(const char* path);
0424
0425     template<typename T> INT find_value_index(T value, T epsilon) const ;
0426     template<typename T> T   ifind2D(T value, INT jcol, INT jret ) const ;
0427
0428
0429     bool is_pshaped() const ;
0430     template<typename T> bool is_pconst() const ;
0431     template<typename T> bool is_pconst_dumb() const ;  // constant prop with more than 2 items
0432     template<typename T> T    pconst(T fallback=-1) const ;
0433
0434     template<typename T>
0435     static NP* MakePCopyNotDumb(const NP* a);
0436
0437     template<typename T>
0438     static NP* MakePConst( T dl, T dr, T vc );
0439
0440
0441     template<typename T> T    plhs(unsigned column ) const ;
0442     template<typename T> T    prhs(unsigned column ) const ;
0443     template<typename T> INT  pfindbin(const T value, unsigned column, bool& in_range ) const ;
0444     template<typename T> void get_edges(T& lo, T& hi, unsigned column, INT ibin) const ;
0445
0446
0447     template<typename T> T    psum(unsigned column ) const ;
0448     template<typename T> void pscale(T scale, unsigned column);
0449     template<typename T> void pscale_add(T scale, T add, unsigned column);
0450     template<typename T> void pdump(const char* msg="NP::pdump", T d_scale=1., T v_scale=1.) const ;
0451
0452     template<typename T> void minmax(T& mn, T&mx, unsigned j=1, INT item=-1 ) const ;
0453     template<int N, typename T> void minmax2D_reshaped(T* mn, T* mx, INT item_stride=1, INT item_offset=0) ; // not-const as temporarily changes shape
0454     template<typename T>        void minmax2D(T* mn, T* mx, INT item_stride=1, INT item_offset=0 ) const ;
0455
0456     template<typename T> void linear_crossings( T value, std::vector<T>& crossings ) const ;
0457     template<typename T> NP*  trapz() const ;                      // composite trapezoidal integration, requires pshaped
0458
0459     template<typename T> void psplit(std::vector<T>& domain, std::vector<T>& values) const ;
0460     template<typename T> T    pdomain(const T value, INT item=-1, bool dump=false  ) const ;
0461     template<typename T> T    interp(T x, INT item=-1) const ;                  // requires pshaped
0462     template<typename T> T    interp2D(T x, T y, INT item=-1) const ;
0463
0464
0465     template<typename T> T    interpHD(T u, unsigned hd_factor, INT item=-1 ) const ;
0466
0467     template<typename T> T    interp(INT iprop, T x) const ;           // deprecated signature for combined_interp
0468     template<typename T> T    combined_interp_3(INT i,               T x) const ;  // requires NP::Combine of pshaped arrays
0469     template<typename T> T    combined_interp_5(INT i, INT j, INT k, T x) const ;  // requires NP::Combine of pshaped arrays
0470
0471     template<typename T> T    _combined_interp(const T* vv, INT niv, T x) const  ;
0472
0473     template<typename T> static T FractionalRange( T x, T x0, T x1 );
0474
0475
0476     template<typename T> NP*  cumsum(INT axis=0) const ;
0477     template<typename T> void divide_by_last() ;
0478     void fillIndexFlat();
0479     void dump(INT i0=-1, INT i1=-1, INT j0=-1, INT j1=-1) const ;
0480
0481     static std::string Brief(const NP* a);
0482     std::string sstr() const ;
0483     std::string desc() const ;
0484     std::string brief() const ;
0485
0486     template<typename T> std::string repr() const ;
0487
0488
0489     void set_meta( const std::vector<std::string>& lines, char delim='\n' );
0490     void get_meta( std::vector<std::string>& lines,       char delim='\n' ) const ;
0491
0492     void set_names( const std::vector<std::string>& lines ) ;
0493     void get_names( std::vector<std::string>& lines ) const ;
0494
0495     INT  get_name_index( const char* qname ) const ;
0496     INT  get_name_index( const char* qname, unsigned& count ) const ;
0497     static INT NameIndex( const char* qname, unsigned& count, const std::vector<std::string>& names );
0498
0499     bool is_named_shape() const ;
0500     template<typename T> T  get_named_value( const char* qname, T fallback ) const ;
0501
0502     bool has_meta() const ;
0503     static std::string               get_meta_string_(const char* metadata, const char* key);
0504     static std::string               get_meta_string( const std::string& meta, const char* key) ;
0505
0506     typedef std::vector<std::string> VS ;
0507     typedef std::vector<int64_t> VT ;
0508
0509
0510     static NP* MakeMetaKVProfileArray(const std::string& meta, const char* ptn=nullptr);
0511     static void GetMetaKV_( const char* metadata    , VS* keys, VS* vals, bool only_with_profile, const char* ptn=nullptr );
0512     static void GetMetaKV(  const std::string& meta , VS* keys, VS* vals, bool only_with_profile, const char* ptn=nullptr );
0513
0514     template<typename T> static T    GetMeta( const std::string& mt, const char* key, T fallback );
0515
0516     template<typename T> static T    get_meta_(const char* metadata, const char* key, T fallback=0) ;  // for T=std::string must set fallback to ""
0517     template<typename T> T    get_meta(const char* key, T fallback=0) const ;  // for T=std::string must set fallback to ""
0518
0519     template<typename T> static void SetMeta(       std::string& mt, const char* key, T value );
0520     template<typename T> void set_meta(const char* key, T value ) ;
0521
0522     template<typename T> void        set_meta_kv(                  const std::vector<std::pair<std::string, T>>& kvs );
0523     template<typename T> static void        SetMetaKV( std::string& meta, const std::vector<std::pair<std::string, T>>& kvs );
0524     template<typename T> static std::string    DescKV(                    const std::vector<std::pair<std::string, T>>& kvs );
0525
0526     static void SetMetaKV_( std::string& meta, const VS& keys, const VS& vals );
0527     void        setMetaKV_( const VS& keys, const VS& vals );
0528
0529
0530     std::string descMeta() const ;
0531
0532     static INT         GetFirstStampIndex_OLD(const std::vector<int64_t>& stamps, int64_t discount=200000 );  // 200k us, ie 0.2 s
0533
0534
0535
0536
0537     static NP* MakeMetaKVS_ranges( const std::string& meta_, const char* ranges_, std::ostream* ss=nullptr );
0538     static NP* MakeMetaKVS_ranges2(const std::string& meta_, const char* ranges_, std::ostream* ss=nullptr );
0539
0540     static void Resolve_ranges( std::vector<std::string>& specs, const std::vector<std::string>& keys, const char* ranges_, std::ostream* ss=nullptr );
0541     static void TimeOrder_ranges( std::vector<int>& spec_order, const std::vector<std::string>& specs, const std::vector<std::string>& keys, const std::vector<int64_t>& tt, std::ostream* ss=nullptr );
0542
0543     static NP*  MakeMetaKVS_ranges_table(
0544         const std::vector<int>& spec_order,
0545         const std::vector<std::string>& specs,
0546         const std::vector<std::string>& keys,
0547         const std::vector<int64_t>& tt,
0548         std::ostream* ss=nullptr ) ;
0549
0550     static NP* MakeMetaKVS_ranges2_table(
0551         const std::vector<std::string>& specs,
0552         const std::vector<std::string>& keys,
0553         const std::vector<int64_t>& tt,
0554         std::ostream* ss=nullptr ) ;
0555
0556     static NP* MakeMetaKVS_ranges(  const std::vector<std::string>& keys, const std::vector<int64_t>& tt, const char* ranges_, std::ostream* ss=nullptr );
0557     static NP* MakeMetaKVS_ranges2( const std::vector<std::string>& keys, const std::vector<int64_t>& tt, const char* ranges_, std::ostream* ss=nullptr );
0558
0559
0560
0561     static std::string DescMetaKVS_kvs(      const std::vector<std::string>& keys, const std::vector<std::string>& vals, const std::vector<int64_t>& tt );
0562     static std::string DescMetaKVS_juncture( const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* juncture_ );
0563     static std::string DescMetaKVS_ranges(   const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* ranges_ ) ;
0564     static std::string DescMetaKVS_ranges2(  const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* ranges_ ) ;
0565
0566     static std::string DescMetaKVS(const std::string& meta, const char* juncture = nullptr, const char* ranges=nullptr );
0567     std::string descMetaKVS(const char* juncture=nullptr, const char* ranges=nullptr) const ;
0568
0569
0570
0571
0572
0573
0574     static std::string DescMetaKV(const std::string& meta, const char* juncture = nullptr, const char* ranges=nullptr );
0575     std::string descMetaKV(const char* juncture=nullptr, const char* ranges=nullptr) const ;
0576
0577
0578     const char* get_lpath() const ;
0579
0580
0581     template<typename T> static INT DumpCompare( const NP* a, const NP* b, unsigned a_column, unsigned b_column, const T epsilon );
0582     static INT Memcmp( const NP* a, const NP* b );
0583     static bool SameData( const NP* a, const NP* b );
0584
0585     static NP* Concatenate(const char* dir, const std::vector<std::string>& names);
0586
0587     template<typename T>
0588     static NP* Concatenate(const std::vector<T*>& aa );  // template allows use with "NP" and "const NP"
0589
0590     static NP* Combine(const std::vector<const NP*>& aa, bool annotate=true, const NP* parasite=nullptr );
0591     template<typename... Args> static NP* Combine_(Args ... aa);  // Combine_ellipsis
0592
0593
0594     static bool Exists(const char* base, const char* rel, const char* name);
0595     static bool Exists(const char* dir, const char* name);
0596     static bool Exists(const char* path);
0597     static bool ExistsSidecar( const char* path, const char* ext );
0598
0599
0600     static const char NODATA_PREFIX = '@' ;
0601     static bool IsNoData(const char* path);
0602     static const char* PathWithNoDataPrefix(const char* path);
0603
0604
0605     int load(const char* path, const char* sli );
0606
0607     std::ifstream* load_header(const char* _path, const char* _sli);
0608
0609     static bool   HasChar( const char* buffer, size_t size, char q);
0610     static size_t FindChar(const char* buffer, size_t size, char q);
0611     static std::optional<size_t> FindChar_(const char* buffer, size_t size, char q);
0612
0613
0614     void load_data( std::ifstream* fp, const char* sli );
0615
0616     int load_from_buffer(const char* buffer, size_t size);
0617     size_t load_header_from_buffer(const char* buffer, size_t size);
0618     size_t load_data_from_buffer( const char* buffer, size_t size, size_t pos );
0619     size_t load_meta_from_buffer( const char* buffer, size_t size, size_t pos );
0620
0621
0622     void load_data_sliced( std::ifstream* fp, const char* sli );
0623     void load_data_where(  std::ifstream* fp, const char* _sli );
0624
0625
0626     int load_string_(  const char* path, const char* ext, std::string& str );
0627     int load_strings_( const char* path, const char* ext, std::vector<std::string>* vstr );
0628     int load_meta(  const char* path );
0629     int load_names( const char* path );
0630     int load_labels( const char* path );
0631
0632     void save_string_( const char* path, const char* ext, const std::string& str ) const ;
0633     void save_strings_(const char* path, const char* ext, const std::vector<std::string>& vstr ) const ;
0634     void save_meta( const char* path) const ;
0635     void save_names(const char* path) const ;
0636     void save_labels(const char* path) const ;
0637
0638     void save_header(const char* path);
0639     void old_save(const char* path) ;  // formerly the *save* methods could not be const because of update_headers
0640     void save(const char* path) const ;  // *save* methods now can be const due to dynamic creation of header
0641
0642     void save(const char* dir, const char* name) const ;
0643     void save(const char* dir, const char* reldir, const char* name) const ;
0644
0645     void save_jsonhdr(const char* path) const ;
0646     void save_jsonhdr(const char* dir, const char* name) const ;
0647
0648     std::string get_jsonhdr_path() const ; // .npy -> .npj on loaded path
0649     void save_jsonhdr() const ;
0650
0651     template<typename T> std::string _present(T v) const ;
0652     template<typename T> void _dump(INT i0=-1, INT i1=-1, INT j0=-1, INT j1=-1) const ;
0653
0654
0655     template<typename T> void read(const T* src);
0656     template<typename T> void read2(const T* src);
0657     void read_bytes(char* src);
0658     template<typename T> void write(T* dst) const ;
0659
0660
0661     template<typename T> static void Write(const char* dir, const char* name, const std::vector<T>& values );
0662     template<typename T> static void Write(const char* dir, const char* name, const T* data, INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 );
0663     template<typename T> static void Write(const char* dir, const char* reldir, const char* name, const T* data, INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 );
0664     template<typename T> static void Write(const char* path                 , const T* data, INT ni=-1, INT nj=-1, INT nk=-1, INT nl=-1, INT nm=-1, INT no=-1 );
0665
0666
0667     static void WriteNames(const char* dir, const char* name,                     const std::vector<std::string>& names, unsigned num_names=0, bool append=false );
0668     static void WriteNames(const char* dir, const char* reldir, const char* name, const std::vector<std::string>& names, unsigned num_names=0, bool append=false );
0669     static void WriteNames(const char* path,                                      const std::vector<std::string>& names, unsigned num_names=0, bool append=false );
0670
0671
0672     static void WriteNames_Simple( const char* dir, const char* name, const std::vector<std::string>& names );
0673     static void WriteNames_Simple( const char* path,                  const std::vector<std::string>& names );
0674
0675     static void WriteString(const char* dir, const char* name, const char* ext, const std::string& str, bool append=false );
0676
0677     static void ReadNames(const char* dir, const char* name, std::vector<std::string>& names ) ;
0678     static void ReadNames(const char* path,                  std::vector<std::string>& names ) ;
0679
0680
0681     template<typename T>
0682     static std::string DescKV(const std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extra);
0683
0684     template<typename T>
0685     static void ReadKV(const char* dir, const char* name,
0686                       std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extra=nullptr ) ;
0687
0688     template<typename T>
0689     static void ReadKV(const char* path,
0690                        std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extra=nullptr ) ;
0691
0692
0693     template<typename T>
0694     static T ReadKV_Value(const char* dir, const char* name, const char* key );
0695
0696     template<typename T>
0697     static T ReadKV_Value(const char* spec_or_path, const char* key );
0698
0699     template <typename T>
0700     static NP* LoadFromTxtFile(const char* path);
0701
0702     template <typename T>
0703     static NP* LoadFromTxtFile(const char* base, const char* relp);
0704
0705     // FindUnit returns last matching unit string, so more specific strings that contain earlier
0706     // ones should come later in list
0707     static constexpr const char* UNITS = "eV MeV nm mm cm m ns g/cm2/MeV" ;
0708     static char* FindUnit(const char* line, const std::vector<std::string>& units  );
0709     static void Split(std::vector<std::string>& elems, const char* str, char delim);
0710     static void GetUnits(std::vector<std::string>& units );
0711     static bool IsListed(const std::vector<std::string>& ls, const char* str);
0712     static std::string StringConcat(const std::vector<std::string>& ls, char delim=' ' );
0713
0714     template <typename T>
0715     static NP* ZEROProp(T dscale=1.);
0716
0717     template <typename T>
0718     static NP* LoadFromString(const char* str, const char* path_for_debug_messages=nullptr );
0719
0720     static unsigned CountChar(const char* str, char q );
0721     static void ReplaceCharInsitu(       char* str, char q, char n, bool first );
0722     static const char* ReplaceChar(const char* str, char q, char n, bool first );
0723
0724     static const char* Resolve( const char* spec) ;
0725     static const char* ResolveProp(const char* spec);
0726
0727     // END OF TAIL STATICS
0728
0729     // primary data members
0730     std::vector<char> data = {} ;
0731     std::vector<INT>  shape ;
0732     std::string       meta ;
0733     std::vector<std::string>  names ;
0734     std::vector<std::string>* labels ;
0735
0736     // non-persisted transients, set on loading
0737     std::string lpath ;
0738     std::string lfold ;
0739
0740     // headers used for transport
0741     std::string _hdr ;
0742     std::string _prefix ;
0743
0744     // results from parsing _hdr or set_dtype
0745     const char* dtype ;
0746     char        uifc ;    // element type code
0747     INT         ebyte ;   // element bytes
0748     INT         size ;    // number of elements from shape
0749     size_t      position ;
0750
0751
0752     // nodata:true used for lightweight access to metadata from many arrays
0753     bool        nodata ;
0754
0755
0756 };
0757
0758
0759 //  SPECIALIZED MEMBER FUNCTIONS
0760
0761
0762 template<typename T> inline const T*  NP::cvalues() const { return (T*)data.data() ;  }
0763 template<typename T> inline T*        NP::values() { return (T*)data.data() ;  }
0764
0765 template<typename T> inline void NP::fill(T value)
0766 {
0767     T* vv = values<T>();
0768     for(INT i=0 ; i < size ; i++) *(vv+i) = value ;
0769 }
0770
0771 template<typename T> inline void NP::_fillIndexFlat(T offset)
0772 {
0773     T* vv = values<T>();
0774     for(INT i=0 ; i < size ; i++) *(vv+i) = T(i) + offset ;
0775 }
0776
0777
0778 /**
0779 BLOCK OF TEMPLATE SPECIALIZATIONS cvalues, values, _fillIndexFlat
0780 -------------------------------------------------------------------
0781
0782 specialize-types(){ cat << EOT
0783 float
0784 double
0785 char
0786 short
0787 int
0788 long
0789 long long
0790 unsigned char
0791 unsigned short
0792 unsigned int
0793 unsigned long
0794 unsigned long long
0795 EOT
0796 }
0797
0798 specialize-(){
0799     cat << EOC | perl -pe "s,T,$1,g" -
0800 template<> inline const T* NP::cvalues<T>() const { return (T*)data.data() ; }
0801 template<> inline       T* NP::values<T>()      {  return (T*)data.data() ; }
0802 template   void NP::_fillIndexFlat<T>(T) ;
0803
0804 EOC
0805 }
0806 specialize(){ specialize-types | while read t ; do specialize- "$t" ; done  ; }
0807 specialize
0808
0809 **/
0810
0811 // template specializations generated by above bash function
0812
0813 template<>  inline const float* NP::cvalues<float>() const { return (float*)data.data() ; }
0814 template<>  inline       float* NP::values<float>()      {  return (float*)data.data() ; }
0815 template    void NP::_fillIndexFlat<float>(float) ;
0816
0817 template<> inline const double* NP::cvalues<double>() const { return (double*)data.data() ; }
0818 template<> inline       double* NP::values<double>()      {  return (double*)data.data() ; }
0819 template   void NP::_fillIndexFlat<double>(double) ;
0820
0821 template<> inline const char* NP::cvalues<char>() const { return (char*)data.data() ; }
0822 template<> inline       char* NP::values<char>()      {  return (char*)data.data() ; }
0823 template   void NP::_fillIndexFlat<char>(char) ;
0824
0825 template<> inline const short* NP::cvalues<short>() const { return (short*)data.data() ; }
0826 template<> inline       short* NP::values<short>()      {  return (short*)data.data() ; }
0827 template   void NP::_fillIndexFlat<short>(short) ;
0828
0829 template<> inline const int* NP::cvalues<int>() const { return (int*)data.data() ; }
0830 template<> inline       int* NP::values<int>()      {  return (int*)data.data() ; }
0831 template   void NP::_fillIndexFlat<int>(int) ;
0832
0833 template<> inline const long* NP::cvalues<long>() const { return (long*)data.data() ; }
0834 template<> inline       long* NP::values<long>()      {  return (long*)data.data() ; }
0835 template   void NP::_fillIndexFlat<long>(long) ;
0836
0837 template<> inline const long long* NP::cvalues<long long>() const { return (long long*)data.data() ; }
0838 template<> inline       long long* NP::values<long long>()      {  return (long long*)data.data() ; }
0839 template   void NP::_fillIndexFlat<long long>(long long) ;
0840
0841 template<> inline const unsigned char* NP::cvalues<unsigned char>() const { return (unsigned char*)data.data() ; }
0842 template<> inline       unsigned char* NP::values<unsigned char>()      {  return (unsigned char*)data.data() ; }
0843 template   void NP::_fillIndexFlat<unsigned char>(unsigned char) ;
0844
0845 template<> inline const unsigned short* NP::cvalues<unsigned short>() const { return (unsigned short*)data.data() ; }
0846 template<> inline       unsigned short* NP::values<unsigned short>()      {  return (unsigned short*)data.data() ; }
0847 template   void NP::_fillIndexFlat<unsigned short>(unsigned short) ;
0848
0849 template<> inline const unsigned int* NP::cvalues<unsigned int>() const { return (unsigned int*)data.data() ; }
0850 template<> inline       unsigned int* NP::values<unsigned int>()      {  return (unsigned int*)data.data() ; }
0851 template   void NP::_fillIndexFlat<unsigned int>(unsigned int) ;
0852
0853 template<> inline const unsigned long* NP::cvalues<unsigned long>() const { return (unsigned long*)data.data() ; }
0854 template<> inline       unsigned long* NP::values<unsigned long>()      {  return (unsigned long*)data.data() ; }
0855 template   void NP::_fillIndexFlat<unsigned long>(unsigned long) ;
0856
0857 template<> inline const unsigned long long* NP::cvalues<unsigned long long>() const { return (unsigned long long*)data.data() ; }
0858 template<> inline       unsigned long long* NP::values<unsigned long long>()      {  return (unsigned long long*)data.data() ; }
0859 template   void NP::_fillIndexFlat<unsigned long long>(unsigned long long) ;
0860
0861
0862 // STATIC CREATION METHODS
0863
0864 template<typename T>
0865 inline NP* NP::MakeFromValues( const T* vals, INT num_vals )
0866 {
0867     NP* a = NP::Make<T>(num_vals) ;
0868     T* aa = a->values<T>();
0869     for(INT i=0 ; i < num_vals ; i++) aa[i] = vals[i] ;
0870     return a ;
0871 }
0872
0873 template <typename T>
0874 inline NP::INT NP::ALength(T x0, T x1, T dx) // static
0875 {
0876     T x = x0 ;
0877     INT n = 0 ;
0878     while( x < x1 )  // "<=" OR "<" ?  Follow np.arange
0879     {
0880        x += dx ;
0881        n++ ;
0882     }
0883     return n ;
0884 }
0885
0886 /**
0887 NP::ARange
0888 -------------
0889
0890 This follows NumPy np.arange in not giving end values.
0891 If you want to hit an end value use NP::Linspace.
0892
0893 ::
0894
0895     In [6]: a = np.arange(10,100,10,dtype=np.float32) ; a
0896     Out[6]: array([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=float32)
0897
0898     In [7]: a.shape
0899     Out[7]: (9,)
0900
0901 **/
0902
0903
0904 template<typename T>
0905 inline NP* NP::ARange( T x0, T x1, T dx ) // static
0906 {
0907     assert( x1 > x0 );
0908     assert( dx > 0. ) ;
0909     INT ni = ALength(x0,x1,dx) ;
0910     NP* a = NP::Make<T>(ni) ;
0911     T* aa = a->values<T>() ;
0912     for(INT i=0 ; i < ni ; i++ ) aa[i] = x0 + T(i)*dx ;
0913     return a ;
0914 }
0915
0916 /**
0917 NP::ARange_FromString
0918 ----------------------
0919
0920 Spec examples::
0921
0922    [20000:45600]
0923    [20000:45600:10]
0924
0925 **/
0926
0927
0928 template<typename T>
0929 inline NP* NP::ARange_FromString( const char* spec ) // static
0930 {
0931     NP_slice<T> sli = {} ;
0932     sli.start = 0 ;
0933     sli.stop  = 0 ;
0934     sli.step  = 1 ;
0935
0936     int rc = ParseSliceIndexString<T>( sli.start, sli.stop, sli.step, spec );
0937     bool valid = rc == 0 && sli.stop > 0 ;
0938
0939     if(!valid) std::cerr
0940         << "NP::ARange_FromString spec{" << ( spec ? spec : "-" ) << "}\n"
0941         << " valid " << ( valid ? "YES" : "NO " )
0942         << " ParseSliceIndexString.rc [" << rc << "]\n"
0943         << " sli.desc  " << sli.desc() << "\n"
0944         << " sli.stop == 0 " << ( sli.stop == 0 ? "YES" : "NO " ) << "\n"
0945         << " ERROR FAILED TO PARSE OR SLICE HAS ZERO STOP\n"
0946         ;
0947
0948     if(!valid) return nullptr ;
0949     return ARange_<T>(sli.start, sli.stop, sli.step);
0950 }
0951
0952
0953 /**
0954 NP::ARange_
0955 --------------
0956
0957 step>0
0958    like np.arange with step increment
0959
0960 step<0
0961    like np.linspace with int(-step) values between start and stop inclusive
0962
0963 **/
0964
0965 template<typename T>
0966 inline NP* NP::ARange_(T start, T stop, T step) // static
0967 {
0968     NP_slice<T> sli = { start, stop, step };
0969     INT num = sli.count();
0970
0971     NP* a = NP::Make<T>(num);
0972     T* aa = a->values<T>();
0973
0974     if( step > 0 )
0975     {
0976         // arange
0977         INT count = 0 ;
0978         for(T v=start ; v < stop ; v += step )
0979         {
0980             aa[count] = v ;
0981             count += 1;
0982         }
0983         assert( count == num );
0984     }
0985     else
0986     {
0987         // linspace
0988         INT ni = -step ;
0989         for(INT i=0 ; i < ni ; i++) aa[i] = start + (stop-start)*T(i)/T(ni-1) ;
0990     }
0991     return a ;
0992 }
0993
0994
0995 template <typename T>
0996 inline NP* NP::Linspace( T x0, T x1, unsigned nx, INT npayload )  // static
0997 {
0998     assert( x1 > x0 );
0999     assert( nx > 0 ) ;
1000     NP* a = NP::Make<T>(nx, npayload );  // npayload default is -1
1001
1002     if( nx == 1 )
1003     {
1004         a->set<T>(x0, 0 );
1005     }
1006     else
1007     {
1008         for(unsigned i=0 ; i < nx ; i++) a->set<T>( x0 + (x1-x0)*T(i)/T(nx-1), i )  ;
1009     }
1010     return a ;
1011 }
1012
1013
1014 /**
1015 NP::DeltaColumn
1016 ------------------
1017
1018 * for input array *a* of shape (ni,nj) returns array *b* of the same shape
1019   with the *jcol* column subtracted from every column (*jcol* defaults to zero, the first column),
1020   this is useful to convert epoch-relative-timestamps to first-timestamp-within-each-event-relative-timestamps
1021
1022 ::
1023
1024     In [6]: ab.a.stamps.shape
1025     Out[6]: (10, 13)
1026
1027     In [7]: delta_stamps = ab.a.stamps - ab.a.stamps[:,0, np.newaxis]  ; delta_stamps
1028     Out[7]:
1029     array([[    0,   209,   223,   265,   265,   489,   505,   522,   723,  2097,  2097, 63816, 63919],
1030            [    0,   231,   244,   284,   284,   285,   368,   394,   590,   633,   633, 57248, 57356],
1031            [    0,   233,   245,   285,   285,   286,   351,   380,   638,   681,   681, 57402, 57480],
1032            [    0,   133,   170,   173,   173,   175,   259,   305,   844, 30887, 30888, 60904, 60961],
1033            [    0,   187,   226,   229,   230,   232,   396,   471,  1188, 33499, 33500, 63340, 63406],
1034            [    0,   170,   210,   214,   215,   217,   294,   328,   634, 31164, 31164, 60558, 60630],
1035            [    0,   131,   171,   174,   175,   177,   237,   273,   570, 32739, 32740, 62156, 62219],
1036            [    0,   136,   175,   179,   179,   181,   242,   292,   827, 32244, 32244, 62329, 62389],
1037            [    0,   135,   175,   179,   179,   181,   247,   281,   597, 32904, 32904, 62951, 63012],
1038            [    0,   132,   170,   174,   175,   177,   237,   271,   565, 32285, 32285, 62043, 62105]])
1039
1040     In [8]: delta_stamps.shape
1041     Out[8]: (10, 13)
1042
1043 **/
1044
1045 template<typename T> inline NP* NP::DeltaColumn(const NP* a, INT jcol )
1046 {
1047     assert( a->shape.size() == 2 );
1048     INT ni = a->shape[0] ;
1049     INT nj = a->shape[1] ;
1050     assert( jcol < nj );
1051
1052     NP* b = NP::MakeLike(a) ;
1053
1054     const T* aa = a->cvalues<T>();
1055     T* bb = b->values<T>();
1056
1057     for(INT i=0 ; i < ni ; i++)
1058     for(INT j=0 ; j < nj ; j++)
1059     bb[i*nj+j] = aa[i*nj+j] - aa[i*nj+jcol] ;
1060
1061     return b ;
1062 }
1063
1064 /**
1065 NP::ThetaRadians
1066 -------------------
1067
1068 Angle range from zero to theta_max_pi
1069
1070 **/
1071
1072 template<typename T> inline NP* NP::ThetaRadians(INT nx, T theta_max_pi ) // static
1073 {
1074     NP* a = NP::Make<T>(nx);
1075     T* aa = a->values<T>();
1076     for(INT i=0 ; i < nx ; i++)
1077     {
1078         T frac = nx == 1 ? T(0) : T(i)/T(nx-1) ;
1079         aa[i] = theta_max_pi*M_PI*frac ;
1080     }
1081     return a ;
1082 }
1083
1084 /**
1085 NP::MinusCosThetaLinearAngle
1086 ------------------------------
1087
1088 Returns array of nx values from -1 to 1 where the
1089 spacing is calculated to make the steps linear
1090 in the angle. For example with nx=181 the -cos(theta)
1091 values will be provided at integer degrees from 0. to 180.
1092
1093 **/
1094
1095 template<typename T> inline NP* NP::MinusCosThetaLinearAngle(INT nx) // static
1096 {
1097     NP* a = NP::Make<T>(nx);
1098     T* aa = a->values<T>();
1099     for(INT i=0 ; i < nx ; i++)
1100     {
1101         T frac = nx == 1 ? T(0) : T(i)/T(nx-1) ;
1102         T theta = frac*M_PI ;
1103         aa[i] = -cos(theta) ;
1104     }
1105     return a ;
1106 }
1107
1108 inline NP* NP::SqrtOneMinusSquare( const NP* a ) // static
1109 {
1110     assert( a->uifc == 'f' );
1111     assert( a->ebyte == 4 || a->ebyte == 8  );
1112     assert( a->shape.size() == 1 );
1113     INT num = a->shape[0] ;
1114
1115     NP* b = NP::MakeLike(a);
1116     assert( b->ebyte == a->ebyte );
1117
1118     if( a->ebyte == 8 )
1119     {
1120         const double* aa = a->cvalues<double>();
1121         double* bb = b->values<double>();
1122         for(INT i=0 ; i < num ; i++ ) bb[i] = sqrt(1.  - aa[i]*aa[i]) ;
1123     }
1124     else if( a->ebyte == 4 )
1125     {
1126         const float* aa = a->cvalues<float>();
1127         float* bb = b->values<float>();
1128         for(INT i=0 ; i < num ; i++ ) bb[i] = sqrt(1.f - aa[i]*aa[i]) ;
1129     }
1130     return b ;
1131 }
1132
1133
1134 inline NP* NP::Cos( const NP* a ) // static
1135 {
1136     assert( a->uifc == 'f' );
1137     assert( a->ebyte == 4 || a->ebyte == 8  );
1138     assert( a->shape.size() == 1 );
1139     INT ni = a->shape[0] ;
1140     NP* b = NP::MakeLike(a);
1141     for(INT i=0 ; i < ni ; i++ )
1142     {
1143         int idx = i ;
1144         if( a->ebyte == 8 )
1145         {
1146             const double* aa = a->cvalues<double>();
1147             double* bb = b->values<double>();
1148             bb[idx] = std::cos(aa[idx]) ;
1149         }
1150         else if ( a->ebyte == 4 )
1151         {
1152             const float* aa = a->cvalues<float>();
1153             float* bb = b->values<float>();
1154             bb[idx] = std::cos(aa[idx]) ;
1155         }
1156     }
1157     return b ;
1158
1159 }
1160
1161 inline NP* NP::MakeWithCosineDomain( const NP* a, bool reverse ) // static
1162 {
1163     assert( a->uifc == 'f' );
1164     assert( a->ebyte == 4 || a->ebyte == 8  );
1165     assert( a->shape.size() == 2 );
1166     INT ni = a->shape[0] ;
1167     INT nj = a->shape[1] ;
1168     assert( nj == 2 );
1169
1170     NP* b = NP::MakeLike(a);
1171     assert( b->ebyte == a->ebyte );
1172
1173     for(INT i=0 ; i < ni ; i++ )
1174     {
1175         INT a_item = i ;
1176         INT b_item = reverse ? ni - 1 - i : i ;
1177
1178         for(INT j=0 ; j < nj ; j++ )
1179         {
1180             int a_idx = a_item*nj + j ;
1181             int b_idx = b_item*nj + j ;  ;
1182
1183             if( a->ebyte == 8 )
1184             {
1185                 const double* aa = a->cvalues<double>();
1186                 double* bb = b->values<double>();
1187                 bb[b_idx] = j == 0 ? std::cos(aa[a_idx]) : aa[a_idx] ;
1188             }
1189             else if ( a->ebyte == 4 )
1190             {
1191                 const float* aa = a->cvalues<float>();
1192                 float* bb = b->values<float>();
1193                 bb[b_idx] = j == 0 ? std::cos(aa[a_idx]) : aa[a_idx] ;
1194             }
1195         }
1196     }
1197
1198     return b ;
1199 }
1200
1201
1202
1203
1204
1205 inline NP* NP::Incremented( const NP* a, INT offset ) // static
1206 {
1207     assert( a->uifc == 'i' );
1208     assert( a->ebyte == 4 || a->ebyte == 8  );
1209     INT num = a->num_values() ;  // all dimensions
1210
1211     NP* b = NP::MakeLike(a);
1212
1213     if( a->ebyte == 8 )
1214     {
1215         const long* aa = a->cvalues<long>();
1216         long* bb = b->values<long>();
1217         for(INT i=0 ; i < num ; i++ ) bb[i] = aa[i] + long(offset) ;
1218     }
1219     else if( a->ebyte == 4 )
1220     {
1221         const int* aa = a->cvalues<int>();
1222         int* bb = b->values<int>();
1223         for(INT i=0 ; i < num ; i++ ) bb[i] = aa[i] + offset ;
1224     }
1225     return b ;
1226 }
1227
1228
1229 /**
1230 NP::MakeDiv
1231 -------------
1232
1233 When applied to a 1d array the contents are assumed to be domain edges
1234 that are divided by an integer multiple *mul*. For a src array of length ni
1235 the output array length is::
1236
1237     (ni - 1)*mul + 1
1238
1239 When applied to a 2d array the contents are assumed to be (ni,2) with
1240 (domain,value) pairs. The domain is divided as in the 1d case and values
1241 are filled in via linear interpolation.
1242
1243 For example,
1244
1245 * mul=1 -> ni
1246 * mul=2 -> (ni-1)*2+1 = 2*ni-1
1247 * mul=3 -> (ni-1)*3+1 = 3*ni-2
1248
1249 That is easier to understand in terms of the number of bins:
1250
1251 * mul=1   ni-1 -> 1*(ni-1)
1252 * mul=2   ni-1 -> 2*(ni-1)
1253 * mul=3   ni-1 -> 3*(ni-1)
1254
1255 Avoids repeating the top sub-edge of one bin that is the same as the first sub-edge
1256 of the next bin by skipping the last sub-edge unless it is from the last bin.
1257
1258
1259          +-----------------+     2 values, 1 bin    (mul 1)
1260
1261          +--------+--------+     3 values, 2 bins   (mul 2)
1262
1263          +----+---+---+----+     5 values, 4 bins   (mul 4)
1264
1265          +--+-+-+-+-+-+--+-+     9 values, 8 bins   (mul 8)
1266
1267 **/
1268
1269
1270 template <typename T>
1271 inline NP* NP::MakeDiv( const NP* src, unsigned mul  )
1272 {
1273     assert( mul > 0 );
1274     unsigned ndim = src->shape.size();
1275     assert( ndim == 1 || ndim == 2 );
1276
1277     unsigned src_ni = src->shape[0] ;
1278     unsigned src_bins = src_ni - 1 ;
1279     unsigned dst_bins = src_bins*mul ;
1280
1281     INT dst_ni = dst_bins + 1 ;
1282     INT dst_nj = ndim == 2 ? src->shape[1] : -1 ;
1283
1284 #ifdef DEBUG
1285     std::cout
1286         << " mul " << std::setw(3) << mul
1287         << " src_ni " << std::setw(3) << src_ni
1288         << " src_bins " << std::setw(3) << src_bins
1289         << " dst_bins " << std::setw(3) << dst_bins
1290         << " dst_ni " << std::setw(3) << dst_ni
1291         << " dst_nj " << std::setw(3) << dst_nj
1292         << std::endl
1293         ;
1294 #endif
1295
1296     NP* dst = NP::Make<T>( dst_ni, dst_nj );
1297     T* dst_v = dst->values<T>();
1298
1299     for(unsigned i=0 ; i < src_ni - 1 ; i++)
1300     {
1301         bool first_i = i == 0 ;
1302         const T s0 = src->get<T>(i,0) ;
1303         const T s1 = src->get<T>(i+1,0) ;
1304
1305 #ifdef DEBUG
1306         std::cout
1307             << " i " << std::setw(3) << i
1308             << " first_i " << std::setw(1) << first_i
1309             << " s0 " << std::setw(10) << std::fixed << std::setprecision(4) << s0
1310             << " s1 " << std::setw(10) << std::fixed << std::setprecision(4) << s1
1311             << std::endl
1312             ;
1313 #endif
1314         for(unsigned s=0 ; s < 1+mul ; s++) // s=0,1,2,... mul
1315         {
1316             bool first_s = s == 0 ;
1317             if( first_s && !first_i ) continue ;  // avoid repeating idx from bin to bin
1318
1319             const T frac = T(s)/T(mul) ;    //  frac(s=0)=0  frac(s=mul)=1
1320             const T ss = s0 + (s1 - s0)*frac ;
1321             unsigned idx = i*mul + s ;
1322
1323 #ifdef DEBUG
1324             std::cout
1325                 << " s " << std::setw(3) << s
1326                 << " first_s " << std::setw(1) << first_s
1327                 << " idx " << std::setw(3) << idx
1328                 << " ss " << std::setw(10) << std::fixed << std::setprecision(4) << ss
1329                 << std::endl
1330                 ;
1331 #endif
1332
1333             assert( idx < dst_ni );
1334
1335             if( dst_nj == -1 )
1336             {
1337                 dst_v[idx] = ss ;
1338             }
1339             else if( dst_nj == 2 )
1340             {
1341                 dst_v[2*idx+0] = ss ;
1342                 dst_v[2*idx+1] = src->interp<T>(ss) ;
1343             }
1344         }
1345     }
1346     return dst ;
1347 }
1348
1349
1350
1351 template <typename T> NP* NP::Make( INT ni_, INT nj_, INT nk_, INT nl_, INT nm_, INT no_ ) // static
1352 {
1353     std::string dtype = descr_<T>::dtype() ;
1354     NP* a = new NP(dtype.c_str(), ni_,nj_,nk_,nl_,nm_, no_) ;
1355     return a ;
1356 }
1357
1358 template<typename T, typename ... Args> NP*  NP::Make_( Args ... shape_ )   // Make_shape static
1359 {
1360     std::string dtype = descr_<T>::dtype() ;
1361     std::vector<INT> shape = {shape_ ...};
1362     NP* a = new NP(dtype.c_str(), shape ) ;
1363     return a ;
1364 }
1365
1366 template<typename T> NP* NP::MakeFlat(INT ni, INT nj, INT nk, INT nl, INT nm, INT no ) // static
1367 {
1368     NP* a = NP::Make<T>(ni, nj, nk, nl, nm, no );
1369     a->fillIndexFlat();
1370     return a ;
1371 }
1372
1373 inline std::string NP::HexDump(const std::string& str)  // static
1374 {
1375     return HexDump(str.data(), str.size());
1376 }
1377
1378 inline std::string NP::HexDump(const char* buffer, size_t size)  // static
1379 {
1380     std::stringstream ss ;
1381     for (size_t i = 0; i < size; i += 16)
1382     {
1383         ss << std::hex << std::setfill('0') << std::setw(8) << i << ": ";
1384
1385         for (size_t j = 0; j < 16; ++j)
1386         {
1387             if (i + j < size)
1388             {
1389                 ss << std::hex << std::setw(2) << (unsigned int)(unsigned char)buffer[i + j] ;
1390                 if( (j + 1) % 2 == 0 ) ss << " " ;
1391             }
1392             else
1393             {
1394                 ss << "   "; // Pad for alignment
1395             }
1396         }
1397
1398         ss << " ";
1399         for (size_t j = 0; j < 16 && i + j < size; ++j)
1400         {
1401             char c = buffer[i + j];
1402             ss << (std::isprint(c) ? c : '.');
1403         }
1404         ss << std::endl;
1405     }
1406     return ss.str();
1407 }
1408
1409
1410
1411 /**
1412 NP::ReadToBufferCallback
1413 --------------------------
1414
1415 Need to handle any size and nitems the
1416 caller throws at us. That means the
1417 number of bytes read could be as small
1418 as 1 byte requiring multiple calls to
1419 read even the header.
1420
1421 Note it is not appropriate to while loop
1422 in the callback, as need to read precisely
1423 the number of bytes instructed until run out
1424 of header+data to provide at which point must
1425 return zero.
1426
1427 So are relying on the caller to invoke this
1428 repeatedly until this returns zero.
1429
1430 CAUTION : SOME OF THE READS WILL CROSS BETWEEN
1431 HEADER AND DATA
1432
1433 Prior to calling this, do::
1434
1435     arr->update_headers();  // in addition to updating headers this zeros position
1436
1437
1438
1439     +------------+       +
1440     |   hdr      |       |
1441     +------------+       |
1442     |            |       +
1443     |   data     |
1444     |            |
1445     |            |
1446     +------------+
1447     |   meta     |
1448     +------------+
1449
1450
1451
1452 Reads the NP array serialized bytes into the buffer via multiple calls to this callback,
1453 so it could be serialized byte-by-byte if size*nitems = 1.
1454 Progress from call to call is stored in arr->position
1455
1456 **/
1457
1458 inline size_t NP::ReadToBufferCallback(char* buffer, size_t size, size_t nitems, void* arg ) // static
1459 {
1460     char* dest = buffer ;
1461     size_t max_read = size*nitems ;
1462     size_t remaining = max_read ;
1463
1464     NP* arr = (NP*)arg ; // established array being serialized to the buffer
1465
1466     size_t hdr_size = arr->uhdr_bytes() ;
1467     size_t data_size = arr->uarr_bytes();
1468     size_t meta_size = arr->umeta_bytes();
1469     size_t total_copy = 0 ;
1470
1471     bool dump = false ;
1472
1473     if(dump) std::cout
1474          << "[NP::ReadToBufferCallback"
1475          << " arr.sstr " << ( arr ? arr->sstr() : "-" )
1476          << " arr.position " << ( arr ? arr->position : 0 )
1477          << " hdr_size " << hdr_size
1478          << " data_size " << data_size
1479          << " meta_size " << meta_size
1480          << " max_read " << max_read
1481          << "\n"
1482          ;
1483
1484     bool reading_hdr = arr->position < hdr_size ;
1485     size_t hdr_copy = 0 ;
1486     if( reading_hdr )
1487     {
1488         size_t hdr_offset = arr->position ;
1489         size_t hdr_left   = hdr_size - hdr_offset ;
1490         hdr_copy   = remaining > hdr_left ? hdr_left : remaining ;
1491
1492         if( hdr_copy > 0 )
1493         {
1494             memcpy( dest, arr->_hdr.data() + hdr_offset, hdr_copy );
1495             arr->position    += hdr_copy ;
1496             total_copy       += hdr_copy ;
1497             dest             += hdr_copy ;  // move target
1498             remaining        -= hdr_copy ;
1499         }
1500
1501         if(dump) std::cout
1502              << "-NP::ReadToBufferCallback.reading_hdr"
1503              << " arr.position " << arr->position
1504              << " hdr_offset " << hdr_offset
1505              << " hdr_left " << hdr_left
1506              << " hdr_copy " << hdr_copy
1507              << " total_copy " << total_copy
1508              << " remaining " << remaining
1509              << "\n"
1510              ;
1511     }
1512     if( remaining == 0 ) return total_copy ;
1513
1514
1515     bool reading_data = arr->position >= hdr_size && arr->position < hdr_size + data_size ;
1516     size_t data_copy = 0 ;
1517     if( reading_data )
1518     {
1519         size_t data_offset = arr->position - hdr_size ;
1520         size_t data_left   = data_size - data_offset  ;
1521         data_copy   = remaining > data_left ? data_left : remaining ;
1522
1523         if( data_copy > 0 )
1524         {
1525             memcpy( dest, arr->bytes() + data_offset, data_copy );
1526             arr->position += data_copy ;
1527             total_copy    += data_copy ;
1528             dest          += data_copy ;
1529             remaining     -= data_copy ;
1530         }
1531
1532         if(dump) std::cout
1533              << "-NP::ReadToBufferCallback.reading_data"
1534              << " arr.position " << arr->position
1535              << " data_offset " << data_offset
1536              << " data_left " << data_left
1537              << " data_copy " << data_copy
1538              << " total_copy " << total_copy
1539              << " remaining " << remaining
1540              << "\n"
1541              ;
1542     }
1543     if( remaining == 0 ) return total_copy ;
1544
1545     bool reading_meta = arr->position >= hdr_size + data_size && arr->position < hdr_size + data_size + meta_size ;
1546     size_t meta_copy = 0 ;
1547     if( reading_meta )
1548     {
1549         size_t meta_offset = arr->position - hdr_size - data_size ;
1550         size_t meta_left   = meta_size - meta_offset ;
1551         meta_copy   = remaining > meta_left ? meta_left : remaining ;
1552
1553         if( meta_copy > 0)
1554         {
1555             memcpy( dest, arr->meta.data() + meta_offset, meta_copy );
1556             arr->position += meta_copy ;
1557             total_copy    += meta_copy ;
1558             dest          += meta_copy ;
1559             remaining     -= meta_copy ;
1560         }
1561         if(dump) std::cout
1562              << "-NP::ReadToBufferCallback.reading_meta"
1563              << " arr.position " << arr->position
1564              << " meta_offset " << meta_offset
1565              << " meta_left " << meta_left
1566              << " meta_copy " << meta_copy
1567              << " total_copy " << total_copy
1568              << " remaining " << remaining
1569              << "\n"
1570              ;
1571     }
1572
1573     if(dump) std::cout
1574          << "]NP::ReadToBufferCallback"
1575          << " reading_hdr " << reading_hdr
1576          << " reading_data " << reading_data
1577          << " reading_meta " << reading_meta
1578          << " hdr_copy " << hdr_copy
1579          << " data_copy " << data_copy
1580          << " meta_copy " << meta_copy
1581          << " total_copy " << total_copy
1582          << " remaining " << remaining
1583          << "\n"
1584          ;
1585
1586     return total_copy ;
1587 }
1588
1589
1590
1591 /**
1592 NP::serializeToBuffer
1593 ----------------------
1594
1595 Primary purpose of NP::serializeToBuffer is to test the NP::ReadToBufferCallback
1596 which is used by NP_CURL.h
1597
1598 The bytes written to buf should be exactly the same for
1599 any non-zero values of size and nitems.
1600
1601 **/
1602
1603 inline void NP::serializeToBuffer( std::vector<char>& buf, size_t size, size_t nitems )
1604 {
1605     update_headers();
1606
1607     size_t tot_bytes = serialize_bytes() ;  // hdr + data + meta
1608     buf.resize(tot_bytes);
1609
1610     char* buffer = buf.data();
1611     size_t read = 0 ;
1612     while(( read = ReadToBufferCallback(buffer, size, nitems, (void*)this ))) buffer += read ;
1613     size_t bytes_read = buffer - buf.data() ;
1614     bool expect_read = bytes_read == tot_bytes ;
1615
1616     if(1) std::cout
1617         << "NP::serializeToBuffer"
1618         << " size " << size
1619         << " nitems " << nitems
1620         << " size*nitems " << size*nitems
1621         << " tot_bytes " << tot_bytes
1622         << " buf.size " << buf.size()
1623         << " bytes_read " << bytes_read
1624         << " expect_read " << ( expect_read ? "YES" : "NO " )
1625         << "\n"
1626         ;
1627
1628     assert( expect_read );
1629 }
1630
1631 inline void NP::SaveBufferToFile(const std::vector<char>& buf, const char* path_ ) // static
1632 {
1633     const char* path = U::Resolve(path_);
1634     std::ofstream fp(path, std::ios::out|std::ios::binary);
1635     std::copy(buf.cbegin(), buf.cend(), std::ostreambuf_iterator<char>(fp));
1636 }
1637
1638
1639
1640 inline void NP::prepareForStreamIn()
1641 {
1642     _hdr = "" ; // scrub the placeholder default header, as use completed hdr for stream state transition
1643     position = 0 ;
1644     nodata = false ;
1645     lpath = "prepareForStream" ;
1646     lfold = "" ;
1647 }
1648
1649 /**
1650 NP::CreateFromBuffer
1651 ----------------------
1652
1653 The array created should be exactly the same for any non-zero values of size and nitems.
1654
1655 **/
1656
1657 inline NP* NP::CreateFromBuffer( const std::vector<char>& buf, size_t size, size_t nitems )
1658 {
1659     NP* arr = new NP ;
1660     arr->prepareForStreamIn();
1661
1662     bool dump = false ;
1663
1664     if(dump) std::cout
1665         << "[NP::CreateFromBuffer"
1666         << " buf.size " << buf.size()
1667         << " size " << size
1668         << " nitems " << nitems
1669         << " size*nitems " << size*nitems
1670         << "\n"
1671         ;
1672
1673     char* src0 = (char*)buf.data();
1674     char* src = src0 ;
1675     size_t write = 0 ;
1676     size_t bytes_write = src - src0 ;
1677
1678     while(( write = WriteToArrayCallback(src, size, nitems, (void*)arr )))
1679     {
1680         src += write ;
1681         bytes_write = src - src0 ;
1682
1683         if(dump) std::cout
1684             << "-NP::CreateFromBuffer"
1685             << " write " << write
1686             << " bytes_write " << bytes_write
1687             << "\n"
1688             ;
1689
1690         if(bytes_write == buf.size()) break ;   // seems no way to avoid this because meta is addon
1691     }
1692
1693     size_t tot_bytes = arr->serialize_bytes() ;
1694
1695     bool expect_write_0 = bytes_write == tot_bytes ;
1696     bool expect_write_1 = bytes_write == buf.size() ;
1697
1698     if(dump) std::cout
1699         << "]NP::CreateFromBuffer"
1700         << " buf.size " << buf.size()
1701         << " size " << size
1702         << " nitems " << nitems
1703         << " size*nitems " << size*nitems
1704         << " bytes_write " << bytes_write
1705         << " tot_bytes " << tot_bytes
1706         << " expect_write_0 " << ( expect_write_0 ? "YES" : "NO " )
1707         << " expect_write_1 " << ( expect_write_1 ? "YES" : "NO " )
1708         << "\n"
1709         ;
1710
1711     assert( expect_write_0 );
1712     assert( expect_write_1 );
1713
1714     return arr ;
1715 }
1716
1717
1718
1719 /**
1720 NP::WriteToArrayCallback
1721 -------------------------
1722
1723 This callback is called multiple times with non-zero size*nitems bytes
1724 which must be copied from the src into the array.
1725 This does something similar to NP::load_from_buffer
1726 but potentially it must operate byte-by-byte as the callback is
1727 repeatedly called.
1728
1729 The nascent array needs some setup before using this callback::
1730
1731     arr->prepareForStreamIn();
1732
1733
1734 Writes serialized bytes from src buffer directly into the nascent NP array
1735 instance via multiple calls to this callback, so the NP object is
1736 byte-by-byte reconstructed if size*nitems = 1.
1737 Progress from call to call is stored in arr->position
1738
1739 **/
1740
1741 inline size_t NP::WriteToArrayCallback(char* src, size_t size, size_t nitems, void* arg) // static
1742 {
1743     size_t max_write = size*nitems ;
1744     size_t remaining = max_write ;
1745     size_t total_copy = 0 ;
1746
1747     NP* arr = (NP*)arg ;  // nascent array that will be populated from the buffer
1748     std::string& _hdr = arr->_hdr ;
1749     bool hdr_complete = arr->hdr_complete();
1750     bool dump = false ;
1751
1752     if(dump) std::cout
1753         << "[NP::WriteToArrayCallback"
1754         << " max_write " << max_write
1755         << " arr.position " << arr->position
1756         << " hdr_complete " << ( hdr_complete ? "YES" : "NO " )
1757         << "\n"
1758         ;
1759
1760     size_t hdr_copy = 0 ;
1761     if(!hdr_complete)  // _hdr does not end with '\n' yet
1762     {
1763         char q = '\n' ;
1764
1765         bool has_newline = HasChar(src, max_write, q) ;
1766         hdr_copy = has_newline ? 1 + FindChar(src, max_write, q) : max_write ;  // 1 + includes '\n' into _hdr
1767
1768         /*
1769         size_t len0 = _hdr.length();
1770         _hdr.resize(len0 + hdr_copy );
1771         char* dst = (char*)_hdr.data() ;
1772         memcpy( dst + len0,  src, hdr_copy );
1773         */
1774         _hdr.append(src + total_copy, hdr_copy);
1775
1776         arr->position += hdr_copy ;
1777         total_copy    += hdr_copy ;
1778         remaining     -= hdr_copy ;
1779
1780
1781         if(!has_newline) return hdr_copy ;  // can do nothing more until hdr has completely arrived
1782
1783         assert( arr->hdr_complete() );        // _hdr must now end with '\n'
1784         bool data_resize = true ;
1785         arr->decode_header( data_resize );
1786     }
1787
1788     assert( arr->hdr_complete() );
1789
1790     // following decode_header know hdr and data size
1791     // but cannot know meta_size as not all bytes arrived yet
1792     size_t hdr_size = arr->uhdr_bytes() ;
1793     size_t data_size = arr->uarr_bytes();
1794
1795     bool reading_data = arr->position >= hdr_size && arr->position < hdr_size + data_size ;
1796     size_t data_copy = 0 ;
1797     if( reading_data )
1798     {
1799         size_t data_offset = arr->position - hdr_size ;
1800         size_t data_left   = data_offset < data_size ? data_size - data_offset : 0  ;
1801         data_copy   = remaining > data_left ? data_left : remaining ;
1802
1803         if( data_copy > 0 )
1804         {
1805             memcpy( arr->bytes() + data_offset, src + hdr_copy, data_copy );
1806             arr->position += data_copy ;
1807             total_copy    += data_copy ;
1808             remaining     -= data_copy ;
1809         }
1810
1811         if(dump) std::cout
1812              << "-NP::WriteToArrayCallback.reading_data"
1813              << " arr.position " << arr->position
1814              << " data_offset " << data_offset
1815              << " data_left " << data_left
1816              << " data_copy " << data_copy
1817              << " total_copy " << total_copy
1818              << " remaining " << remaining
1819              << "\n"
1820              ;
1821     }
1822     if( remaining == 0 ) return total_copy ;
1823
1824
1825     bool reading_meta = arr->position >= hdr_size + data_size ;  // dont know meta_size yet
1826     size_t meta_copy = 0 ;
1827     std::string& meta = arr->meta ;
1828     if( reading_meta )
1829     {
1830         meta_copy  = remaining ;   // everything after hdr and data assumed to be meta
1831
1832         if( meta_copy > 0)
1833         {
1834
1835             /*
1836             size_t len0 = meta.length();
1837             meta.resize( len0 + meta_copy );
1838
1839             char* dst = (char*)meta.data();
1840             memcpy( dst + len0, src + total_copy, meta_copy );
1841             */
1842             meta.append(src + total_copy, meta_copy);
1843
1844
1845             arr->position += meta_copy ;
1846             total_copy    += meta_copy ;
1847             remaining     -= meta_copy ;
1848         }
1849         if(dump) std::cout
1850              << "-NP::WriteToArrayCallback.reading_meta"
1851              << " arr.position " << arr->position
1852              << " meta_copy " << meta_copy
1853              << " total_copy " << total_copy
1854              << " remaining " << remaining
1855              << "\n"
1856              ;
1857     }
1858
1859     if(dump) std::cout
1860         << "]NP::WriteToArrayCallback"
1861         << " arr->position " << arr->position
1862         << " max_write " << max_write
1863         << " hdr_size " << hdr_size
1864         << " data_size " << data_size
1865         << " meta.size " << meta.size()
1866         << " total_copy " << total_copy
1867         << "\n"
1868         ;
1869
1870     return total_copy ;
1871 }
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881 //  MEMBER FUNCTIONS
1882
1883
1884 inline char*        NP::bytes() { return (char*)data.data() ;  }
1885 inline const char*  NP::bytes() const { return (char*)data.data() ;  }
1886
1887 inline bool     NP::hdr_complete() const { return hdr_lastchar() == '\n' ; }
1888 inline char     NP::hdr_lastchar() const { return _hdr.length() > 0 ? _hdr[_hdr.length() - 1] : '\0' ; }
1889 inline NP::INT  NP::hdr_bytes() const { return _hdr.length() ; }
1890 inline NP::UINT NP::uhdr_bytes() const { return _hdr.length() ; }
1891
1892 inline NP::INT NP::num_items() const { return shape[0] ;  }
1893 inline NP::INT NP::num_values() const { return NPS::size(shape) ;  }
1894 inline NP::INT NP::num_itemvalues() const { return NPS::itemsize(shape) ;  }
1895 inline NP::INT  NP::arr_bytes()  const {  return NPS::size(shape)*ebyte ; }
1896 inline NP::UINT NP::uarr_bytes()  const { return NPS::usize(shape)*UINT(ebyte) ; }
1897 inline NP::UINT NP::serialize_bytes()  const { return uhdr_bytes() + uarr_bytes() + umeta_bytes() ; }
1898
1899 inline NP::INT NP::item_bytes() const { return NPS::itemsize(shape)*ebyte ; }
1900 inline NP::INT NP::meta_bytes() const { return meta.length() ; }
1901 inline NP::UINT NP::umeta_bytes() const { return meta.length() ; }
1902
1903
1904 template<typename T>
1905 inline bool NP::is_itemtype() const  // size of item matches size of type
1906 {
1907     return item_bytes() == sizeof(T) ;
1908 }
1909
1910 /**
1911 NP::clear
1912 ----------
1913
1914 Note that std::vector::clear by itself does not deallocate
1915 the memory, it is necessary in addition to call std::vector::shrink_to_fit
1916 and even that is non-binding.
1917
1918 **/
1919
1920
1921 inline void NP::clear()
1922 {
1923     data.clear();
1924     data.shrink_to_fit();
1925     shape[0] = 0 ;
1926 }
1927
1928 /**
1929 NP::update_headers
1930 -------------------
1931
1932 Updates network header "prefix" and array header descriptions of the object.
1933
Cannot do this automatically in setters that change shape, dtype or metadata
because NP is a struct with public members. So this must be invoked explicitly before streaming.
1936
1937 HMM : do not like this as it prevents NP::save from being const
1938
1939 **/
1940
1941 inline void NP::update_headers()
1942 {
1943     std::string net_hdr = make_prefix();
1944     _prefix.assign(net_hdr.data(), net_hdr.length());
1945
1946     std::string hdr =  make_header();
1947     _hdr.resize(hdr.length());
1948     _hdr.assign(hdr.data(), hdr.length());
1949
1950     position = 0 ;  // used by streaming static  : ReadToBufferCallback
1951 }
1952
1953 inline std::string NP::make_header() const
1954 {
1955     std::string hdr =  NPU::_make_header( shape, dtype ) ;
1956     return hdr ;
1957 }
1958 inline std::string NP::make_prefix() const
1959 {
1960     std::vector<unsigned> parts ;
1961     parts.push_back(hdr_bytes());
1962     parts.push_back(arr_bytes());
1963     parts.push_back(meta_bytes());
1964     parts.push_back(0);    // xxd neater to have 16 byte prefix
1965
1966     std::string net_hdr = net_hdr::pack( parts );
1967     return net_hdr ;
1968 }
1969 inline std::string NP::make_jsonhdr() const
1970 {
1971     std::string json = NPU::_make_jsonhdr( shape, dtype ) ;
1972     return json ;
1973 }
1974
1975 /**
1976 NP::decode_header
1977 -----------------------
1978
1979 Array header _hdr is parsed setting the below and data is resized.
1980
1981 shape
1982     vector of INT
1983 uifc
1984     element type code
1985 ebyte
1986     element number of bytes
1987 size
1988     number of elements
1989
1990 Decoding the header gives the shape of the
1991 data, so together with the size of the type
1992 know how many bytes can read from the remainder of the stream
1993 following the header.
1994
1995 **/
1996
1997 inline bool NP::decode_header(bool data_resize)
1998 {
1999     shape.clear();
2000     std::string descr ;
2001     NPU::parse_header( shape, descr, uifc, ebyte, _hdr ) ;
2002     dtype = strdup(descr.c_str());
2003     size = NPS::size(shape);    // product of shape dimensions
2004     if(data_resize) data.resize(size*ebyte) ;   // data is now just char
2005     return true  ;
2006 }
2007
2008
2009
2010 /**
2011 NP::decode_prefix
2012 -------------------
2013
2014 This is used for boost asio handlers to resize the
2015 object buffers as directed by the sizes extracted
2016 from the prefix header. For example see::
2017
2018    np_client::handle_read_header
2019    np_session::handle_read_header
2020
2021 Note that this is not used when streaming in
2022 from file. There is no prefix header in that
2023 situation.
2024
2025 **/
2026 inline bool NP::decode_prefix()
2027 {
2028     unsigned hdr_bytes_nh = prefix_size(0);
2029     unsigned arr_bytes_nh = prefix_size(1);
2030     unsigned meta_bytes_nh = prefix_size(2);
2031
2032     if(VERBOSE) std::cout
2033         << "NP::decode_prefix"
2034         << " hdr_bytes_nh " << hdr_bytes_nh
2035         << " arr_bytes_nh " << arr_bytes_nh
2036         << " meta_bytes_nh " << meta_bytes_nh
2037         << std::endl
2038         ;
2039
2040     bool valid = hdr_bytes_nh > 0 ;
2041     if(valid)
2042     {
2043         _hdr.resize(hdr_bytes_nh);
2044         data.resize(arr_bytes_nh);   // data now vector of chars
2045         meta.resize(meta_bytes_nh);
2046     }
2047     return valid ;
2048 }
2049 inline unsigned NP::prefix_size(unsigned index) const { return net_hdr::unpack(_prefix, index); }
2050
2051
2052
2053
2054 // CTOR
2055 inline NP::NP(const char* dtype_, const std::vector<INT>& shape_ )
2056     :
2057     shape(shape_),
2058     labels(nullptr),
2059     dtype(strdup(dtype_)),
2060     uifc(NPU::_dtype_uifc(dtype)),
2061     ebyte(NPU::_dtype_ebyte(dtype)),
2062     size(NPS::size(shape)),
2063     position(0),
2064     nodata(false)
2065 {
2066     init();
2067 }
2068
2069 // DEFAULT CTOR
2070 inline NP::NP(const char* dtype_, INT ni, INT nj, INT nk, INT nl, INT nm, INT no )
2071     :
2072     labels(nullptr),
2073     dtype(strdup(dtype_)),
2074     uifc(NPU::_dtype_uifc(dtype)),
2075     ebyte(NPU::_dtype_ebyte(dtype)),
2076     size(NPS::set_shape(shape, ni,nj,nk,nl,nm,no )),
2077     position(0),
2078     nodata(false)
2079 {
2080     init();
2081 }
2082
2083 inline void NP::init()
2084 {
2085     unsigned long long size_ = size ;
2086     unsigned long long ebyte_ = ebyte ;
2087     unsigned long long num_char = size_*ebyte_ ;
2088
2089     if(VERBOSE) std::cout
2090         << "NP::init"
2091         << " size " << size
2092         << " ebyte " << ebyte
2093         << " num_char " << num_char
2094         << std::endl
2095         ;
2096
2097     data.resize( num_char ) ;  // vector of char
2098     std::fill( data.begin(), data.end(), 0 );
2099     _prefix.assign(net_hdr::LENGTH, '\0' );
2100     _hdr = make_header();
2101 }
2102
2103
2104
2105
2106 inline void NP::set_shape(INT ni, INT nj, INT nk, INT nl, INT nm, INT no)
2107 {
2108     size = NPS::copy_shape(shape, ni, nj, nk, nl, nm, no);
2109     init();
2110 }
2111 inline void NP::set_shape(const std::vector<INT>& src_shape)
2112 {
2113     size = NPS::copy_shape(shape, src_shape);
2114     init();
2115 }
2116 inline void NP::get_shape( std::vector<size_t>& sh ) const
2117 {
2118     size_t sz = NPS::copy_shape(sh, shape);
2119     assert( sz == size_t(size) );
2120 }
2121
2122 inline bool NP::has_shape(INT ni, INT nj, INT nk, INT nl, INT nm, INT no) const
2123 {
2124     unsigned ndim = shape.size() ;
2125     return
2126            ( ni == -1 || ( ndim > 0 && INT(shape[0]) == ni)) &&
2127            ( nj == -1 || ( ndim > 1 && INT(shape[1]) == nj)) &&
2128            ( nk == -1 || ( ndim > 2 && INT(shape[2]) == nk)) &&
2129            ( nl == -1 || ( ndim > 3 && INT(shape[3]) == nl)) &&
2130            ( nm == -1 || ( ndim > 4 && INT(shape[4]) == nm)) &&
2131            ( no == -1 || ( ndim > 5 && INT(shape[5]) == no))
2132            ;
2133 }
2134
2135
2136 /**
2137 NP::change_shape
2138 ------------------
2139
2140 One dimension can be -1 causing it to be filled automatically.
2141 See tests/NPchange_shapeTest.cc
2142
2143 **/
2144
2145 inline void NP::change_shape(INT ni, INT nj, INT nk, INT nl, INT nm, INT no)
2146 {
2147     INT size2 = NPS::change_shape(shape, ni, nj, nk, nl, nm, no);
2148     bool expect =  size == size2  ;
2149     if(!expect) std::raise(SIGINT) ;
2150     assert( size == size2 );
2151 }
2152
2153 /**
2154 NP::_change_shape_ni
2155 -----------------------
2156
2157 This is used during sliced loading
2158 to reduce the num_items to conform
2159 to the slicing.
2160
2161 **/
2162
2163 inline void NP::_change_shape_ni(INT ni, bool data_resize)
2164 {
2165     unsigned ndim = shape.size() ;
2166     assert( ndim > 0 );
2167
2168     if(!data_resize) // eg from NP::LoadSlice when the slice is larger than the array
2169     {
2170         assert( ni <= shape[0] );
2171     }
2172
2173     shape[0] = std::min( ni, shape[0] ) ;       // slicing can only keep the same or reduce
2174     size = NPS::size(shape);                    // product of shape dimensions
2175     if(data_resize) data.resize(size*ebyte) ;   // data is now just char
2176 }
2177
2178
2179
2180 inline void NP::change_shape_to_3D()
2181 {
2182     unsigned ndim = shape.size() ;
2183     if(VERBOSE) std::cerr << "NP::change_shape_to_3D sstr " << sstr() << std::endl ;
2184
2185     if( ndim < 3 )
2186     {
2187         std::cerr << "NP::change_shape_to_3D : ndim < 3 : must be 3 or more, not: " << ndim << std::endl ;
2188         assert(0);
2189     }
2190     else if( ndim == 3 )
2191     {
2192         if(VERBOSE) std::cerr << "NP::change_shape_to_3D : ndim == 3, no reshaping needed " << std::endl ;
2193     }
2194     else if( ndim > 3 )
2195     {
2196         if(VERBOSE) std::cerr << "NP::change_shape_to_3D : ndim > 3, reshaping needed, ndim: " << ndim  << std::endl ;
2197         INT ni = 1 ;
2198         for(INT i=0 ; i < INT(ndim) - 2 ; i++) ni *= shape[i] ;
2199         // scrunch up the higher dimensions
2200         change_shape(ni, shape[ndim-2], shape[ndim-1] );
2201         if(VERBOSE) std::cerr << "NP::change_shape_to_3D : changed shape to : " << sstr() << std::endl  ;
2202     }
2203 }
2204
2205 inline void NP::reshape( const std::vector<INT>& new_shape )
2206 {
2207     NPS::reshape(shape, new_shape);
2208 }
2209
2210 /**
2211 NP::size_2D
2212 -------------
2213
2214 Returns the conventional 2D (width, height) for payload last dimension P
2215 passed by template variable. For example with an array of shape::
2216
2217     (ni, nj, nk, nl, 4 )
2218
2219 A call to::
2220
2221    a->size_2D<4>( width, height)
2222
2223 Would return::
2224
2225    height = ni*nj*nk
2226    width = nl
2227
2228 NB the last dimension must match the template variable, 4 in the above example.
2229
2230 **/
2231
2232 template<int P>
2233 inline void NP::size_2D( INT& width, INT& height ) const
2234 {
2235     NPS::size_2D<P>(width, height, shape) ;
2236 }
2237
2238
2239 /**
2240 NP::set_dtype
2241 --------------
2242
2243 Setting a dtype with a different element size ebyte
2244 necessarily changes shape and size of array.
2245
2246 CAUTION this will cause asserts if the array shape is such
2247 that the dtype change and resulting shape change would
2248 change the total number of bytes in the array.
2249
2250 **/
2251
2252 inline void NP::set_dtype(const char* dtype_)
2253 {
2254     char uifc_ = NPU::_dtype_uifc(dtype_) ;
2255     INT  ebyte_ = NPU::_dtype_ebyte(dtype_) ;
2256     assert( ebyte_ == 1 || ebyte_ == 2 || ebyte_ == 4 || ebyte_ == 8 );
2257
2258     if(VERBOSE) std::cout
2259         << "changing dtype/uifc/ebyte from: "
2260         << dtype << "/" << uifc << "/" << ebyte
2261         << " to: "
2262         << dtype_ << "/" << uifc_ << "/" << ebyte_
2263         << std::endl
2264         ;
2265
2266     if( ebyte_ == ebyte )
2267     {
2268         std::cout << "NP::set_dtype : no change in ebyte keeps same array dimensions" << std::endl ;
2269     }
2270     else if( ebyte_ < ebyte )
2271     {
2272         INT expand = ebyte/ebyte_ ;
2273         std::cout << "NP::set_dtype : shifting to smaller ebyte increases array dimensions, expand: " << expand << std::endl ;
2274         for(unsigned i=0 ; i < shape.size() ; i++ ) shape[i] *= expand ;
2275     }
2276     else if( ebyte_ > ebyte )
2277     {
2278         INT shrink = ebyte_/ebyte ;
2279         std::cout << "NP::set_dtype : shifting to larger ebyte decreases array dimensions, shrink: " << shrink << std::endl ;
2280         for(unsigned i=0 ; i < shape.size() ; i++ ) shape[i] /= shrink  ;
2281     }
2282
2283     INT num_bytes  = size*ebyte ;      // old
2284     INT size_ = NPS::size(shape) ;     // new
2285     INT num_bytes_ = size_*ebyte_ ;    // new
2286
2287     bool allowed_change = num_bytes_ == num_bytes ;
2288     if(!allowed_change)
2289     {
2290         std::cout << "NP::set_dtype : NOT ALLOWED as it would change the number of bytes " << std::endl ;
2291         std::cout << " old num_bytes " << num_bytes << " proposed num_bytes_ " << num_bytes_ << std::endl ;
2292     }
2293     assert( allowed_change );
2294
2295     // change the members
2296
2297     dtype = strdup(dtype_);
2298     uifc  = uifc_ ;
2299     ebyte = ebyte_ ;
2300     size = size_ ;
2301
2302     std::cout << desc() << std::endl ;
2303 }
2304
2305 inline std::string NP::dtype_name() const
2306 {
2307     std::stringstream ss ;
2308     switch(uifc)
2309     {
2310         case 'u': ss << "uint"    ; break ;
2311         case 'i': ss << "int"     ; break ;
2312         case 'f': ss << "float"   ; break ;
2313         case 'c': ss << "complex" ; break ;
2314     }
2315     ss << ebyte*8 ;
2316     std::string str = ss.str();
2317     return str ;
2318 }
2319
2320
2321 /**
2322 NP::index
2323 -----------
2324
2325 Provides the flat value index from a set of integer dimension indices.
2326 Negative dimension indices are interpreted to count from the back, ie -1 is the last element
2327 in a dimension.
2328
2329 **/
2330
2331 inline NP::INT NP::index( INT i,  INT j,  INT k,  INT l, INT m, INT o ) const
2332 {
2333     INT nd = shape.size() ;
2334     INT ni = nd > 0 ? shape[0] : 1 ;
2335     INT nj = nd > 1 ? shape[1] : 1 ;
2336     INT nk = nd > 2 ? shape[2] : 1 ;
2337     INT nl = nd > 3 ? shape[3] : 1 ;
2338     INT nm = nd > 4 ? shape[4] : 1 ;
2339     INT no = nd > 5 ? shape[5] : 1 ;
2340
2341     INT ii = i < 0 ? ni + i : i ;
2342     INT jj = j < 0 ? nj + j : j ;
2343     INT kk = k < 0 ? nk + k : k ;
2344     INT ll = l < 0 ? nl + l : l ;
2345     INT mm = m < 0 ? nm + m : m ;
2346     INT oo = o < 0 ? no + o : o ;
2347
2348     return  ii*nj*nk*nl*nm*no + jj*nk*nl*nm*no + kk*nl*nm*no + ll*nm*no + mm*no + oo ;
2349 }
2350
2351 /**
2352 NP::index0 : Provides element offset
2353 ---------------------------------------
2354
2355 Same as NP::index but -ve "missing" indices are treated as if they were zero.
2356
2357 **/
2358
2359 inline NP::INT NP::index0( INT i,  INT j,  INT k,  INT l, INT m, INT o) const
2360 {
2361     INT nd = shape.size() ;
2362
2363     INT ni = nd > 0 ? shape[0] : 1 ;
2364     INT nj = nd > 1 ? shape[1] : 1 ;
2365     INT nk = nd > 2 ? shape[2] : 1 ;
2366     INT nl = nd > 3 ? shape[3] : 1 ;
2367     INT nm = nd > 4 ? shape[4] : 1 ;
2368     INT no = nd > 5 ? shape[5] : 1 ;
2369
2370     INT ii = i < 0 ? 0 : i ;
2371     INT jj = j < 0 ? 0 : j ;
2372     INT kk = k < 0 ? 0 : k ;
2373     INT ll = l < 0 ? 0 : l ;
2374     INT mm = m < 0 ? 0 : m ;
2375     INT oo = o < 0 ? 0 : o ;
2376
2377     if(!(ii <  ni)) std::cerr << "NP::index0 ii/ni " << ii << "/" << ni  << std::endl ;
2378
2379     assert( ii < ni );
2380     assert( jj < nj );
2381     assert( kk < nk );
2382     assert( ll < nl );
2383     assert( mm < nm );
2384     assert( oo < no );
2385
2386     /*
2387     std::cout << " ii " << ii << " nj*nk*nl*nm*no " << std::setw(10) << nj*nk*nl*nm*no << " ii*nj*nk*nl*nm*no " << ii*nj*nk*nl*nm*no << std::endl ;
2388     std::cout << " jj " << jj << "    nk*nl*nm*no " << std::setw(10) <<    nk*nl*nm*no << " jj*   nk*nl*nm*no " << jj*nk*nl*nm*no    << std::endl ;
2389     std::cout << " kk " << kk << "       nl*nm*no " << std::setw(10) <<       nl*nm*no << " kk*      nl*nm*no " << kk*nl*nm*no       << std::endl ;
2390     std::cout << " ll " << ll << "          nm*no " << std::setw(10) <<          nm*no << " ll*         nm*no " << ll*nm*no          << std::endl ;
2391     std::cout << " mm " << mm << "             no " << std::setw(10) <<             no << " mm*            no " << mm*no             << std::endl ;
2392     std::cout << " oo " << oo << "              1 " << std::setw(10) <<              1 << " oo*             1 " << oo                << std::endl ;
2393     */
2394
2395     return  ii*nj*nk*nl*nm*no + jj*nk*nl*nm*no + kk*nl*nm*no + ll*nm*no + mm*no + oo ;
2396     //      i                   j                k             l          m       o
2397 }
2398
2399 inline NP::INT NP::dimprod(unsigned q) const   // product of dimensions starting from dimension q
2400 {
2401     INT dim = 1 ;
2402     for(INT d=q ; d < INT(shape.size()) ; d++) dim *= shape[d] ;
2403     return dim ;
2404 }
2405
2406
2407 template<typename... Args>
2408 inline NP::INT NP::index_(Args ... idxx_) const
2409 {
2410     std::vector<INT> idxx = {idxx_...};
2411     return index__(idxx);
2412 }
2413
2414 template<typename... Args>
2415 inline NP::INT NP::stride_(Args ... idxx_) const
2416 {
2417     std::vector<INT> idxx = {idxx_...};
2418     return stride__(idxx);
2419 }
2420
2421 template<typename... Args>
2422 inline NP::INT NP::offset_(Args ... idxx_) const
2423 {
2424     std::vector<INT> idxx = {idxx_...};
2425     return offset__(idxx);
2426 }
2427
2428
2429 template<typename T>
2430 inline std::string NP::ArrayString(const std::vector<T>& vec, unsigned modulo ) // static
2431 {
2432     const char* np_type = "uint64" ;   // TODO: make this depend on type
2433     unsigned size = vec.size();
2434
2435     std::stringstream ss ;
2436     ss << "np.array([ "  ;
2437     for(unsigned i=0 ; i < size ; i++)
2438     {
2439         if( size > modulo && (( i % modulo ) == 0) ) ss << std::endl ;
2440         ss << vec[i] << ( i < size - 1 ? ", " : " " ) ;
2441     }
2442     ss << "], dtype=np." << np_type << " )"  ;
2443
2444     std::string s = ss.str();
2445     return s ;
2446 }
2447
2448
2449 template<typename T, typename... Args>
2450 inline std::string NP::sliceArrayString(Args ... idxx_ ) const
2451 {
2452     std::vector<INT> idxx = {idxx_...};
2453     std::vector<T> out ;
2454     slice(out, idxx );
2455     return ArrayString(out, 10);
2456 }
2457
2458
2459 /**
2460 NP::slice "slice_ellipsis"
2461 ---------------------------
2462
2463 **/
2464
2465 template<typename T, typename... Args> inline void NP::slice(std::vector<T>& out, Args ... idxx_ ) const
2466 {
2467    std::vector<INT> idxx = {idxx_...};
2468    slice_(out, idxx);
2469 }
2470
2471
2472
2473
2474 /**
2475 NP::slice_
2476 -----------
2477
Collect a slice of values from the array into the out vector, which is
first resized to fit them.
2480
2481 **/
2482
2483
2484 template<typename T> inline void NP::slice_(std::vector<T>& out, const std::vector<INT>& idxx ) const
2485 {
2486     if(NP::VERBOSE)
2487     std::cout
2488         << " DescIdx(idxx) " << DescIdx(idxx)
2489         << " sstr() " << sstr()
2490         << std::endl
2491         ;
2492
2493     bool all_dim =  idxx.size() == shape.size() ;
2494     if(!all_dim) std::cerr << " idxx.size " << idxx.size() << " shape.size " << shape.size() << " all_dim " << all_dim << std::endl ;
2495     assert(all_dim) ;
2496
2497     INT slicedim = pickdim__(idxx);
2498     assert( slicedim > -1 );
2499
2500     unsigned start = index__(idxx) ;
2501     unsigned stride = stride__(idxx) ;
2502     unsigned offset = offset__(idxx) ;
2503     unsigned numval = shape[slicedim] ;
2504
2505     if(NP::VERBOSE)
2506     std::cout
2507         << " idxx " << DescIdx(idxx)
2508         << " slicedim " << slicedim
2509         << " start " << start
2510         << " stride " << stride
2511         << " offset " << offset
2512         << " numval " << numval
2513         << std::endl
2514         ;
2515
2516     const T* vv = cvalues<T>();
2517     out.resize(numval);
2518     for(unsigned i=0 ; i < numval ; i++) out[i] = vv[start+i*stride+offset] ;
2519 }
2520
2521
2522 template<typename T> inline std::string NP::DescSlice(const std::vector<T>& out, unsigned edge )  // static
2523 {
2524     std::stringstream ss ;
2525     for(unsigned i=0 ; i < out.size() ; i++ )
2526     {
2527          if( i < edge || i > (out.size() - edge) )
2528             ss << std::setw(4) << i << std::setw(15) << std::setprecision(5) << std::fixed << out[i] << std::endl ;
2529          else if( i == edge )
2530             ss << "..." << std::endl;
2531     }
2532     std::string s = ss.str();
2533     return s ;
2534 }
2535
2536
2537 template<typename T> inline std::string NP::DescSliceBrief(const std::vector<T>& out )  // static
2538 {
2539     T mn = std::numeric_limits<T>::max();
2540     T mx = std::numeric_limits<T>::lowest();
2541
2542     for(unsigned i=0 ; i < out.size() ; i++ )
2543     {
2544         T v = out[i] ;
2545         if( mn > v ) mn = v ;
2546         if( mx < v ) mx = v ;
2547     }
2548     std::stringstream ss ;
2549     ss << " mn " << std::setw(15) << std::setprecision(5) << std::fixed << mn ;
2550     ss << " mx " << std::setw(15) << std::setprecision(5) << std::fixed << mx ;
2551     std::string s = ss.str();
2552     return s ;
2553 }
2554
2555 inline std::string NP::DescIdx(const std::vector<INT>& idxx ) // static
2556 {
2557     std::stringstream ss ;
2558     for(INT d=0 ; d < INT(idxx.size()) ; d++) ss << idxx[d] << " " ;
2559     std::string s = ss.str();
2560     return s ;
2561 }
2562
2563
2564 /**
2565 NP::pickdim__
2566 ----------------
2567
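Returns the ordinal of the first -1 in idxx, or -1 when idxx contains no -1.
Asserts that idxx contains at most one -1. Dimensions beyond the end of idxx
are treated as having index 1.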
2569
2570 **/
2571
2572 inline NP::INT NP::pickdim__(const std::vector<INT>& idxx) const
2573 {
2574     INT pd = -1 ;
2575     INT num = 0 ;
2576     for(INT d=0 ; d < INT(shape.size()) ; d++)
2577     {
2578         INT dd = (d < INT(idxx.size()) ? idxx[d] : 1) ;
2579         if( dd == -1 )
2580         {
2581             if(num == 0) pd = d ;
2582             num += 1 ;
2583         }
2584     }
2585     assert( num == 0 || num == 1 );
2586     return pd ;
2587 }
2588
2589
2590 /**
2591 NP::index__
2592 -------------
2593
2594 Flat value index obtained from array indices, a -ve index terminates
2595 the summation over dimensions so only the dimensions to the left of the
2596 -1 are summed.  This is used from NP::slice to give the start index
2597 of the slice where the slice dimension is marked by the -1.
2598
2599 **/
2600
2601 inline NP::INT NP::index__(const std::vector<INT>& idxx) const
2602 {
2603     INT idx = 0 ;
2604     for(INT d=0 ; d < INT(shape.size()) ; d++)
2605     {
2606         INT dd = (d < INT(idxx.size()) ? idxx[d] : 1) ;
2607         if( dd == -1 ) break ;
2608         idx += dd*dimprod(d+1) ;
2609     }
2610     return idx ;
2611 }
2612
2613 /**
2614 NP::stride__
2615 --------------
2616
2617 1. find ordinal of first -1 in idxx
2618 2. compute stride from product of array dimensions to the right of the -1
2619
2620 For example with an array of shape (100000, 32, 4, 4) and idxx (-1,)
2621 the pickdim is 0 and the stride is 32*4*4
2622
2623 **/
2624
2625
2626 inline NP::INT NP::stride__(const std::vector<INT>& idxx) const
2627 {
2628     INT pd = pickdim__(idxx);
2629     assert( pd > -1 );
2630     INT stride = dimprod(pd+1) ;
2631     return stride ;
2632 }
2633
2634
2635 /**
2636 NP::offset__
2637 --------------
2638
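1. find ordinal of first -1 in idxx
2. sum idxx[d]*dimprod(d+1) over the dimensions d to the right of the -1,
   treating dimensions beyond the end of idxx as having index 1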
2641
2642
2643 **/
2644
2645
2646 inline NP::INT NP::offset__(const std::vector<INT>& idxx) const
2647 {
2648     INT pd = pickdim__(idxx);
2649     assert( pd > -1 );
2650
2651     INT offset = 0 ;
2652     for(INT d=pd+1 ; d < INT(shape.size()) ; d++)
2653     {
2654         INT dd = (d < INT(idxx.size()) ? idxx[d] : 1) ;
2655         offset += dd*dimprod(d+1) ;
2656     }
2657     return offset ;
2658 }
2659
2660
2661 inline NP::INT NP::itemsize_(INT i, INT j, INT k, INT l, INT m, INT o) const
2662 {
2663     return NPS::itemsize_(shape, i, j, k, l, m, o) ;
2664 }
2665
2666 /**
2667 NP::itembytes_
2668 ----------------
2669
2670 Sets the argument *start* pointer to the address of the (i,j,k,l,m,o) item
2671 and num_bytes to the number of bytes in the item::
2672
2673
2674     const char* start = nullptr ;
2675     NP::INT num_bytes = 0 ;
2676     a->itembytes_(&start, num_bytes, i, j, k, l, m, o );
2677     assert( start && num_bytes > 0 );
2678
2679 This is used to form digests of item bytes with sdigest::Item
2680
2681
2682 **/
2683
2684
2685 inline void NP::itembytes_(const char** start,  INT& num_bytes,  INT i,  INT j,  INT k,  INT l, INT m, INT o ) const
2686 {
2687     INT idx0 = index0(i,j,k,l,m,o) ;
2688     *start = bytes() + idx0*ebyte ;
2689
2690     INT sz = itemsize_(i, j, k, l, m, o) ;
2691     num_bytes = sz*ebyte ;
2692 }
2693
2694
2695
2696
2697 template<typename T> inline T NP::get( INT i,  INT j,  INT k,  INT l, INT m, INT o) const
2698 {
2699     unsigned idx = index(i, j, k, l, m, o);
2700     const T* vv = cvalues<T>() ;
2701     return vv[idx] ;
2702 }
2703
2704 template<typename T> inline void NP::set( T val, INT i,  INT j,  INT k,  INT l, INT m, INT o)
2705 {
2706     unsigned idx = index(i, j, k, l, m, o);
2707     T* vv = values<T>() ;
2708     vv[idx] = val ;
2709 }
2710
2711
2712
2713 template<typename T> inline bool NP::is_allzero() const
2714 {
2715     T zero = T(0) ;
2716     const T* vv = cvalues<T>();
2717     INT num = 0 ;
2718     for(INT i=0 ; i < size ; i++) if(vv[i] == zero) num += 1 ;
2719     bool allzero = num == size ;
2720     return allzero ;
2721 }
2722
2723 inline bool NP::is_empty() const
2724 {
2725     return shape.size() > 0 && shape[0] == 0 ;
2726 }
2727
2728
2729 /**
2730 NP::descValues
2731 ----------------
2732
2733 NB the implicit double instanciation used by this method requires
2734 this method to be implemented after the block of explicit cvalues specializations.
2735
2736 **/
2737 inline std::string NP::descValues() const
2738 {
2739     assert( shape.size() == 1 );
2740     unsigned num_val = shape[0] ;
2741     assert( names.size() == num_val );
2742     assert( ebyte == 8 );
2743     std::stringstream ss ;
2744     ss << "NP::descValues num_val " << num_val  << std::endl ;
2745     const double* vv = cvalues<double>() ;
2746     for(unsigned i=0 ; i < num_val ; i++)
2747     {
2748         const char* k = names[i].c_str();
2749         ss
2750             << std::setw(3) << i
2751             << " v " << std::setw(10) << std::fixed << std::setprecision(4) << vv[i]
2752             << " k " << std::setw(60) << std::left << k << std::right
2753             <<  std::endl
2754             ;
2755     }
2756     std::string s = ss.str();
2757     return s ;
2758 }
2759
2760
2761 inline std::string NP::descSize() const
2762 {
2763     std::stringstream ss ;
2764     ss << "NP::descSize"
2765        << " arr_bytes " << arr_bytes()
2766        << " arr_kb " << arr_bytes()/1000
2767        ;
2768     std::string str = ss.str();
2769     return str ;
2770 }
2771
2772
2773
2774 /**
2775 NP::descTable
2776 ----------------
2777
2778 **/
2779
2780 template<typename T>
2781 inline std::string NP::descTable(int wid) const
2782 {
2783     return descTable_<T>(wid, labels, &names );
2784 }
2785
2786
2787
2788 template<typename T>
2789 inline T NP::findMinimumTimestamp() const
2790 {
2791     const T* vv = cvalues<T>() ;
2792
2793     T MAX = std::numeric_limits<T>::max();
2794     T t0 = MAX ;
2795
2796     INT nv = num_values() ;
2797     for(INT i=0 ; i < nv ; i++)
2798     {
2799         T t = vv[i] ;
2800         if(!U::LooksLikeTimestamp<T>(t)) continue ;
2801         if( t < t0 ) t0 = t ;
2802     }
2803     return t0 == MAX ? 0 : t0 ;
2804 }
2805
2806
/**
NP::descTable_
-----------------

Returns a text table presenting a 2D array with values read as type T.

wid
    column width, row labels use twice this width
column_labels
    optional labels, used only when U::Summarize yields one per column (shape[1])
row_labels
    optional labels, used only when U::Summarize yields one per row (shape[0])

Values that U::LooksLikeTimestamp<T> accepts are shown relative to the minimum
such value in the array (from NP::findMinimumTimestamp) and divided by 1000000.
A row of column totals follows the table, then listings that pair each
summarized label with its full form when the two differ.

For arrays that are not 2D only an error message and the labels are returned.

**/

template<typename T>
inline std::string NP::descTable_(int wid,
    const std::vector<std::string>* column_labels,
    const std::vector<std::string>* row_labels
  ) const
{
    bool with_column_totals = true ;



    std::stringstream ss ;
    ss << "[NP::descTable_ " << sstr() << std::endl ;
    int ndim = shape.size() ;
    bool skip = ndim != 2 ;
    if(skip)
    {
        // not a 2D array : report the problem and dump whatever labels were provided
        ss << " ERROR : UNEXPECTED SHAPE ndim " << ndim << std::endl ;
        ss << " column_labels " << std::endl ;
        if(column_labels) for(int i=0 ; i < int(column_labels->size()) ; i++) ss << (*column_labels)[i] << std::endl ;
        ss << " row_labels " << std::endl ;
        if(row_labels) for(int i=0 ; i < int(row_labels->size()) ; i++) ss << (*row_labels)[i] << std::endl ;
    }

    if(!skip)
    {
        const T* vv = cvalues<T>() ;
        T t0 = findMinimumTimestamp<T>() ;   // offset subtracted from timestamp-like values

        INT ni = shape[0] ;
        INT nj = shape[1] ;
        INT cwid = wid ;
        INT rwid = 2*wid ;

        // labels are only used when the summarized count matches the array dimension
        std::vector<std::string> column_smry ;
        if(column_labels) U::Summarize( column_smry, column_labels, cwid );
        bool with_column_labels = int(column_smry.size()) == nj ;

        std::vector<std::string> row_smry ;
        if(row_labels) U::Summarize( row_smry, row_labels, rwid );
        bool with_row_labels = int(row_smry.size()) == ni ;


        // header line : column labels, indented past the row label column when row labels are shown
        if(with_column_labels) for(int j=0 ; j < nj ; j++) ss
            << U::Space( with_row_labels && j == 0  ? rwid+1 : 0 )
            << std::setw(cwid)
            << column_smry[j]
            << ( j < nj -1 ? " " : "\n" )
            ;

        std::vector<T> column_totals(nj,0);
        int num_timestamp = 0 ;

        // table body : one line per row, values read in row-major order
        for(int i=0 ; i < ni ; i++)
        {
            if(with_row_labels) ss << std::setw(rwid) << row_smry[i] << " " ;
            for(int j=0 ; j < nj ; j++)
            {
                T v = vv[i*nj+j] ;
                bool timestamp = U::LooksLikeTimestamp<T>(v) ;
                if(timestamp) num_timestamp += 1 ;
                T pv = timestamp ? v - t0 : v  ;

                // totals accumulate the presented value, ie timestamps after t0 subtraction
                column_totals[j] += pv ;

                if( timestamp )
                {
                    // NB std::fixed and std::setprecision persist on the stream for later output
                    ss
                        << std::setw(cwid)
                        << std::fixed
                        << std::setprecision(6)
                        << double(pv)/1000000
                        ;
                }
                else
                {
                    ss
                        << std::setw(cwid)
                        << pv
                        ;
                }
                ss << ( j < nj-1 ? " " : "\n" ) ;

            }
        }

        ss << "num_timestamp " << num_timestamp << " auto-offset from t0 " << t0 << std::endl ;

        if(with_column_totals)
        {
            // totals are output undivided, unlike the timestamp values in the table body
            if(with_row_labels) ss << std::setw(rwid) << "TOTAL:" << " " ;
            for(int j=0 ; j < nj ; j++)
            {
                T v = column_totals[j] ;
                ss
                    << std::setw(cwid)
                    << v
                    << ( j < nj - 1 ? " " : "\n" )
                    ;
            }
        }

        if(with_column_labels)
        {
            // key for column labels whose summarized form differs from the full label
            for(int j=0 ; j < nj ; j++)
            {
                if( strcmp(column_smry[j].c_str(), (*column_labels)[j].c_str()) != 0) ss
                    << ( j == 0 ? "\n" : "" )
                    << std::setw(cwid)
                    << column_smry[j]
                    << " : "
                    << (*column_labels)[j]
                    << std::endl
                    ;
            }
        }

        if(with_row_labels)
        {
            // key for row labels whose summarized form differs from the full label
            for(int i=0 ; i < ni ; i++)
            {
                if( strcmp(row_smry[i].c_str(), (*row_labels)[i].c_str()) != 0) ss
                    << ( i == 0 ? "\n" : "" )
                    << std::setw(rwid)
                    << row_smry[i]
                    << " : "
                    << (*row_labels)[i]
                    << std::endl
                    ;
            }
        }
    }
    ss << "]NP::descTable_ " << sstr() << std::endl ;

    std::string str = ss.str();
    return str ;
}
2949
2950
2951
2952
2953
2954
2955
2956 /**
2957 NP::MakeLike
2958 --------------
2959
2960 Creates an array of the same shape and type as the *src* array.
2961 Values are *NOT* copied from *src*.
2962
2963 **/
2964
2965 inline NP* NP::MakeLike(const NP* src) // static
2966 {
2967     if(src == nullptr) return nullptr ;
2968     NP* dst = new NP(src->dtype);
2969     dst->set_shape(src->shape) ;
2970     return dst ;
2971 }
2972
2973 inline void NP::CopyMeta( NP* b, const NP* a ) // static
2974 {
2975     b->set_shape( a->shape );
2976     b->meta = a->meta ;    // pass along the metadata
2977     b->names = a->names ;
2978     b->nodata = a->nodata ;
2979     if(a->labels) b->labels = new std::vector<std::string>( a->labels->begin(), a->labels->end() ) ;
2980
2981     // pass along transient strings set on loading
2982     b->lpath = a->lpath ;
2983     b->lfold = a->lfold ;
2984 }
2985
2986
2987 inline void NP::set_preserve_last_column_integer_annotation()
2988 {
2989     set_meta<INT>(Preserve_Last_Column_Integer_Annotation, 1 );
2990 }
2991 inline bool NP::is_preserve_last_column_integer_annotation() const
2992 {
2993     return 1 == get_meta<INT>(Preserve_Last_Column_Integer_Annotation, 0) ;
2994 }
2995
2996 inline float NP::PreserveNarrowedDoubleInteger( double f )
2997 {
2998      UIF64 uif64 ;
2999      uif64.f = f ;
3000      if(VERBOSE) std::cout << "NP::PreserveNarrowedDoubleInteger  uif64.u " << uif64.u << std::endl ;
3001
3002      UIF32 uif32 ;
3003      uif32.u = int(uif64.u) ;
3004      return uif32.f ;
3005 }
3006
3007 inline NP* NP::MakeNarrow(const NP* a) // static
3008 {
3009     assert( a->ebyte == 8 );
3010     std::string b_dtype = NPU::_make_narrow(a->dtype);
3011
3012     NP* b = new NP(b_dtype.c_str());
3013     CopyMeta(b, a );
3014
3015     bool plcia = b->is_preserve_last_column_integer_annotation() ;
3016     if(VERBOSE && plcia) std::cout
3017         << "NP::MakeNarrow"
3018         << " b.plcia " << plcia
3019         << " a.ni " << a->num_items()
3020         << " b.ni " << b->num_items()
3021         << " a.iv " << a->num_itemvalues()
3022         << " b.iv " << b->num_itemvalues()
3023         << std::endl
3024         ;
3025
3026
3027     assert( a->num_values() == b->num_values() );
3028     unsigned nv = a->num_values();
3029     unsigned iv = a->num_itemvalues();
3030
3031     if( a->uifc == 'f' && b->uifc == 'f')
3032     {
3033         const double* aa = a->cvalues<double>() ;
3034         float*        bb = b->values<float>() ;
3035         for(unsigned i=0 ; i < nv ; i++)
3036         {
3037             bb[i] = float(aa[i]);
3038             bool preserve_last_column_integer = plcia && ((i % iv) == iv - 1 ) ; // only works for 3D not higher D
3039             if(preserve_last_column_integer) bb[i] = PreserveNarrowedDoubleInteger(aa[i]) ;
3040         }
3041     }
3042
3043     if(VERBOSE) std::cout
3044         << "NP::MakeNarrow"
3045         << " a.dtype " << a->dtype
3046         << " b.dtype " << b->dtype
3047         << std::endl
3048         ;
3049     return b ;
3050 }
3051
3052 inline NP* NP::MakeWide(const NP* a) // static
3053 {
3054     assert( a->ebyte == 4 );
3055     std::string b_dtype = NPU::_make_wide(a->dtype);
3056
3057     NP* b = new NP(b_dtype.c_str());
3058     CopyMeta(b, a );
3059
3060     assert( a->num_values() == b->num_values() );
3061     unsigned nv = a->num_values();
3062
3063     if( a->uifc == 'f' && b->uifc == 'f')
3064     {
3065         const float* aa = a->cvalues<float>() ;
3066         double* bb = b->values<double>() ;
3067         for(unsigned i=0 ; i < nv ; i++)
3068         {
3069             bb[i] = double(aa[i]);
3070         }
3071     }
3072
3073     if(VERBOSE) std::cout
3074         << "NP::MakeWide"
3075         << " a.dtype " << a->dtype
3076         << " b.dtype " << b->dtype
3077         << std::endl
3078         ;
3079
3080     return b ;
3081 }
3082
3083 inline NP* NP::MakeCopy(const NP* a) // static
3084 {
3085     NP* b = new NP(a->dtype);
3086     CopyMeta(b, a );
3087
3088     assert( a->arr_bytes() == b->arr_bytes() );
3089
3090     if(a->nodata == false)
3091     {
3092         memcpy( b->bytes(), a->bytes(), a->arr_bytes() );
3093     }
3094     unsigned nv = a->num_values();
3095
3096     if(VERBOSE) std::cout
3097         << "NP::MakeCopy"
3098         << " a.dtype " << a->dtype
3099         << " b.dtype " << b->dtype
3100         << " a.nodata " << a->nodata
3101         << " b.nodata " << b->nodata
3102         << " nv " << nv
3103         << std::endl
3104         ;
3105
3106     return b ;
3107 }
3108
3109 /**
3110 NP::MakeCopy3D
3111 ----------------
3112
3113 Copy and change shape to 3D, original dimensions must be 3D or more.
3114
3115 **/
3116
3117 inline NP* NP::MakeCopy3D(const NP* a) // static
3118 {
3119     NP* b = MakeCopy(a);
3120     b->change_shape_to_3D();
3121     return b ;
3122 }
3123
3124 inline NP* NP::ChangeShape3D(NP* a) // static
3125 {
3126     a->change_shape_to_3D();
3127     return a ;
3128 }
3129
3130
3131
3132
3133
3134 inline NP* NP::MakeWideIfNarrow(const NP* a) // static
3135 {
3136     if(a == nullptr) return nullptr ;
3137     return a->ebyte == 4 ? MakeWide(a) : MakeCopy(a) ;
3138 }
3139 inline NP* NP::MakeNarrowIfWide(const NP* a) // static
3140 {
3141     if(a == nullptr) return nullptr ;
3142     return a->ebyte == 8 ? MakeNarrow(a) : MakeCopy(a) ;
3143 }
3144
3145 /**
3146 NP::MakeWithType
3147 -------------------
3148
3149 Copies, Narrows or Widens as needed to transform the
3150 source array into the template type.
3151 Copies are done when there is no need to narrow or widen
3152 for memory management consistency.
3153
3154 **/
3155
3156 template<typename T>
3157 inline NP* NP::MakeWithType(const NP* a) // static
3158 {
3159     if(VERBOSE) std::cout
3160         << "NP::MakeWithType"
3161         << " source type a->ebyte " << a->ebyte
3162         << " sizeof(T) " << sizeof(T)
3163         << std::endl
3164         ;
3165
3166     assert( sizeof(T) == 4 || sizeof(T) == 8 );
3167     assert( a->ebyte == 4 || a->ebyte == 8 );
3168
3169     NP* b = nullptr ;
3170     if( a->ebyte == 4 && sizeof(T) == 4)
3171     {
3172         b = MakeCopy(a);
3173     }
3174     else if( a->ebyte == 8 && sizeof(T) == 8)
3175     {
3176         b = MakeCopy(a);
3177     }
3178     else if( a->ebyte == 8 && sizeof(T) == 4)
3179     {
3180         b = MakeNarrow(a) ;
3181     }
3182     else if( a->ebyte == 4 && sizeof(T) == 8)
3183     {
3184         b = MakeWide(a) ;
3185     }
3186     return b ;
3187 }
3188
3189
3190 /**
3191 NP::MakeSelectCopy
3192 --------------------
3193 **/
3194
3195 template<typename... Args>
3196 inline NP* NP::MakeSelectCopy( const NP* src, Args ... items_ )  // MakeSelectCopy_ellipsis
3197 {
3198    std::vector<INT> items = {items_...};
3199    return MakeSelectCopy_(src, &items );
3200 }
3201
3202 template NP* NP::MakeSelectCopy( const NP* , INT );
3203 template NP* NP::MakeSelectCopy( const NP* , INT, INT );
3204 template NP* NP::MakeSelectCopy( const NP* , INT, INT, INT );
3205 template NP* NP::MakeSelectCopy( const NP* , INT, INT, INT, INT );
3206
3207 /**
3208 NP::MakeSelectCopyE_
3209 -----------------------
3210
3211 Create an array from index listed items specified in the *ekey* envvar.
3212 For example with the default delim of ',' and envvar 0,1,10 would
3213 select those items from the source array.
3214
3215 **/
3216
3217 inline NP* NP::MakeSelectCopyE_(  const NP* src, const char* ekey, const char* fallback, char delim )
3218 {
3219     std::vector<INT>* items = U::GetEnvVec<INT>(ekey, fallback, delim );
3220     return NP::MakeSelectCopy_( src, items )  ;
3221 }
3222 inline NP* NP::MakeSelectCopy_(  const NP* src, const char* items_ )
3223 {
3224     std::vector<INT>* items = U::MakeVec<INT>(items_);
3225     return NP::MakeSelectCopy_( src, items );
3226 }
3227 inline NP* NP::MakeSelectCopy_(  const NP* src, const std::vector<INT>* items )
3228 {
3229     return items ? MakeSelectCopy_(src, items->data(), INT(items->size()) ) : NP::MakeCopy(src) ;
3230 }
3231
3232 /**
3233 NP::MakeSelectCopy_
3234 --------------------
3235
3236 Create an array from the index listed *items* in the *src* array.
3237
3238 **/
3239
3240 inline NP* NP::MakeSelectCopy_(  const NP* src, const INT* items, INT num_items )
3241 {
3242     assert( items );
3243     for(INT i=0 ; i < num_items ; i++) assert( items[i] < INT(src->shape[0]) );
3244     std::vector<INT> dst_shape(src->shape) ;
3245     dst_shape[0] = num_items ;
3246     NP* dst = new NP(src->dtype, dst_shape);
3247     assert( src->item_bytes() == dst->item_bytes() );
3248     unsigned size = src->item_bytes();
3249     for(INT i=0 ; i < num_items ; i++)
3250     {
3251         memcpy( dst->bytes() + i*size, src->bytes() + items[i]*size , size );
3252     }
3253
3254     // format string idlist list of items and set into metadata
3255     std::stringstream ss ;
3256     for(INT i=0 ; i < num_items ; i++) ss << items[i] << ( i < num_items-1 ? "," : "" ) ;
3257     std::string idlist = ss.str() ;
3258     dst->set_meta<std::string>("idlist", idlist );
3259     // item indices become "id" when you use them to make a selection
3260
3261     return dst ;
3262 }
3263
3264 /**
3265 NP::MakeSelection
3266 --------------------
3267
3268 *sel* is an array of indices into the *src* array
3269 which is used to create a new *dst* array with just the
3270 *sel* selected items from *src*.
3271
3272 **/
3273
3274
3275 inline NP* NP::MakeSelection( const NP* src, const NP* sel )
3276 {
3277     INT num_sel = sel->shape[0] ;
3278
3279     assert( sel->uifc == 'i' && sel->ebyte == 8 );
3280     assert( sel->shape.size() == 1 );
3281
3282     std::vector<INT> dst_shape(src->shape) ;
3283     dst_shape[0] = num_sel ;
3284     NP* dst = new NP(src->dtype, dst_shape);
3285
3286     unsigned size = src->item_bytes();
3287     const int64_t* sel_vv = sel->cvalues<int64_t>();
3288     for(INT i=0 ; i < num_sel ; i++)
3289     {
3290         int64_t sel_v = sel_vv[i] ;
3291         memcpy( dst->bytes() + i*size, src->bytes() + sel_v*size,  size );
3292     }
3293
3294     return dst ;
3295 }
3296
3297
3298
3299
3300 /**
3301 NP::ParseSliceString
3302 ------------------------
3303
3304 Parse string of the below forms int vector of integers where ":"
3305 is special cased to become -1::
3306
3307      [:,0,0,0]
3308      [:,0,0,1]
3309      [:,0,0,2]
3310
3311
3312 **/
3313
3314 inline int NP::ParseSliceString(std::vector<INT>& idxx, const char* _sli )
3315 {
3316     size_t len = _sli ? strlen(_sli) : 0 ;
3317     if(len < 2) return 1 ;
3318
3319     const char* o = strstr(_sli, "[");
3320     const char* c = strstr(_sli, "]");
3321
3322     if(o == nullptr) return 2 ;
3323     if(c == nullptr) return 3 ;
3324     if(c - o <= 0 ) return 4 ;
3325
3326     // copy starting from the char after the "[" up to the char before the "]"
3327     char* sli = strndup(o+1, c - o - 1 );
3328     //std::cout << "NP::ParseSliceString /" << sli << "/\n" ;
3329
3330     char delim = ',' ;
3331
3332     std::stringstream ss;
3333     ss.str(sli);
3334     std::string s;
3335     while (std::getline(ss, s, delim))
3336     {
3337         if(0 == strcmp(s.c_str(),":"))
3338         {
3339             idxx.push_back(-1);
3340         }
3341         else
3342         {
3343             std::istringstream iss(s);
3344             INT t ;
3345             iss >> t ;
3346             idxx.push_back(t) ;
3347         }
3348     }
3349     return 0 ;
3350 }
3351
3352
3353
3354
3355 /**
3356 NP::ParseSliceIndexString
3357 ------------------------
3358
3359 Index slice (start,stop,step) strings of form::
3360
3361     [:5]       # start:0 stop:5 step:1
3362     [::2]      # start:0 stop:- step:2
3363     [1:10]     # start:1 stop:10 step:1
3364     [1:10:2]   # start:1 stop:10 step:2
3365
3366     [100]      # start:100 stop:101 step:1  special cased to allow single value
3367
3368 Usage::
3369
3370     struct slice { int start, stop, step ; }
3371     slice sli = {} ;
3372
3373     sli.start = 0 ;
3374     sli.stop = num_items ;
3375     sli.step = 1 ;
3376
3377     int rc = NP::ParseSliceIndexString<int>(sli.start, sli.stop, sli.step, _sli );
3378
3379
3380 **/
3381
3382 template<typename T>
3383 inline int NP::ParseSliceIndexString(T& start, T& stop, T& step, const char* _sli, bool dump )
3384 {
3385     size_t len = _sli ? strlen(_sli) : 0 ;
3386     if(len < 2) return 1 ;
3387
3388     const char* o = strstr(_sli, "[");
3389     const char* c = strstr(_sli, "]");
3390
3391     if(o == nullptr) return 2 ;
3392     if(c == nullptr) return 3 ;
3393     if(c - o <= 0 ) return 4 ;
3394
3395     // copy starting from the char after the "[" up to the char before the "]"
3396     char* sli = strndup(o+1, c - o - 1 );
3397     if(dump) std::cout << "NP::ParseSliceIndexString {" << sli << "}\n" ;
3398
3399     if(strlen(sli)>2 && sli[0] == ':' && sli[1] == ':' )  // eg "::2"
3400     {
3401         std::string s(sli+2);
3402         std::istringstream iss(s);
3403         T t ;
3404         iss >> t ;
3405
3406         step = t ;
3407     }
3408     else if(strlen(sli)>2 && sli[0] == ':' && sli[1] != ':' ) // eg ":5"
3409     {
3410         std::string s(sli+1);
3411         std::istringstream iss(s);
3412         T t ;
3413         iss >> t ;
3414
3415         stop = t ;
3416     }
3417     else if(strlen(sli)>0 && strstr(sli,":") == nullptr ) // eg "5" "50.5"
3418     {
3419         std::string s(sli);
3420         std::istringstream iss(s);
3421         T t ;
3422         iss >> t ;
3423
3424         start = t ;
3425         stop = t + T(1) ;
3426         step = T(1) ;
3427
3428         // kludge to simplify giving single value within range/sli spec
3429         // np.arange(100,101,1) == np.array([100])
3430
3431         if(dump) std::cout
3432            << "NP::ParseSliceIndexString.here"
3433            << " sli {" << sli << "}"
3434            << " start " << start
3435            << " stop " << stop
3436            << " step " << step
3437            << "\n"
3438            ;
3439
3440     }
3441     else  // eg 1:10 1:10:2
3442     {
3443         char delim = ':' ;
3444
3445         std::stringstream ss;
3446         ss.str(sli);
3447         std::string s;
3448         int count = 0 ;
3449
3450         while (std::getline(ss, s, delim))
3451         {
3452             std::istringstream iss(s);
3453             T t ;
3454             iss >> t ;
3455
3456             switch(count)
3457             {
3458                case 0: start = t ; break ;
3459                case 1: stop  = t ; break ;
3460                case 2: step  = t ; break ;
3461             }
3462             count++ ;
3463         }
3464     }
3465     return 0 ;
3466 }
3467
3468
3469 /**
3470 NP::LooksLikeSliceIndexString
3471 ------------------------------
3472
3473 String starting with '[' and ending with ']'
3474
3475 **/
3476
3477 inline bool NP::LooksLikeSliceIndexString(const char* _sli ) //
3478 {
3479     if(!_sli) return false ;
3480     bool start_br = _sli[0] == '[' ;
3481     bool end_br = _sli[strlen(_sli)-1] == ']' ;
3482     return start_br && end_br ;
3483 }
3484
3485 inline bool NP::LooksLikeSliceIndexStringIsEmpty(const char* _sli ) //
3486 {
3487     if(_sli == nullptr) return true ;
3488     if(strcmp(_sli, "") == 0) return true ;
3489     if(strcmp(_sli, "[]") == 0) return true ;
3490     return false ;
3491 }
3492
3493
3494
3495
3496 /**
3497 NP::LooksLikeSliceIndexStringSuffix
3498 -------------------------------------
3499
3500 String containing '[' not are start and ending with ']'
3501 For example::
3502
3503     "/tmp/w54.npy[0:5]"
3504
3505 **/
3506
3507 inline bool NP::LooksLikeSliceIndexStringSuffix(const char* _sli, char** body, char** suffix ) //
3508 {
3509     if(!_sli) return false ;
3510     bool has_suffix = U::prefix_suffix( body, suffix, "[",  _sli );
3511     return has_suffix ;
3512 }
3513
3514
3515 template<typename T>
3516 inline void NP::parse_slice( NP_slice<T>& sli, const char* _sli) const
3517 {
3518     INT ni = num_items();
3519
3520     sli.start = T(0) ;
3521     sli.stop = T(ni) ;
3522     sli.step = T(1) ;
3523
3524     int rc = ParseSliceIndexString<T>(sli.start, sli.stop, sli.step, _sli );
3525     if( rc != 0 ) std::cerr
3526         << "NP::parse_slice "
3527         << " ParseSliceIndexString FAILED "
3528         << " _sli [" << ( _sli ? _sli : "-" ) << "]"
3529         << " rc " << rc
3530         << "\n"
3531         ;
3532     assert( rc == 0 );
3533     if( rc != 0 ) std::raise(SIGINT);
3534 }
3535
3536
3537
3538
3539
3540
3541
3542
3543 /**
3544 NP::MakeSliceSelection
3545 ------------------------
3546
3547 In [15]: a[a[:,0,0,0] < 0].shape
3548 Out[15]: (514, 32, 4, 4)
3549
3550 In [16]: a[a[:,0,0,0] > 0].shape
3551 Out[16]: (486, 32, 4, 4)
3552
3553 **/
3554
3555
3556 template<typename T>
3557 inline NP* NP::MakeSliceSelection( const NP* src, const char* _sel )
3558 {
3559     NP* sel = src->makeWhereSelection<T>(_sel);
3560     return MakeSelection(src, sel) ;
3561 }
3562
3563 /**
3564 NP::makeWhereSelection
3565 -------------------------
3566
3567 1. parse selection string like [:,0,0,2] < 0.5
3568    extracting the slice specification and cut
3569
3570 2. get the slice and apply the cut to yield
3571    a where array of indices for which the
3572    selection is true
3573
3574 **/
3575
3576 template<typename T>
3577 inline NP* NP::makeWhereSelection( const char* _sel ) const
3578 {
3579     std::vector<INT> idxx ;
3580     int rc = ParseSliceString(idxx, _sel);
3581     if(rc !=0 ) std::cout << "NP::makeWhereSelection FAIL to parse [" << ( _sel ? _sel : "-" ) << "]\n" ;
3582     if(rc !=0 ) return nullptr ;
3583
3584     const char* gt = strstr(_sel, ">");
3585     const char* lt = strstr(_sel, "<");
3586     if( gt && lt ) return nullptr ;
3587     const char* pt = gt ? gt : lt ;
3588     std::istringstream iss(pt+1) ;
3589
3590     T cut(0.f);
3591     iss >> cut ;
3592     //std::cout << " cut[" << cut << "]\n";
3593
3594     std::vector<T> vals ;
3595     slice_(vals, idxx);
3596
3597     std::vector<INT> where ;
3598     //std::cout << " vals.size " << vals.size() << "\n" ;
3599     for(INT i=0 ; i < INT(vals.size()) ; i++)
3600     {
3601         T value = vals[i] ;
3602         bool select = ( gt && value > cut ) || ( lt && value < cut );
3603         if(select) where.push_back(i);
3604     }
3605
3606     return MakeFromValues<INT>(where.data(), where.size() ) ;
3607 }
3608
3609 inline bool NP::LooksLikeWhereSelection(const char* _sel ) // static
3610 {
3611     bool candidate = _sel && strlen(_sel ) > 1 ;
3612     if(!candidate) return false ;
3613     bool starts_with_dollar = _sel[0] == '$' ;
3614
3615     const char* sel = starts_with_dollar ? U::GetEnv(_sel+1, nullptr) : _sel ;
3616     if( sel == nullptr ) return false ;
3617
3618     const char* gt = strstr(sel, ">");
3619     const char* lt = strstr(sel, "<");
3620     if( gt && lt ) return false ;
3621     if( gt || lt ) return true ;
3622     return false ;
3623 }
3624
3625
3626
3627
3628 /**
3629 NP::MakeItemCopy
3630 ------------------
3631
3632 Finds the index of a single item from the src array specified by (i,j,k,l,m,n)
3633 and copies that item into the destination array.
3634
3635 **/
3636
3637 inline NP* NP::MakeItemCopy(  const NP* src, INT i, INT j, INT k, INT l, INT m, INT o )
3638 {
3639     std::vector<INT> sub_shape ;
3640     src->item_shape(sub_shape, i, j, k, l, m, o );   // shape of the item specified by (i,j,k,l,m,n)
3641     unsigned idx = src->index0(i, j, k, l, m, o );
3642
3643     if(NP::VERBOSE) std::cout
3644         << "NP::MakeItemCopy"
3645         << " i " << i
3646         << " j " << j
3647         << " k " << k
3648         << " l " << l
3649         << " m " << m
3650         << " o " << o
3651         << " idx " << idx
3652         << " src.ebyte " << src->ebyte
3653         << " src.shape " << NPS::desc(src->shape)
3654         << " sub_shape " << NPS::desc(sub_shape)
3655         << std::endl
3656         ;
3657
3658     NP* dst = new NP(src->dtype, sub_shape);
3659     memcpy( dst->bytes(), src->bytes() + idx*src->ebyte , dst->arr_bytes() );
3660     return dst ;
3661 }
3662
3663
3664 /**
3665 NP::item_shape
3666 ---------------
3667
3668 Consider an array of the below shape, which has 6 top level items::
3669
3670    (6, 2, 4096, 4096, 4)
3671
3672 The *item_shape* method returns sub shapes, for example
3673 a single non-negative argument i=0/1/2/3/4/5
3674 would yield the the top level items shape::
3675
3676     (2, 4096, 4096, 4 )
3677
3678 Similarly with two non-negative arguments i=0/1/2/3/4/5, j=0/1
3679 would give item shape::
3680
3681     (4096, 4096, 4 )
3682
3683 **/
3684 inline void NP::item_shape(std::vector<INT>& sub, INT i, INT j, INT k, INT l, INT m, INT o ) const
3685 {
3686     unsigned nd = shape.size() ;
3687
3688     if(i > -1 && j==-1)
3689     {
3690         if( nd > 1 ) sub.push_back(shape[1]);
3691         if( nd > 2 ) sub.push_back(shape[2]);
3692         if( nd > 3 ) sub.push_back(shape[3]);
3693         if( nd > 4 ) sub.push_back(shape[4]);
3694         if( nd > 5 ) sub.push_back(shape[5]);
3695     }
3696     else if(i > -1 && j > -1 && k==-1)
3697     {
3698         if( nd > 2 ) sub.push_back(shape[2]);
3699         if( nd > 3 ) sub.push_back(shape[3]);
3700         if( nd > 4 ) sub.push_back(shape[4]);
3701         if( nd > 5 ) sub.push_back(shape[5]);
3702     }
3703     else if(i > -1 && j > -1 && k > -1 && l == -1)
3704     {
3705         if( nd > 3 ) sub.push_back(shape[3]);
3706         if( nd > 4 ) sub.push_back(shape[4]);
3707         if( nd > 5 ) sub.push_back(shape[5]);
3708     }
3709     else if(i > -1 && j > -1 && k > -1 && l >  -1 && m == -1)
3710     {
3711         if( nd > 4 ) sub.push_back(shape[4]);
3712         if( nd > 5 ) sub.push_back(shape[5]);
3713     }
3714     else if(i > -1 && j > -1 && k > -1 && l >  -1 && m > -1 && o == -1)
3715     {
3716         if( nd > 5 ) sub.push_back(shape[5]);
3717     }
3718     else if(i > -1 && j > -1 && k > -1 && l >  -1 && m > -1 && o > -1)
3719     {
3720         sub.push_back(1);
3721     }
3722 }
3723
3724 inline NP* NP::spawn_item(  INT i, INT j, INT k, INT l, INT m, INT o  ) const
3725 {
3726     return MakeItemCopy(this, i, j, k, l, m, o );
3727 }
3728
3729
3730
3731 /**
3732 NP::MakeCDF
3733 ------------
3734
3735 Creating a CDF like this with just plain trapz will usually yield a jerky
3736 cumulative integral curve. To avoid that need to play some tricks to have
3737 integral values are more points.
3738
3739 For example by using NP::MakeDiv to split the bins and linearly interpolate
3740 the values.
3741
3742 **/
3743
3744 template<typename T>
3745 inline NP* NP::MakeCDF(const NP* dist )  // static
3746 {
3747     NP* cdf = dist->trapz<T>() ;
3748     cdf->divide_by_last<T>();
3749     return cdf ;
3750 }
3751
3752
3753 /**
3754 NP::MakeICDF
3755 -------------
3756
3757 Inverts CDF using *nu* NP::pdomain lookups in range 0->1
3758 The input CDF must contain domain and values in the payload last dimension.
3759 3d or 2d input CDF are accepted where 3d input CDF is interpreted as
3760 a collection of multiple CDF to be inverted.
3761
3762 The ICDF created has shape (num_items, nu, hd_factor == 0 ? 1 : 4)
3763 where num_items is 1 for 2d input CDF and the number of items for 3d input CDF.
3764
3765 Notice that domain information is not included in the output ICDF, this is
3766 to facilitate direct conversion of the ICDF array into GPU textures.
3767 The *hd_factor* convention regarding domain ranges is used.
3768
Use NP::MakeProperty to add domain information using this convention.
3770
3771
3772 **/
3773
3774 template<typename T>
3775 inline NP* NP::MakeICDF(const NP* cdf, unsigned nu, unsigned hd_factor, bool dump)  // static
3776 {
3777     unsigned ndim = cdf->shape.size();
3778     assert( ndim == 2 || ndim == 3 );
3779     unsigned num_items = ndim == 3 ? cdf->shape[0] : 1 ;
3780
3781     assert( hd_factor == 0 || hd_factor == 10 || hd_factor == 20 );
3782     T edge = hd_factor > 0 ? T(1.)/T(hd_factor) : 0. ;
3783
3784     NP* icdf = new NP(cdf->dtype, num_items, nu, hd_factor == 0 ? 1 : 4 );
3785     T* vv = icdf->values<T>();
3786
3787     unsigned ni = icdf->shape[0] ;
3788     unsigned nj = icdf->shape[1] ;
3789     unsigned nk = icdf->shape[2] ;
3790
3791     if(dump) std::cout
3792         << "NP::MakeICDF"
3793         << " nu " << nu
3794         << " ni " << ni
3795         << " nj " << nj
3796         << " nk " << nk
3797         << " hd_factor " << hd_factor
3798         << " ndim " << ndim
3799         << " icdf " << icdf->sstr()
3800         << std::endl
3801         ;
3802
3803     for(unsigned i=0 ; i < ni ; i++)
3804     {
3805         INT item = i ;
3806         if(dump) std::cout << "NP::MakeICDF" << " item " << item << std::endl ;
3807
3808         for(unsigned j=0 ; j < nj ; j++)
3809         {
3810             T y_all = T(j)/T(nj) ; //        // 0 -> (nj-1)/nj = 1-1/nj
3811             T x_all = cdf->pdomain<T>( y_all, item );
3812
3813 #ifdef DEBUG
3814             std::cout
3815                 <<  " y_all " << std::setw(10) << std::setprecision(4) << std::fixed << y_all
3816                 <<  " x_all " << std::setw(10) << std::setprecision(4) << std::fixed << x_all
3817                 << std::endl
3818                 ;
3819 #endif
3820             unsigned offset = i*nj*nk+j*nk ;
3821
3822             vv[offset+0] = x_all ;
3823
3824             if( hd_factor > 0 )
3825             {
3826                 T y_lhs = T(j)/T(hd_factor*nj) ;
3827                 T y_rhs = T(1.) - edge + T(j)/T(hd_factor*nj) ;
3828
3829                 T x_lhs = cdf->pdomain<T>( y_lhs, item );
3830                 T x_rhs = cdf->pdomain<T>( y_rhs, item );
3831
3832                 vv[offset+1] = x_lhs ;
3833                 vv[offset+2] = x_rhs ;
3834                 vv[offset+3] = 0. ;
3835             }
3836         }
3837     }
3838     return icdf ;
3839 }
3840
3841 /**
3842 NP::MakeProperty
3843 -----------------
3844
3845 For hd_factor=0 converts a one dimensional array of values with shape (ni,)
3846 into 2d array of shape (ni, 2) with the domain a range of values
3847 from 0 -> (ni-1)/ni = 1-1/ni
3848 Thinking in one dimensional terms that means that values and
3849 corresponding domains get interleaved.
3850 The resulting property array can then be used with NP::pdomain or NP::interp.
3851
3852 For hd_factor=10 or hd_factor=20 the input array is required to have shape (ni,4) or (ni,nj,4)
3853 where "all" is in payload slot 0 and lhs and rhs high resolution zooms are in
3854 payload slots 1 and 2.  (Slot 3 is currently spare, normally containing zero).
3855
3856 The output array has an added dimension with shape  (ni,4,2)
3857 adding domain values interleaved with the values.
3858 The domain values follow the hd_factor convention of scaling the resolution
3859 in the 1/hd_factor tails
3860
3861
3862 **/
3863
3864 template <typename T> NP* NP::MakeProperty(const NP* a, unsigned hd_factor ) // static
3865 {
3866     NP* prop = nullptr ;
3867     unsigned ndim = a->shape.size();
3868     assert( ndim == 1 || ndim == 2 || ndim == 3 );
3869
3870     if( ndim == 1 )
3871     {
3872         assert( hd_factor == 0 );
3873
3874         unsigned ni = a->shape[0] ;
3875         unsigned nj = 2 ;
3876         prop = NP::Make<T>(ni, nj) ;
3877         T* prop_v = prop->values<T>();
3878         for(unsigned i=0 ; i < ni ; i++)
3879         {
3880             prop_v[nj*i+0] = T(i)/T(ni) ;  // 0 -> (ni-1)/ni = 1-1/ni
3881             prop_v[nj*i+1] = a->get<T>(i) ;
3882         }
3883     }
3884     else if( ndim == 2 )
3885     {
3886         assert( hd_factor == 10 || hd_factor == 20 );
3887         T edge = 1./T(hd_factor) ;
3888         unsigned ni = a->shape[0] ;
3889         unsigned nj = a->shape[1] ; assert( nj == 4 );
3890         unsigned nk = 2 ;
3891
3892         prop = NP::Make<T>(ni, nj, nk) ;
3893         T* prop_v = prop->values<T>();
3894
3895         for(unsigned i=0 ; i < ni ; i++)
3896         {
3897             T u_all =  T(i)/T(ni) ;
3898             T u_lhs =  T(i)/T(hd_factor*ni) ;
3899             T u_rhs =  1. - edge + T(i)/T(hd_factor*ni) ;
3900             T u_spa =  0. ;
3901
3902             for(unsigned j=0 ; j < nj ; j++)   // 0,1,2,3
3903             {
3904                 unsigned k;
3905                 k=0 ;
3906                 switch(j)
3907                 {
3908                     case 0:prop_v[nk*nj*i+nk*j+k] = u_all ; break ;
3909                     case 1:prop_v[nk*nj*i+nk*j+k] = u_lhs ; break ;
3910                     case 2:prop_v[nk*nj*i+nk*j+k] = u_rhs ; break ;
3911                     case 3:prop_v[nk*nj*i+nk*j+k] = u_spa ; break ;
3912                 }
3913                 k=1 ;
3914                 prop_v[nk*nj*i+nk*j+k] = a->get<T>(i,j) ;
3915             }
3916         }
3917     }
3918     else if( ndim == 3 )
3919     {
3920         assert( hd_factor == 10 || hd_factor == 20 );
3921         T edge = 1./T(hd_factor) ;
3922         unsigned ni = a->shape[0] ;
3923         unsigned nj = a->shape[1] ;
3924         unsigned nk = a->shape[2] ; assert( nk == 4 );   // hd_factor convention
3925         unsigned nl = 2 ;
3926
3927         prop = NP::Make<T>(ni, nj, nk, nl) ;
3928
3929         for(unsigned i=0 ; i < ni ; i++)
3930         {
3931             for(unsigned j=0 ; j < nj ; j++)
3932             {
3933                 T u_all =  T(j)/T(nj) ;
3934                 T u_lhs =  T(j)/T(hd_factor*nj) ;
3935                 T u_rhs =  1. - edge + T(j)/T(hd_factor*nj) ;
3936                 T u_spa =  0. ;
3937
3938                 for(unsigned k=0 ; k < nk ; k++)   // 0,1,2,3
3939                 {
3940                     unsigned l ;
3941                     l=0 ;
3942                     switch(k)
3943                     {
3944                         case 0:prop->set<T>(u_all, i,j,k,l) ; break ;
3945                         case 1:prop->set<T>(u_lhs, i,j,k,l) ; break ;
3946                         case 2:prop->set<T>(u_rhs, i,j,k,l) ; break ;
3947                         case 3:prop->set<T>(u_spa, i,j,k,l) ; break ;
3948                     }
3949                     l=1 ;
3950                     prop->set<T>( a->get<T>(i,j,k), i,j,k,l );
3951                 }
3952             }
3953         }
3954     }
3955     return prop ;
3956 }
3957
3958 /**
3959 NP::MakeLookupSample
3960 -----------------------
3961
3962 Create a lookup sample of shape (ni,) using the 2d icdf_prop and ni uniform random numbers
3963 Hmm in regions where the CDF is flat (and ICDF is steep), the ICDF lookup does not do very well.
3964 That is the reason for hd_factor, to increase resolution at the extremes where this
3965 issue usually occurs without paying the cost of higher resolution across the entire range.
3966
3967 TODO: compare what this provides directly on the ICDF (using NP::interp)
3968       with what the CDF directly can provide (using NP::pdomain)
3969
3970 **/
3971
3972 template <typename T> NP* NP::MakeLookupSample(const NP* icdf_prop, unsigned ni, unsigned seed, unsigned hd_factor ) // static
3973 {
3974     unsigned ndim = icdf_prop->shape.size() ;
3975     unsigned npay = icdf_prop->shape[ndim-1] ;
3976     assert( npay == 2 );
3977
3978     if(ndim == 2)
3979     {
3980         assert( hd_factor == 0 );
3981     }
3982     else if( ndim == 3 )
3983     {
3984         assert( hd_factor == 10 || hd_factor == 20  );
3985         assert( icdf_prop->shape[1] == 4 );
3986     }
3987
3988     std::mt19937_64 rng;
3989     rng.seed(seed);
3990     std::uniform_real_distribution<T> unif(0, 1);
3991
3992     NP* sample = NP::Make<T>(ni);
3993     T* sample_v = sample->values<T>();
3994     for(unsigned i=0 ; i < ni ; i++)
3995     {
3996         T u = unif(rng) ;
3997         T y = hd_factor > 0 ? icdf_prop->interpHD<T>(u, hd_factor ) : icdf_prop->interp<T>(u) ;
3998         sample_v[i] = y ;
3999     }
4000     return sample ;
4001 }
4002
4003 /**
4004 NP::MakeUniform
4005 ----------------
4006
4007 Create array of uniform random numbers between 0 and 1 using std::mt19937_64
4008
4009 **/
4010
4011 template <typename T> NP* NP::MakeUniform(unsigned ni, unsigned seed) // static
4012 {
4013     std::mt19937_64 rng;
4014     rng.seed(seed);
4015     std::uniform_real_distribution<T> unif(0, 1);
4016
4017     NP* uu = NP::Make<T>(ni);
4018     T* vv = uu->values<T>();
4019     for(unsigned i=0 ; i < ni ; i++) vv[i] = unif(rng) ;
4020     return uu ;
4021 }
4022
4023 inline NP* NP::copy() const
4024 {
4025     return MakeCopy(this);
4026 }
4027
4028
4029
4030
4031 /**
4032 NP::count_if
4033 -------------
4034
4035
4036 **/
4037
4038
4039 template<typename S>
4040 inline NP::INT NP::count_if(std::function<bool(const S*)> predicate) const
4041 {
4042     assert( is_itemtype<S>() );  // size of type same as item_bytes
4043     const S* vv = cvalues<S>();
4044     INT ni = num_items();
4045     INT count = 0 ;
4046     for(INT i=0 ; i < ni ; i++) if(predicate(vv+i)) count += 1 ;
4047     return count ;
4048 }
4049
4050 template<typename T>
4051 inline NP* NP::simple_copy_if(std::function<bool(const T*)> predicate ) const
4052 {
4053     assert( is_itemtype<T>() );  // size of type same as item_bytes
4054
4055     INT ni = num_items();
4056     INT si = count_if<T>(predicate) ;
4057     assert( si <= ni );
4058
4059     const T* aa = cvalues<T>();
4060
4061     NP* b = NP::Make<T>(si) ;
4062     T* bb = b->values<T>();
4063
4064     INT _si = 0 ;
4065     for(INT i=0 ; i < ni ; i++)
4066     {
4067         if(predicate(aa+i))
4068         {
4069             memcpy( bb + _si,  aa+i , sizeof(T) );
4070             _si += 1 ;
4071         }
4072     }
4073     assert( si == _si );
4074     return b ;
4075 }
4076
4077 /**
4078 NP::copy_if
4079 ------------
4080
4081 T
4082     atomic base type (eg float, double, int, uint32_t) used for the input array
4083     which is also used for the sub-array that is created
4084 S
4085     compound type, eg int4, sphoton, sphotonlite etc..
4086
4087
4088 sizeof(S)/sizeof(T)
    compound to atomic type ratio
4090
4091
4092 ::
4093
4094     NP* hit     = photon    ->copy_if<float,   sphoton>    (predicate) ;
4095     NP* hitlite = photonlite->copy_if<uint32_t,sphotonlite>(predicate) ;
4096
4097 **/
4098
4099
4100 template<typename T, typename S>
4101 inline NP* NP::copy_if(std::function<bool(const S*)> predicate ) const
4102 {
4103     assert( sizeof(S) >= sizeof(T) );
4104     INT ni = num_items();
4105
4106     INT si = count_if<S>(predicate) ;
4107     INT sj = sizeof(S) / sizeof(T) ;
4108
4109
4110     assert( si <= ni );
4111     std::vector<INT> sh(shape) ;
4112     INT nd = sh.size();
4113
4114     assert( nd > 0 );
4115     sh[0] = si ;
4116
4117     INT itemcheck = 1 ;
4118     for(INT i=1 ; i < nd ; i++) itemcheck *= sh[i] ;
4119
4120     bool sj_expect = itemcheck == sj ;
4121     if(!sj_expect) std::raise(SIGINT) ;
4122     assert( sj_expect );
4123
4124     const S* aa = cvalues<S>();
4125
4126     NP* b = NP::Make_<T>(sh) ;
4127     S* bb = b->values<S>();
4128
4129     INT _si = 0 ;
4130     for(INT i=0 ; i < ni ; i++)
4131     {
4132         if(predicate(aa+i))
4133         {
4134             memcpy( bb + _si,  aa+i , sizeof(S) );
4135             _si += 1 ;
4136         }
4137     }
4138     assert( si == _si );
4139     return b ;
4140 }
4141
4142
4143 /**
4144 NP::flexible_copy_if
4145 ----------------------
4146
4147 S: compound type, eg int4, sphoton, etc..
4148 T: atomic base type use for array, eg int, float, double
4149 Args: variable number of ints used to specify item shape eg (4,4)
4150
If no itemshape is provided, uses the default of (sizeof(S)/sizeof(T),)
For example with sphoton that has size of 16 floats, would use::

    NP* hit = photon->flexible_copy_if<float,sphoton>(predicate, 4, 4) ;

HMM: as the source array item shape is already available
there is actually no need for the Args itemshape complication.
Hence named this "flexible"
4159 **/
4160
4161 template<typename T, typename S, typename... Args>
4162 inline NP* NP::flexible_copy_if(std::function<bool(const S*)> predicate, Args ... itemshape ) const
4163 {
4164     assert( sizeof(S) >= sizeof(T) );
4165     INT ni = num_items();
4166
4167     INT si = count_if<S>(predicate) ;
4168     INT sj = sizeof(S) / sizeof(T) ;
4169
4170     assert( si <= ni );
4171
4172     std::vector<INT> itemshape_ = {itemshape...};
4173     std::vector<INT> sh ;
4174     sh.push_back(si) ;
4175
4176     if(itemshape_.size() == 0 )
4177     {
4178         sh.push_back(sj) ;
4179     }
4180     else
4181     {
4182         INT itemcheck = 1 ;
4183         for(INT i=0 ; i < INT(itemshape_.size()) ; i++)
4184         {
4185             sh.push_back(itemshape_[i]) ;
4186             itemcheck *= itemshape_[i] ;
4187         }
4188         assert( itemcheck == sj );
4189     }
4190     const S* aa = cvalues<S>();
4191
4192     NP* b = NP::Make_<T>(sh) ;
4193     S* bb = b->values<S>();
4194
4195     INT _si = 0 ;
4196     for(INT i=0 ; i < ni ; i++)
4197     {
4198         if(predicate(aa+i))
4199         {
4200             memcpy( bb + _si,  aa+i , sizeof(S) );
4201             _si += 1 ;
4202         }
4203     }
4204     assert( si == _si );
4205     return b ;
4206 }
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219 inline NP* NP::LoadIfExists(const char* path_)
4220 {
4221     return NP::Exists(path_) ? NP::Load(path_) : nullptr ;
4222 }
4223
4224
4225 inline NP* NP::Load(const char* path_)
4226 {
4227     const char* path = U::Resolve(path_);
4228     if(VERBOSE)
4229         std::cerr
4230             << "[ NP::Load "
4231             << " path_ [" << ( path_  ? path_ : "-" ) << "]"
4232             << " path [" << ( path ? path : "-" ) << "]"
4233             << " int(strlen(path)) " << ( path ? int(strlen(path)) : -1 )
4234             << std::endl
4235             ;
4236
4237     if(path == nullptr) return nullptr ; // eg when path_ starts with unsetenvvar "$TOKEN"
4238
4239     bool npy_ext = U::EndsWith(path, EXT) ;
4240     NP* a = nullptr ;
4241     if(npy_ext)
4242     {
4243         a  = NP::Load_(path);
4244     }
4245     else
4246     {
4247         std::vector<std::string> nms ;
4248         U::DirList(nms, path, EXT);
4249         std::cout
4250             << "NP::Load"
4251             << " path " << path
4252             << " U::DirList contains nms.size " << nms.size()
4253             << " EXT " << EXT
4254             << std::endl
4255             ;
4256         a = NP::Concatenate(path, nms);
4257     }
4258     if(VERBOSE) std::cerr << "] NP::Load " << path << std::endl ;
4259     return a ;
4260 }
4261
4262 /**
4263 NP::LoadSlice
4264 ---------------
4265
4266 _path
4267     can start with envvar token
4268 _sli
4269     slice string as shown below OR path to an integer "where" array
4270     with indices to be loaded OR can be an envvar token that resolves
4271     to the slice string or the path to the "where" array.
4272
4273     It is now also supported for the slice spec string to have both
4274     the path to the "where" array with suffix that slices into that
4275     where array, eg "/tmp/w54.npy[0:10]".  That will use the first 10
4276     indices from the where array to specify which items from the
4277     primary array to load.
4278
4279 Reads from file into memory only the specified slices using
4280 NP::load_data_sliced which is based on std::ifstream::seekg
4281
4282 Slice (start,stop,step) examples following Python slice indexing::
4283
4284     [::2]
4285     [::100]
4286     [:10]
4287     [5:10]
4288
4289 * -ve step not implemented
4290
4291 **/
4292
4293 inline NP* NP::LoadSlice(const char* _path, const char* _sli)
4294 {
4295     const char* path = U::Resolve(_path);
4296     if(path == nullptr) return nullptr ; // eg when _path starts with unsetenvvar "$TOKEN"
4297     bool npy_ext = U::EndsWith(path, EXT) ;
4298     if(!npy_ext) return nullptr ;
4299
4300
4301     NP* a = nullptr ;
4302     if( _sli && strlen(_sli) > 1 )
4303     {
4304         bool starts_with_dollar = _sli[0] == '$' ;
4305         const char* sli = starts_with_dollar ? U::GetEnv(_sli+1, nullptr) : _sli ;
4306         a = NP::LoadSlice_(path, sli);
4307     }
4308     else
4309     {
4310         a = NP::Load_(path);
4311     }
4312     return a ;
4313 }
4314
4315
4316 template<typename T>
4317 inline NP* NP::LoadThenSlice( const char* _path, const char* _sel )
4318 {
4319     const char* path = U::Resolve(_path);
4320     if(!Exists(path)) return nullptr ;
4321
4322     NP* __a = NP::Load(path) ;
4323     NP* _a = __a ? NP::MakeNarrowIfWide(__a) : nullptr  ;
4324
4325     const char* sel = nullptr ;
4326     if(_sel && strlen(_sel ) > 1)
4327     {
4328         bool starts_with_dollar = _sel[0] == '$' ;
4329         sel = starts_with_dollar ? U::GetEnv(_sel+1, nullptr) : _sel ;
4330         if(VERBOSE) std::cout
4331             << "NP::LoadThenSlice"
4332             << " starts_with_dollar " << ( starts_with_dollar ? "YES" : "NO " )
4333             << " _sel [" << ( _sel ? _sel : "-" ) << "]"
4334             << " sel ["  << ( sel ? sel : "-" ) << "]"
4335             << "\n"
4336             ;
4337     }
4338     NP* a = sel ? NP::MakeSliceSelection<T>(_a, sel) : _a ;
4339     return a ;
4340 }
4341
4342
4343
4344
4345 /**
4346 NP::ExistsArrayFolder
4347 ----------------------
4348
4349 Returns true if the path does not end in .npy
4350 and is a directory folder containing one or more .npy files.
4351
4352 **/
4353
4354 inline bool NP::ExistsArrayFolder(const char* path )
4355 {
4356     bool npy_ext = U::EndsWith(path, EXT) ;
4357     if(npy_ext) return false ;
4358     std::vector<std::string> nms ;
4359     U::DirList(nms, path, EXT);
4360     return nms.size() > 0 ;
4361 }
4362
4363
4364
4365
4366
4367 inline NP* NP::Load_(const char* path) // static
4368 {
4369     if(!path) return nullptr ;
4370     NP* a = new NP() ;
4371     INT rc = a->load(path, nullptr) ;
4372     return rc == 0 ? a  : nullptr ;
4373 }
4374
4375 inline NP* NP::LoadFromBuffer_(const char* buffer, size_t size ) // static
4376 {
4377     if(!buffer || size < 128) return nullptr ;
4378     NP* a = new NP() ;
4379     INT rc = a->load_from_buffer(buffer, size) ;
4380     return rc == 0 ? a  : nullptr ;
4381 }
4382
4383
4384
4385 /**
4386 NP::LoadSlice_
4387 -----------------
4388
4389 Invoked from NP::LoadSlice. Any envvar tokens
4390 in the initial arguments should have been
4391 resolved at this stage.
4392
4393 **/
4394
4395
4396 inline NP* NP::LoadSlice_(const char* path, const char* sli)
4397 {
4398     if(!path) return nullptr ;
4399     NP* a = new NP() ;
4400     INT rc = a->load(path, sli) ;
4401     return rc == 0 ? a  : nullptr ;
4402 }
4403
4404
4405
4406 inline NP* NP::Load(const char* dir, const char* name)
4407 {
4408     if(!dir) return nullptr ;
4409     std::string path = U::form_path(dir, name);
4410     return Load(path.c_str());
4411 }
4412
4413 inline NP* NP::Load(const char* dir, const char* reldir, const char* name)
4414 {
4415     if(!dir) return nullptr ;
4416     std::string path = U::form_path(dir, reldir, name);
4417     return Load(path.c_str());
4418 }
4419
4420 /**
4421 NP::LoadWide
4422 --------------
4423
4424 Loads array and widens it to 8 bytes per element if not already wide.
4425
4426 **/
4427
4428 inline NP* NP::LoadWide(const char* dir, const char* reldir, const char* name)
4429 {
4430     if(!dir) return nullptr ;
4431     std::string path = U::form_path(dir, reldir, name);
4432     return LoadWide(path.c_str());
4433 }
4434
4435 inline NP* NP::LoadWide(const char* dir, const char* name)
4436 {
4437     if(!dir) return nullptr ;
4438     std::string path = U::form_path(dir, name);
4439     return LoadWide(path.c_str());
4440 }
4441
4442 inline NP* NP::LoadWide(const char* path)
4443 {
4444     if(!path) return nullptr ;
4445     NP* a = NP::Load(path);
4446
4447     assert( a->uifc == 'f' && ( a->ebyte == 8 || a->ebyte == 4 ));
4448     // cannot think of application for doing this with  ints, so restrict to float OR double
4449
4450     NP* b = a->ebyte == 8 ? NP::MakeCopy(a) : NP::MakeWide(a) ;
4451
4452     a->clear();
4453
4454     return b ;
4455 }
4456
4457 /**
4458 NP::LoadNarrow
4459 ---------------
4460
4461 Loads array and narrows to 4 bytes per element if not already narrow.
4462
4463 **/
4464
4465 inline NP* NP::LoadNarrow(const char* dir, const char* reldir, const char* name)
4466 {
4467     if(!dir) return nullptr ;
4468     std::string path = U::form_path(dir, reldir, name);
4469     return LoadNarrow(path.c_str());
4470 }
4471 inline NP* NP::LoadNarrow(const char* dir, const char* name)
4472 {
4473     if(!dir) return nullptr ;
4474     std::string path = U::form_path(dir, name);
4475     return LoadNarrow(path.c_str());
4476 }
4477 inline NP* NP::LoadNarrow(const char* path)
4478 {
4479     if(!path) return nullptr ;
4480     NP* a = NP::Load(path);
4481
4482     assert( a->uifc == 'f' && ( a->ebyte == 8 || a->ebyte == 4 ));
4483     // cannot think of application for doing this with  ints, so restrict to float OR double
4484
4485     NP* b = a->ebyte == 4 ? NP::MakeCopy(a) : NP::MakeNarrow(a) ;
4486
4487     a->clear();
4488
4489     return b ;
4490 }
4491
4492 /**
4493 NP::find_value_index
4494 ---------------------
4495
4496 **/
4497
4498 template<typename T> inline NP::INT NP::find_value_index(T value, T epsilon) const
4499 {
4500     const T* vv = cvalues<T>();
4501     unsigned ni = shape[0] ;
4502     unsigned ndim = shape.size() ;
4503     INT idx = -1 ;
4504     if(ndim == 1)
4505     {
4506         for(unsigned i=0 ; i < ni ; i++)
4507         {
4508             T v = vv[i];
4509             if(std::abs(v-value) < epsilon)
4510             {
4511                 idx = i ;
4512                 break ;
4513             }
4514         }
4515     }
4516     return idx ;
4517 }
4518
4519 /**
4520 NP::ifind2D
4521 ------------
4522
4523 Consider a 2D array of integers of shape (ni, nj).
4524 Look for *ival* in the *jcol* column of each of the *ni* items
4525 and return the corresponding *vret* from the *jret* column
4526 or the *i* index if *jret* is -1.
4527
4528 ::
4529
4530     In [2]: a
4531     Out[2]:
4532     array([[ 0,  1,  2,  3],
4533            [ 4,  5,  6,  7],
4534            [ 8,  9, 10, 11],
4535            [12, 13, 14, 15],
4536            [16, 17, 18, 19],
4537            [20, 21, 22, 23],
4538            [24, 25, 26, 27],
4539            [28, 29, 30, 31],
4540            [32, 33, 34, 35],
4541            [36, 37, 38, 39]], dtype=int32)
4542
4543 ::
4544
4545     NP* a = NP::Make<int>(10,4) ;
4546     a->fillIndexFlat();
4547     a->save("/tmp/a.npy");
4548
4549     int ival = 4 ;  // value to look for
4550     int jcol = 0 ;  // column in which to look for ival
4551     int jret = 3 ;  // column to return -> 7
4552
4553     int vret = a->ifind2D<int>(ival, jcol, jret );
4554
4555     std::cout << " vret " << vret << std::endl ;
4556     assert( vret == 7 );
4557
4558 **/
4559
4560 template<typename T> inline T NP::ifind2D(T ivalue, INT jcol, INT jret ) const
4561 {
4562     if( shape.size() != 2 ) return -2 ;
4563
4564     INT ni = shape[0] ;
4565     INT nj = shape[1] ;
4566
4567     if( jcol >= nj ) return -3 ;
4568     if( jret >= nj ) return -4 ;
4569
4570     const T* vv = cvalues<T>();
4571
4572     T vret = -1 ;
4573
4574     for(INT i=0 ; i < ni ; i++)
4575     {
4576         T vcol = vv[i*nj+jcol];
4577         bool match = vcol == ivalue ;
4578
4579         T cand = jret < 0 ? i : vv[i*nj+jret];
4580
4581         if(VERBOSE) std::cout
4582            << "NP::ifind2D"
4583            << " i " << i
4584            << " vcol " << vcol
4585            << " cand " << cand
4586            << " match " << match
4587            << std::endl
4588            ;
4589
4590         if(match)
4591         {
4592             vret = cand ;
4593             break ;
4594         }
4595     }
4596     return vret ;
4597 }
4598
4599
4600
4601
4602 inline bool NP::is_pshaped() const
4603 {
4604     bool property_shaped = shape.size() == 2 && shape[1] == 2 && shape[0] > 1 ;
4605     return property_shaped ;
4606 }
4607
4608 template<typename T>
4609 inline bool NP::is_pconst() const
4610 {
4611     if(!is_pshaped()) return false ;
4612     const T* vv = cvalues<T>();
4613     INT ni = shape[0] ;
4614     INT nj = shape[1] ;
4615     const T v0 = vv[0*nj+nj-1] ;
4616     INT num_equal = 0 ;
4617     for(INT i=0 ; i < ni ; i++) num_equal += vv[i*nj+nj-1] == v0 ? 1 : 0 ;
4618     return num_equal == ni ;
4619 }
4620
4621 /**
4622 NP::is_pconst_dumb
4623 -------------------
4624
4625 A dumb property is one that uses more than two energy points
4626 to represent a constant value.
4627
4628 **/
4629 template<typename T>
4630 inline bool NP::is_pconst_dumb() const
4631 {
4632     return is_pconst<T>() && shape[0] > 2 ;
4633 }
4634
4635
4636 template<typename T>
4637 inline T NP::pconst(T fallback) const
4638 {
4639     if(!is_pconst<T>()) return fallback ;
4640     INT nj = shape[1] ;
4641     const T* vv = cvalues<T>();
4642     const T v0 = vv[0*nj+nj-1] ;
4643     return v0 ;
4644 }
4645
4646 template<typename T>
4647 inline NP* NP::MakePCopyNotDumb(const NP* a) // static
4648 {
4649     assert( a && a->is_pshaped() );
4650     NP* r = nullptr ;
4651
4652     if(a->is_pconst_dumb<T>())
4653     {
4654         T dl = a->plhs<T>(0) ;
4655         T dr = a->prhs<T>(0) ;
4656         T vc = a->pconst<T>(-1) ;
4657         r = MakePConst<T>(dl, dr, vc );
4658     }
4659     else
4660     {
4661         r = MakeCopy(a);
4662     }
4663     return r ;
4664 }
4665
4666 template<typename T>
4667 inline NP* NP::MakePConst( T dl, T dr, T vc ) // static
4668 {
4669     INT ni = 2 ;
4670     INT nj = 2 ;
4671
4672     NP* p = NP::Make<T>(ni,nj) ;
4673     T*  pp = p->values<T>() ;
4674
4675     pp[0*nj+0] = dl ;
4676     pp[0*nj+1] = vc ;
4677     pp[1*nj+0] = dr ;
4678     pp[1*nj+1] = vc ;
4679
4680     return p ;
4681 }
4682
4683
4684
4685
4686
4687 template<typename T> inline T NP::plhs(unsigned column) const
4688 {
4689     const T* vv = cvalues<T>();
4690
4691     unsigned ndim = shape.size() ;
4692     assert( ndim == 1 || ndim == 2);
4693
4694     unsigned nj = ndim == 1 ? 1 : shape[1] ;
4695     assert( column < nj );
4696
4697     const T lhs = vv[nj*(0)+column] ;
4698     return lhs ;
4699 }
4700
4701
4702 template<typename T> inline T NP::prhs(unsigned column) const
4703 {
4704     const T* vv = cvalues<T>();
4705
4706     unsigned ndim = shape.size() ;
4707     unsigned ni = shape[0] ;
4708     unsigned nj = ndim == 1 ? 1 : shape[1] ;
4709     assert( column < nj );
4710
4711     const T rhs = vv[nj*(ni-1)+column] ;
4712
4713 #ifdef DEBUG
4714     /*
4715     std::cout
4716         << "NP::prhs"
4717         << " column " << std::setw(3) << column
4718         << " ndim " << std::setw(3) << ndim
4719         << " ni " << std::setw(3) << ni
4720         << " nj " << std::setw(3) << nj
4721         << " rhs " << std::setw(10) << std::fixed << std::setprecision(4) << rhs
4722         << std::endl
4723         ;
4724      */
4725 #endif
4726
4727     return rhs ;
4728 }
4729
4730
4731
4732 /**
4733 NP::pfindbin
4734 ---------------
4735
4736 Return *ibin* index of bin corresponding to the argument value.
4737
4738 +---------------------+------------------+----------------------+
4739 |  condition          |   ibin           |  in_range            |
4740 +=====================+==================+======================+
4741 |  value < lhs        |   0              |   false              |
4742 +---------------------+------------------+----------------------+
4743 |  value == lhs       |   1              |   true               |
4744 +---------------------+------------------+----------------------+
4745 |  lhs < value < rhs  |   1 .. ni-1      |   true               |
4746 +---------------------+------------------+----------------------+
4747 |  value == rhs       |   ni             |   false              |
4748 +---------------------+------------------+----------------------+
4749 |  value > rhs        |   ni             |   false              |
4750 +---------------------+------------------+----------------------+
4751
4752 Example indices for bins array of shape (4,) with 3 bins and 4 values (ni=4)
4753 This numbering scheme matches that used by np.digitize::
4754
4755
4756                 +-------------+--------------+-------------+
4757                 |             |              |             |
4758                 |             |              |             |
4759                 +-------------+--------------+-------------+
4760               0        1             2               3            4
4761
4762                 lhs                                       rhs
4763
4764 **/
4765
4766 template<typename T> inline NP::INT  NP::pfindbin(const T value, unsigned column, bool& in_range) const
4767 {
4768     const T* vv = cvalues<T>();
4769
4770     unsigned ndim = shape.size() ;
4771     unsigned ni = shape[0] ;
4772     unsigned nj = ndim == 1 ? 1 : shape[1] ;
4773     assert( column < nj );
4774
4775     const T lhs = vv[nj*(0)+column] ;
4776     const T rhs = vv[nj*(ni-1)+column] ;
4777
4778     INT ibin = -1 ;
4779     in_range = false ;
4780     if( value < lhs )         // value==lhs is in_range
4781     {
4782         ibin = 0 ;
4783     }
4784     else if( value >= rhs )   // value==rhs is NOT in_range
4785     {
4786         ibin = ni ;
4787     }
4788     else if ( value >= lhs && value < rhs )
4789     {
4790         in_range = true ;
4791         for(unsigned i=0 ; i < ni-1 ; i++)
4792         {
4793             const T v0 = vv[nj*(i+0)+column] ;
4794             const T v1 = vv[nj*(i+1)+column] ;
4795             if( value >= v0 && value < v1 )
4796             {
4797                  ibin = i + 1 ;  // maximum i from here is: ni-1-1 -> max ibin is ni-1
4798                  break ;
4799             }
4800         }
4801     }
4802     return ibin ;
4803 }
4804
4805
4806
4807
4808
4809 /**
4810 NP::get_edges
4811 ----------------
4812
4813 Return bin edges using numbering convention from NP::pfindbin,
4814 for out of range ibin == 0 returns lhs edge for both lo and hi
4815 for out of range ibin = ni returns rhs edge for both lo and hi.
4816
4817 **/
4818
4819 template<typename T> inline void  NP::get_edges(T& lo, T& hi, unsigned column, INT ibin) const
4820 {
4821     const T* vv = cvalues<T>();
4822
4823     unsigned ndim = shape.size() ;
4824     unsigned ni = shape[0] ;
4825     unsigned nj = ndim == 1 ? 1 : shape[1] ;
4826     assert( column < nj );
4827
4828     const T lhs = vv[nj*(0)+column] ;
4829     const T rhs = vv[nj*(ni-1)+column] ;
4830
4831     if( ibin == 0 )
4832     {
4833         lo = lhs ;
4834         hi = lhs ;
4835     }
4836     else if( ibin == INT(ni) )
4837     {
4838         lo = rhs ;
4839         hi = rhs ;
4840     }
4841     else
4842     {
4843         unsigned i = ibin - 1 ;
4844         lo  = vv[nj*(i)+column] ;
4845         hi  = vv[nj*(i+1)+column] ;
4846     }
4847 }
4848
4849
4850
4851 template<typename T> inline T  NP::psum(unsigned column) const
4852 {
4853     const T* vv = cvalues<T>();
4854     unsigned ni = shape[0] ;
4855     unsigned ndim = shape.size() ;
4856     unsigned nj = ndim == 1 ? 1 : shape[1] ;
4857     assert( column < nj );
4858
4859     T sum = 0. ;
4860     for(unsigned i=0 ; i < ni ; i++) sum += vv[nj*i+column] ;
4861     return sum ;
4862 }
4863
4864 template<typename T> inline void NP::pscale_add(T scale, T add, unsigned column)
4865 {
4866     assert( is_pshaped() );
4867     assert( column < 2 );
4868     T* vv = values<T>();
4869     unsigned ni = shape[0] ;
4870     for(unsigned i=0 ; i < ni ; i++) vv[2*i+column] = vv[2*i+column]*scale + add ;  ;
4871 }
4872
4873 template<typename T> inline void NP::pscale(T scale, unsigned column)
4874 {
4875     pscale_add(scale, T(0.), column );
4876 }
4877
4878
4879
4880 template<typename T> inline void NP::pdump(const char* msg, T d_scale, T v_scale) const
4881 {
4882     bool property_shaped = is_pshaped();
4883     assert( property_shaped );
4884
4885     unsigned ni = shape[0] ;
4886     std::cout
4887         << msg
4888         << " ni " << ni
4889         << " d_scale "
4890         << std::fixed << std::setw(10) << std::setprecision(5) << d_scale
4891         << " v_scale "
4892         << std::fixed << std::setw(10) << std::setprecision(5) << v_scale
4893         << std::endl
4894         ;
4895
4896     const T* vv = cvalues<T>();
4897
4898     for(unsigned i=0 ; i < ni ; i++)
4899     {
4900         std::cout
4901              << " i " << std::setw(3) << i
4902              << " d " << std::fixed << std::setw(10) << std::setprecision(5) << vv[2*i+0]*d_scale
4903              << " v " << std::fixed << std::setw(10) << std::setprecision(5) << vv[2*i+1]*v_scale
4904              << std::endl
4905              ;
4906     }
4907 }
4908
4909 /**
4910 NP::minmax
4911 ------------
4912
4913 Finds minimum and maximum values of column j, assuming a 2d array,
4914 by looping over the first array dimension and comparing all values.
4915
4916 **/
4917
4918 template<typename T> inline void NP::minmax(T& mn, T&mx, unsigned j, INT item ) const
4919 {
4920     unsigned ndim = shape.size() ;
4921     assert( ndim == 2 || ndim == 3);
4922
4923     unsigned ni = shape[ndim-2] ;
4924     unsigned nj = shape[ndim-1] ;
4925     assert( j < nj );
4926
4927     unsigned num_items = ndim == 3 ? shape[0] : 1 ;
4928     assert( item < INT(num_items) );
4929     unsigned item_offset = item == -1 ? 0 : ni*nj*item ;
4930     const T* vv = cvalues<T>() + item_offset ;  // shortcut approach to handling multiple items
4931
4932     mn = std::numeric_limits<T>::max() ;
4933     mx = std::numeric_limits<T>::min() ;
4934
4935     for(unsigned i=0 ; i < ni ; i++)
4936     {
4937         T v = vv[nj*i+j] ;
4938         if( v > mx ) mx = v;
4939         if( v < mn ) mn = v;
4940     }
4941 }
4942
4943
4944
4945 /**
4946 NP::minmax2D_reshaped<N,T>
4947 --------------------------
4948
4949 1. Temporarily change shape to (-1,N) : ie array of items with N element of type T
4950 2. invoked minmax2D determining value range of the items
4951 3. return the shape back to the original
4952
4953 Consider array of shape (1000,32,4,4) with (position,time) in [:,:,0]
4954 After reshaping that becomes (1000*32*4, 4 )
4955 BUT only every fourth 4-plet is (position, time)
4956
4957 So (item_stride, item_offset) needs to be (4,0) where the
4958 item is the 4-plet chosen with the N template parameter.
4959
4960 Usage example::
4961
4962     int item_stride = 4 ;
4963     int item_offset = 0 ;
4964     record->minmax2D_reshaped<4,float>(&mn.x, &mx.x, item_stride, item_offset );
4965     // actually better to use sphoton::MinMaxPost in this case to avoid unfilled zeros
4966     // impacting the ranges
4967
4968
4969 **/
4970 template<int N, typename T> inline void NP::minmax2D_reshaped(T* mn, T* mx, INT item_stride, INT item_offset )
4971 {
4972     std::vector<INT> sh = shape ;
4973     change_shape(-1,N);
4974
4975     assert( shape.size() == 2 );
4976     [[maybe_unused]] INT ni = shape[0] ;
4977     [[maybe_unused]] INT nj = shape[1] ;
4978     assert( nj == N && ni > 0 );
4979
4980     minmax2D<T>(mn, mx, item_stride, item_offset );
4981
4982     reshape(sh);
4983 }
4984
4985 /**
4986 NP::minmax2D
4987 -------------
4988
4989 Assuming shape (-1, N) where N is typically small (eg 4)
4990 and the mn, mx arguments point to structures
4991 with at least N elements.
4992
4993 **/
4994
4995 template<typename T> inline void NP::minmax2D(T* mn, T* mx, INT item_stride, INT item_offset ) const
4996 {
4997     assert( shape.size() == 2 );
4998     INT ni = shape[0] ;
4999     INT nj = shape[1] ;
5000
5001     for(INT j=0 ; j < nj ; j++) mn[j] = std::numeric_limits<T>::max() ;
5002     for(INT j=0 ; j < nj ; j++) mx[j] = std::numeric_limits<T>::lowest() ; // largest negative
5003
5004     const T* vv = cvalues<T>() ;
5005     for(INT i=0 ; i < ni ; i++)
5006     {
5007         if( i % item_stride != item_offset ) continue ;
5008         for(INT j=0 ; j < nj ; j++)
5009         {
5010             INT idx = i*nj + j ;
5011             if( vv[idx] < mn[j] ) mn[j] = vv[idx] ;
5012             if( vv[idx] > mx[j] ) mx[j] = vv[idx] ;
5013         }
5014     }
5015 }
5016
5017
5018 /**
5019 NP::linear_crossings
5020 ------------------------
5021
5022 As linearly interpolated properties eg RINDEX using NP::interp
5023 are piecewise linear functions it is possible to find the
5024 crossings between such functions and constant values
5025 without using optimization. Simply observing sign changes to identify
5026 crossing bins and then some linear calc provides the roots::
5027
5028
5029                   (x1,v1)
5030                    *
5031                   /
5032                  /
5033                 /
5034         -------?(x,v)----    v = 0    when values are "ri_ - BetaInverse"
5035               /
5036              /
5037             /
5038            /
5039           *
5040         (x0,v0)
5041
5042
5043          Only x is unknown
5044
5045
5046               v1 - v        v - v0
5047              ----------  =  ----------
5048               x1 - x        x - x0
5049
5050
5051            v1 (x - x0 ) =  -v0  (x1 - x )
5052
5053            v1.x - v1.x0 = - v0.x1 + v0.x
5054
5055            ( v1 - v0 ) x = v1*x0 - v0*x1
5056
5057
5058                          v1*x0 - v0*x1
5059                x    =   -----------------
5060                           ( v1 - v0 )
5061
5062
5063 Developed in opticks/ana/rindex.py as part of an attempt to develop inverse transform
5064 Cerenkov RINDEX sampling.
5065
5066 **/
5067
5068 template<typename T> inline void NP::linear_crossings( T value, std::vector<T>& crossings ) const
5069 {
5070     assert( shape.size() == 2 && shape[1] == 2 && shape[0] > 1);
5071     unsigned ni = shape[0] ;
5072     const T* vv = cvalues<T>();
5073     crossings.clear();
5074
5075     for(unsigned i=0 ; i < ni-1 ; i++ )
5076     {
5077         T x0 = vv[2*(i+0)+0] ;
5078         T x1 = vv[2*(i+1)+0] ;
5079         T v0 = value - vv[2*(i+0)+1] ;
5080         T v1 = value - vv[2*(i+1)+1] ;
5081         if( v0*v1 < T(0.))
5082         {
5083             T x = (v1*x0 - v0*x1)/(v1-v0) ;
5084             //printf("i %d x0 %6.4f x1 %6.4f v0 %6.4f v1 %6.4f x %6.4f \n", i, x0,x1,v0,v1,x) ;
5085             crossings.push_back(x) ;
5086         }
5087     }
5088 }
5089
5090 /**
5091 NP::trapz
5092 -----------
5093
5094 Composite trapezoidal numerical integration
5095
5096 * https://en.wikipedia.org/wiki/Trapezoidal_rule
5097
5098 **/
5099
5100 template<typename T> inline NP* NP::trapz() const
5101 {
5102     assert( shape.size() == 2 && shape[1] == 2 && shape[0] > 1);
5103     unsigned ni = shape[0] ;
5104     T half(0.5);
5105     T xmn = get<T>(0, 0);
5106
5107     NP* integral = NP::MakeLike(this);
5108     T* integral_v = integral->values<T>();
5109     integral_v[0] = xmn ;
5110     integral_v[1] = 0. ;
5111
5112     for(unsigned i=0 ; i < ni-1 ; i++)
5113     {
5114         T x0 = get<T>(i, 0);
5115         T y0 = get<T>(i, 1);
5116
5117         T x1 = get<T>(i+1, 0);
5118         T y1 = get<T>(i+1, 1);
5119
5120 #ifdef DEBUG
5121         std::cout
5122             << " x0 " << std::setw(10) << std::fixed << std::setprecision(4) << x0
5123             << " y0 " << std::setw(10) << std::fixed << std::setprecision(4) << y0
5124             << " x1 " << std::setw(10) << std::fixed << std::setprecision(4) << x1
5125             << " y1 " << std::setw(10) << std::fixed << std::setprecision(4) << y1
5126             << std::endl
5127             ;
5128 #endif
5129         integral_v[2*(i+1)+0] = x1 ;  // x0 of first bin covered with xmn
5130         integral_v[2*(i+1)+1] = integral_v[2*(i+0)+1] + (x1 - x0)*(y0 + y1)*half ;
5131     }
5132     return integral ;
5133 }
5134
5135 template<typename T> void NP::psplit(std::vector<T>& dom, std::vector<T>& val) const
5136 {
5137     unsigned nv = num_values() ;
5138     const T* vv = cvalues<T>() ;
5139
5140     assert( nv %  2 == 0 );
5141     unsigned entries = nv/2 ;
5142
5143     dom.resize(entries);
5144     val.resize(entries);
5145
5146     for(unsigned i=0 ; i < entries ; i++)
5147     {
5148         dom[i] = vv[2*i+0] ;
5149         val[i] = vv[2*i+1] ;
5150     }
5151 }
5152
5153
5154
5155
5156 /**
5157 NP::pdomain
5158 -------------
5159
5160 Returns the domain (eg energy or wavelength) corresponding
5161 to the property value argument.
5162
5163 Requires arrays of shape (num_dom, 2) when item is at default value of -1
5164
5165 Also supports arrays of shape (num_item, num_dom, 2 ) when item is used to pick the item.
5166
5167
5168
5169                                                         1  (x1,y1)     (  binVector[bin+1], dataVector[bin+1] )
5170                                                        /
5171                                                       /
5172                                                      *  ( xv,yv )       ( res, aValue )
5173                                                     /
5174                                                    /
5175                                                   0  (x0,y0)          (  binVector[bin], dataVector[bin] )
5176
5177
5178               Similar triangles::
5179
5180                  xv - x0       x1 - x0
5181                ---------- =   -----------
5182                  yv - y0       y1 - y0
5183
5184                                                    x1 - x0
5185                    xv  =    x0  +   (yv - y0) *  --------------
5186                                                    y1 - y0
5187
5188 **/
5189
5190 template<typename T> inline T  NP::pdomain(const T value, INT item, bool dump ) const
5191 {
5192     const T zero = 0. ;
5193     unsigned ndim = shape.size() ;
5194     assert( ndim == 2 || ndim == 3 );
5195     unsigned ni = shape[ndim-2];
5196     unsigned nj = shape[ndim-1];
5197
5198     assert( nj <= 8 );        // not needed for below, just for sanity of payload
5199     unsigned jdom = 0 ;       // 1st payload slot is "domain"
5200     unsigned jval = nj - 1 ;  // last payload slot is "value"
5201     // note that with nj > 2 this allows other values to be carried
5202
5203     unsigned num_items = ndim == 3 ? shape[0] : 1 ;
5204     assert( item < INT(num_items) );
5205     unsigned item_offset = item == -1 ? 0 : ni*nj*item ;   // using item = 0 will have the same effect
5206
5207     const T* vv = cvalues<T>() + item_offset ;  // shortcut approach to handling multiple items
5208
5209
5210     const T lhs_dom = vv[nj*(0)+jdom];
5211     const T rhs_dom = vv[nj*(ni-1)+jdom];
5212     bool dom_expect = rhs_dom >= lhs_dom  ;  // allow equal as getting zeros at extremes
5213
5214     const T lhs_val = vv[nj*(0)+jval];
5215     const T rhs_val = vv[nj*(ni-1)+jval];
5216     bool val_expect = rhs_val >= lhs_val ;
5217
5218     if(!dom_expect) std::cout
5219         << "NP::pdomain FATAL dom_expect : rhs_dom > lhs_dom "
5220         << " lhs_dom " << std::setw(10) << std::fixed << std::setprecision(4) << lhs_dom
5221         << " rhs_dom " << std::setw(10) << std::fixed << std::setprecision(4) << rhs_dom
5222         << std::endl
5223         ;
5224     assert( dom_expect );
5225
5226     if(!val_expect) std::cout
5227         << "NP::pdomain FATAL val_expect : rhs_val > lhs_val "
5228         << " lhs_val " << std::setw(10) << std::fixed << std::setprecision(4) << lhs_val
5229         << " rhs_val " << std::setw(10) << std::fixed << std::setprecision(4) << rhs_val
5230         << std::endl
5231         ;
5232     assert( val_expect );
5233
5234     const T yv = value ;
5235     T xv ;
5236     bool xv_set = false ;
5237
5238
5239     if( yv <= lhs_val )
5240     {
5241         xv = lhs_dom ;
5242         xv_set = true ;
5243     }
5244     else if( yv >= rhs_val )
5245     {
5246         xv = rhs_dom  ;
5247         xv_set = true ;
5248     }
5249     else if ( yv >= lhs_val && yv < rhs_val  )
5250     {
5251         for(unsigned i=0 ; i < ni-1 ; i++)
5252         {
5253             const T x0 = vv[nj*(i+0)+jdom] ;
5254             const T y0 = vv[nj*(i+0)+jval] ;
5255             const T x1 = vv[nj*(i+1)+jdom] ;
5256             const T y1 = vv[nj*(i+1)+jval] ;
5257             const T dy = y1 - y0 ;
5258
5259             //assert( dy >= zero );   // must be monotonic for this to make sense
5260             /*
5261             if( dy < zero )
5262             {
5263                 std::cout
5264                     << "NP::pdomain ERROR : non-monotonic dy less than zero  "
5265                     << " i " << std::setw(5) << i
5266                     << " x0 " << std::setw(10) << std::fixed << std::setprecision(6) << x0
5267                     << " x1 " << std::setw(10) << std::fixed << std::setprecision(6) << x1
5268                     << " y0 " << std::setw(10) << std::fixed << std::setprecision(6) << y0
5269                     << " y1 " << std::setw(10) << std::fixed << std::setprecision(6) << y1
5270                     << " yv " << std::setw(10) << std::fixed << std::setprecision(6) << yv
5271                     << " dy " << std::setw(10) << std::fixed << std::setprecision(6) << dy
5272                     << std::endl
5273                     ;
5274             }
5275             */
5276
5277             if( y0 <= yv && yv < y1 )
5278             {
5279                 xv = x0 ;
5280                 xv_set = true ;
5281                 if( dy > zero ) xv += (yv-y0)*(x1-x0)/dy ;
5282                 break ;
5283             }
5284         }
5285     }
5286
5287     assert( xv_set == true );
5288
5289     if(dump)
5290     {
5291         std::cout
5292             << "NP::pdomain.dump "
5293             << " item " << std::setw(4) << item
5294             << " ni " << std::setw(4) << ni
5295             << " nj " << std::setw(4) << nj
5296             << " lhs_dom " << std::setw(10) << std::fixed << std::setprecision(4) << lhs_dom
5297             << " rhs_dom " << std::setw(10) << std::fixed << std::setprecision(4) << rhs_dom
5298             << " lhs_val " << std::setw(10) << std::fixed << std::setprecision(4) << lhs_val
5299             << " rhs_val " << std::setw(10) << std::fixed << std::setprecision(4) << rhs_val
5300             << " yv " << std::setw(10) << std::fixed << std::setprecision(4) << yv
5301             << " xv " << std::setw(10) << std::fixed << std::setprecision(4) << xv
5302             << std::endl
5303             ;
5304     }
5305     return xv ;
5306 }
5307
5308
5309 /**
5310 NP::interp2D
5311 -------------
5312
5313 * https://en.wikipedia.org/wiki/Bilinear_interpolation
5314
5315 The interpolation formulas used by CUDA textures are documented.
5316
5317 * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#linear-filtering
5318
5319 ::
5320
5321     J.2. Linear Filtering
5322     In this filtering mode, which is only available for floating-point textures, the value returned by the texture fetch is
5323
5324     tex(x)=(1−α)T[i]+αT[i+1] for a one-dimensional texture,
5325
5326     tex(x,y)=(1−α)(1−β)T[i,j]+α(1−β)T[i+1,j]+(1−α)βT[i,j+1]+αβT[i+1,j+1] for a two-dimensional texture,
5327
5328     tex(x,y,z) =
5329     (1−α)(1−β)(1−γ)T[i,j,k]+α(1−β)(1−γ)T[i+1,j,k]+
5330     (1−α)β(1−γ)T[i,j+1,k]+αβ(1−γ)T[i+1,j+1,k]+
5331     (1−α)(1−β)γT[i,j,k+1]+α(1−β)γT[i+1,j,k+1]+
5332     (1−α)βγT[i,j+1,k+1]+αβγT[i+1,j+1,k+1]
5333
5334     for a three-dimensional texture,
5335     where:
5336
5337     i=floor(xB), α=frac(xB), xB=x-0.5,
5338     j=floor(yB), β=frac(yB), yB=y-0.5,
5339     k=floor(zB), γ=frac(zB), zB= z-0.5,
5340     α, β, and γ are stored in 9-bit fixed point format with 8 bits of fractional value (so 1.0 is exactly represented).
5341
5342
5343 The use of reduced precision makes it not straightforward to perfectly replicate on the CPU,
5344 but you should be able to get very close.
5345
5346
5347 **/
5348 template<typename T> inline T  NP::interp2D(T x, T y, INT item) const
5349 {
5350     INT ndim = shape.size() ;
5351     assert( ndim == 2 || ndim == 3 );
5352
5353     INT num_items = ndim == 3 ? shape[0] : 1 ;
5354     assert( item < num_items );
5355     INT ni = shape[ndim-2];
5356     INT nj = shape[ndim-1];  // typically 2, but can be more
5357     INT item_offset = item == -1 ? 0 : ni*nj*item ;   // item=-1 same as item=0
5358
5359     const T* vv = cvalues<T>() + item_offset ;
5360
5361     T xB = x - T(0.5) ;
5362     T yB = y - T(0.5) ;
5363     // decompose floating point value into integral and fractional parts
5364     T xBint ;
5365     T xBfra = std::modf(xB, &xBint);
5366     INT j = INT(xBint) ;
5367
5368     T yBint ;
5369     T yBfra = std::modf(yB, &yBint);
5370     INT i = INT(yBint) ;
5371
5372     const T one(1.);
5373
5374 #ifdef VERBOSE
5375     std::cout
5376         << " ni = " << ni
5377         << " nj = " << nj
5378         << " i = "  << i
5379         << " j = "  << j
5380         << std::endl
5381        ;
5382 #endif
5383
5384     bool i_inrange = i < ni && i > -1 ;
5385     bool j_inrange = j < nj && j > -1 ;
5386     bool ij_inrange = i_inrange && j_inrange ;
5387
5388     if(!ij_inrange ) std::cerr
5389        << "NP::interp2D"
5390        << "\n"
5391        << " x " << std::fixed << std::setw(10) << std::setprecision(5) << x
5392        << " xB " << std::fixed << std::setw(10) << std::setprecision(5) << xB
5393        << " xBint " << std::fixed << std::setw(10) << std::setprecision(5) << xBint
5394        << " xBfra " << std::fixed << std::setw(10) << std::setprecision(5) << xBfra
5395        << " j " << j
5396        << " nj " << nj
5397        << "\n"
5398        << " y " << std::fixed << std::setw(10) << std::setprecision(5) << y
5399        << " yB " << std::fixed << std::setw(10) << std::setprecision(5) << yB
5400        << " yBint " << std::fixed << std::setw(10) << std::setprecision(5) << yBint
5401        << " yBfra " << std::fixed << std::setw(10) << std::setprecision(5) << yBfra
5402        << " i " << i
5403        << " ni " << ni
5404        << "\n"
5405        << " item " << item
5406        << " ndim " << ndim
5407        << " item_offset " << item_offset
5408        << " num_items " << num_items
5409        << " i_inrange " << ( i_inrange ? "YES" : "NO " )
5410        << " j_inrange " << ( j_inrange ? "YES" : "NO " )
5411        << " ij_inrange " << ( ij_inrange ? "YES" : "NO " )
5412        << "\n"
5413        ;
5414
5415     assert( ij_inrange );
5416
5417
5418
5419
5420     // (i,j) => (y,x)
5421     T v00 = ij_inrange ? vv[(i+0)*nj+(j+0)] : 0. ;
5422     T v01 = ij_inrange ? vv[(i+0)*nj+(j+1)] : 0. ;   // v01 at j+1 (at large x than v00)
5423     T v10 = ij_inrange ? vv[(i+1)*nj+(j+0)] : 0. ;
5424     T v11 = ij_inrange ? vv[(i+1)*nj+(j+1)] : 0. ;
5425
5426 #ifdef VERBOSE
5427     std::cout
5428        << "NP::interp2D[ "
5429        << " T[ij] = " << v00
5430        << " T[i+1,j] = "<< v10
5431        << " T[i,j+1] = "<< v01
5432        << " T[i+1,j+1] = "<< v11
5433        << " NP::interp2D]"
5434        << std::endl
5435        ;
5436 #endif
5437
5438     // v10 is i+1
5439
5440     // tex(x,y)=(1?¦Á(1?¦ÂT[i,j]+¦Á1?¦ÂT[i+1,j]+(1?¦Á¦Â[i,j+1]+¦fÂ[i+1,j+1]
5441     // hmm does this need a y-flip ?
5442
5443     T z =  (one - xBfra)*(one - yBfra)*v00 +
5444                   xBfra *(one - yBfra)*v01 +
5445            (one - xBfra)*       yBfra *v10 +
5446                   xBfra *       yBfra *v11 ;
5447
5448     return z ;
5449 }
5450
5451
5452 /**
5453 NP::interp
5454 ------------
5455
5456 CAUTION: using the wrong type here somehow scrambles the array contents,
5457 so always explicitly define the template type : DO NOT RELY ON COMPILER WORKING IT OUT.
5458
5459 **/
5460
5461 template<typename T> inline T NP::interp(T x, INT item) const
5462 {
5463     unsigned ndim = shape.size() ;
5464     assert( ndim == 2 || ndim == 3 );
5465
5466     unsigned num_items = ndim == 3 ? shape[0] : 1 ;
5467     bool num_items_expect = item < INT(num_items)  ;
5468     assert( num_items_expect );
5469     if(!num_items_expect) std::raise(SIGINT);
5470
5471     unsigned ni = shape[ndim-2];
5472     unsigned nj = shape[ndim-1];  // typically 2 but can be more
5473     unsigned item_offset = item == -1 ? 0 : ni*nj*item ;   // item=-1 same as item=0
5474
5475     assert( ni > 1 );
5476     assert( nj <= 8 );        // not needed for below, just for sanity of payload
5477     unsigned jdom = 0 ;       // 1st payload slot is "domain"
5478     unsigned jval = nj - 1 ;  // last payload slot is "value"   : TODO: argument to control this
5479     // note that with nj > 2 this allows other values to be carried
5480
5481     const T* vv = cvalues<T>() + item_offset ;
5482
5483     INT lo = 0 ;
5484     INT hi = ni-1 ;         // domain must be in ascending order
5485
5486 /*
5487     std::cout
5488          << " NP::interp "
5489          << " x " << x
5490          << " ni " << ni
5491          << " lo " << lo
5492          << " hi " << hi
5493          << " vx_lo " << vv[2*lo+0]
5494          << " vy_lo " <<  vv[2*lo+1]
5495          << " vx_hi " << vv[2*hi+0]
5496          << " vy_hi " <<  vv[2*hi+1]
5497          << std::endl
5498          ;
5499 */
5500
5501     // for x out of domain range return values at edges
5502     if( x <= vv[nj*lo+jdom] ) return vv[nj*lo+jval] ;
5503     if( x >= vv[nj*hi+jdom] ) return vv[nj*hi+jval] ;
5504
5505     // binary search for domain bin containing x
5506     while (lo < hi-1)
5507     {
5508         INT mi = (lo+hi)/2;
5509         if (x < vv[nj*mi+jdom]) hi = mi ;
5510         else lo = mi;
5511     }
5512
5513     // linear interpolation across the bin
5514     T dx = vv[nj*hi+jdom] - vv[nj*lo+jdom] ;
5515     T fx = (x-vv[nj*lo+jdom])/dx ;
5516
5517     // "hi = lo + 1", so  fractional "lo + fx"
5518     // encodes the result of the domain interpolation
5519     // HMM need some signalling for below/above domain
5520     //
5521     // Notice how the values are only used in the below two lines, right at the end.
5522     // Could split the API to return a fractional domain bin "index".
5523     //
5524     // Then could reuse that to get multiple values from a single "bin_interp"
5525     // Could interpolate multiple props without repeating the bin finding so long as
5526     // they shared the same domain (in first column)
5527
5528     T dy = vv[nj*hi+jval] - vv[nj*lo+jval] ;
5529     T y  = vv[nj*lo+jval] + dy*fx ;
5530
5531     return y ;
5532 }
5533
5534 /**
5535 NP::interpHD
5536 --------------
5537
5538 Interpolation within domain 0->1 using hd_factor convention for lhs, rhs high resolution zooms.
5539
5540 Previously tried to avoid the dimensional duplication using set_offset
5541 which attempts to enable get/set addressing with the "wrong" number of dimensions.
5542 The offset is like moving a cursor around the array allowing portions of it
5543 to be in-situ addressed as if they were smaller sub-arrays.
5544
5545 The set_offset approach is problematic as the get/set methods are using the
5546 absolute "correct" ni,nj,nk etc.. whereas when using the lower level cvalues approach
5547 are able to shift the meanings of those in a local fashion that can work
5548 across different numbers of dimensions.
5549
5550 **/
5551
5552 template<typename T> inline T NP::interpHD(T u, unsigned hd_factor, INT item) const
5553 {
5554     unsigned ndim = shape.size() ;
5555     assert( ndim == 3 || ndim == 4 );
5556
5557     unsigned num_items = ndim == 4 ? shape[0] : 1 ;
5558     assert( item < INT(num_items) );
5559
5560     unsigned ni = shape[ndim-3] ;
5561     unsigned nj = shape[ndim-2] ;
5562     unsigned nk = shape[ndim-1] ;
5563     assert( nj == 4 );
5564     assert( nk == 2 ); // not required by the below
5565
5566     unsigned kdom = 0 ;
5567     unsigned kval = nk - 1 ;
5568
5569     // pick *j* resolution zoom depending on u
5570     T lhs = T(1.)/T(hd_factor) ;
5571     T rhs = T(1.) - lhs ;
5572     unsigned j = u > lhs && u < rhs ? 0 : ( u < lhs ? 1 : 2 ) ;
5573
5574     unsigned item_offset = item == -1 ? 0 : ni*nj*nk*item ;   // item=-1 same as item=0
5575     const T* vv = cvalues<T>() + item_offset ;
5576
5577     // lo and hi are standins for *i*
5578     INT lo = 0 ;
5579     INT hi = ni-1 ;
5580
5581     if( u <= vv[lo*nj*nk+j*nk+kdom] ) return vv[lo*nj*nk+j*nk+kval] ;
5582     if( u >= vv[hi*nj*nk+j*nk+kdom] ) return vv[hi*nj*nk+j*nk+kval] ;
5583
5584     // binary search for domain bin containing x
5585     while (lo < hi-1)
5586     {
5587         INT mi = (lo+hi)/2;
5588         if (u < vv[mi*nj*nk+j*nk+kdom] ) hi = mi ;
5589         else lo = mi;
5590     }
5591
5592     // linear interpolation across the bin
5593     T dy = vv[hi*nj*nk+j*nk+kval] - vv[lo*nj*nk+j*nk+kval] ;
5594     T du = vv[hi*nj*nk+j*nk+kdom] - vv[lo*nj*nk+j*nk+kdom] ;
5595     T y  = vv[lo*nj*nk+j*nk+kval] + dy*(u-vv[lo*nj*nk+j*nk+kdom])/du ;
5596
5597     return y ;
5598 }
5599
5600
5601 /**
5602 NP::interp
5603 -------------
5604
5605 Too dangerous to simply remove this method, as the standard NP::interp
5606 has too similar a signature which via type conversion could lead to
5607 difficult to find bugs.
5608
5609 **/
5610
5611 template<typename T> inline T NP::interp(INT i, T x) const
5612 {
5613     std::cerr << "NP::interp DEPRECATED SIGNATURE CHANGE NP::interp TO NP::combined_interp " << std::endl ;
5614     return combined_interp_3<T>(i, x );
5615 }
5616
5617 /**
5618 NP::combined_interp_3  (better name _3D ?)
5619 ---------------------------------------------------------------
5620
5621 Assuming a convention of combined property array layout
5622 this provides interpolation of multiple properties with
5623 different domain lengths.  Special array preparation
5624 is needed with "ni" lengths encoded into last columns, for
5625 example with NP::Combine
5626
5627 See ~/np/tests/NPCombineTest.cc
5628
5629 qudarap/qprop.h qprop<T>::interpolate
5630     GPU version of NP::combined_interp provisioned by qudarap/QProp.hh
5631
5632
5633 ::
5634
5635     In [1]: a.shape
5636     Out[1]: (24, 15, 2)
5637
5638     In [2]: a[:,-1,-1]
5639     Out[2]: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
5640
5641     In [3]: a[:,-1,-1].view(np.int64)
5642     Out[3]: array([10,  2, 14,  2, 14, 14,  4,  2, 10,  2, 14,  2, 14, 14,  4,  2, 10,  2, 14,  2, 14, 14,  4,  2])
5643
5644
5645     In [18]: a.shape
5646     Out[18]: (3, 4, 2, 15, 2)
5647
5648     In [17]: a[...,-1,-1].view(np.int64)
5649     Out[17]:
5650     array([[[10,  2],
5651             [14,  2],
5652             [14, 14],
5653             [ 4,  2]],
5654
5655            [[10,  2],
5656             [14,  2],
5657             [14, 14],
5658             [ 4,  2]],
5659
5660            [[10,  2],
5661             [14,  2],
5662             [14, 14],
5663             [ 4,  2]]])
5664
5665
5666     std::cout
5667          << " NP::combined_interp_3 "
5668          << " x " << x
5669          << " ni " << ni
5670          << " lo " << lo
5671          << " hi " << hi
5672          << " vx_lo " <<  vv[nj*lo+jdom]
5673          << " vy_lo " <<  vv[nj*lo+jval]
5674          << " vx_hi " <<  vv[nj*hi+jdom]
5675          << " vy_hi " <<  vv[nj*hi+jval]
5676          << std::endl
5677          ;
5678
5679 **/
5680
5681 template<typename T> inline T NP::combined_interp_3(INT i, T x) const
5682 {
5683     INT ndim = shape.size() ;
5684     assert( ndim == 3 && shape[ndim-1] >= 2 && i < shape[0] && shape[1] > 1 );
5685
5686     INT stride = shape[ndim-2]*shape[ndim-1] ;
5687     const T* vv = cvalues<T>() + i*stride ;
5688
5689     return _combined_interp<T>( vv, stride, x );
5690 }
5691
5692
5693 /**
5694 NP::combined_interp_5 (better name _5D?)
5695 -----------------------------------------
5696
5697 Example array layout for complex refractive index::
5698
5699       (3 pmtcat, 4 layers, 2 prop ,  ~15  ,  2 )
5700                            |                 |
5701                            RINDEX  1+mx_itm  dom
5702                            KINDEX            val
5703
5704 **/
5705
5706 template<typename T> inline T NP::combined_interp_5(INT i, INT j, INT k, T x) const
5707 {
5708     INT ndim = shape.size() ;
5709     assert( ndim == 5 );
5710     INT ni = shape[0] ;
5711     INT nj = shape[1] ;
5712     INT nk = shape[2] ;
5713     bool args_expect =  i < ni && j < nj && k < nk ;
5714     assert( args_expect );
5715     if(!args_expect) std::raise(SIGINT);
5716
5717     INT nl = shape[ndim-2] ;
5718     INT nm = shape[ndim-1] ;
5719
5720     bool shape_expect = nl > 1 && nm == 2  ;
5721     // require more than one domain items
5722     assert( shape_expect );
5723     if(!shape_expect) std::raise(SIGINT);
5724
5725     INT stride = shape[ndim-2]*shape[ndim-1] ;
5726     INT iprop = i*nj*nk+j*nk+k ;
5727     // maximum:  (ni-1)*nj*nk + (nj-1)*nk + (nk-1) = ni*nj*nk - nj*nk + nj*nk - nk + nk - 1 = ni*nj*nk - 1
5728
5729     const T* vv = cvalues<T>() + iprop*stride ;
5730
5731     return _combined_interp<T>( vv, stride, x );
5732 }
5733
5734
5735 /**
5736 NP::_combined_interp
5737 ----------------------
5738
5739 Note how this need not know about the upper dimensions, allowing the split
5740
5741 Using ragged array handling with NP::Combined arrays
5742 where individual property (ni,2) have the ni encoded into the absolute
5743 last column.
5744
5745 **/
5746
5747 template<typename T> inline T NP::_combined_interp(const T* vv, INT niv, T x) const
5748 {
5749     INT ndim = shape.size() ;
5750     INT ni = nview::int_from<T>( *(vv+niv-1) ) ; // NPU.hh:nview
5751     INT nj = shape[ndim-1] ;  // normally 2 with (dom, val)
5752
5753     INT jdom = 0 ;       // 1st payload slot is "domain"
5754     INT jval = nj - 1 ;  // last payload slot is "value", with nj 2 (typical) that is 1
5755
5756     INT lo = 0 ;
5757     INT hi = ni-1 ;
5758
5759     if( x <= vv[nj*lo+jdom] ) return vv[nj*lo+jval] ;
5760     if( x >= vv[nj*hi+jdom] ) return vv[nj*hi+jval] ;
5761
5762     while (lo < hi-1)
5763     {
5764         INT mi = (lo+hi)/2;
5765         if (x < vv[nj*mi+jdom]) hi = mi ;
5766         else lo = mi;
5767     }
5768
5769     T dy = vv[nj*hi+jval] - vv[nj*lo+jval] ;
5770     T dx = vv[nj*hi+jdom] - vv[nj*lo+jdom] ;
5771     T y =  vv[nj*lo+jval] + dy*(x-vv[nj*lo+jdom])/dx ;
5772     return y ;
5773 }
5774
5775 /**
5776 NP::FractionalRange
5777 ---------------------
5778
5779 Return fraction of x within range x0 to x1 or 0 below and 1 above the range.
5780
5781 +-------------------+-------------+
5782 | x <= x0           | T(0)        |
5783 +-------------------+-------------+
5784 | x >= x1           | T(1)        |
5785 +-------------------+-------------+
5786 | x0 < x < x1       | T(0->1)     |
5787 +-------------------+-------------+
5788
5789 **/
5790
5791 template<typename T> inline T NP::FractionalRange( T x, T x0, T x1 )  // static
5792 {
5793     assert( x1 > x0 );
5794     if( x <= x0 ) return T(0) ;
5795     if( x >= x1 ) return T(1) ;
5796     T xf = (x-x0)/(x1-x0) ;
5797     return xf ;
5798 }
5799
5800
5801
5802 template<typename T> inline NP* NP::cumsum(INT axis) const
5803 {
5804     assert( axis == 1 && "for now only axis=1 implemented" );
5805     const T* vv = cvalues<T>();
5806     NP* cs = NP::MakeLike(this) ;
5807     T* ss = cs->values<T>();
5808     for(INT p=0 ; p < size ; p++) ss[p] = vv[p] ;   // flat copy
5809
5810     unsigned ndim = shape.size() ;
5811
5812     if( ndim == 1 )
5813     {
5814         unsigned ni = shape[0] ;
5815         for(unsigned i=1 ; i < ni ; i++) ss[i] += ss[i-1] ;   // cute recursive summation
5816     }
5817     else if( ndim == 2 )
5818     {
5819         unsigned ni = shape[0] ;
5820         unsigned nj = shape[1] ;
5821         for(unsigned i=0 ; i < ni ; i++)
5822         {
5823             for(unsigned j=1 ; j < nj ; j++) ss[i*nj+j] += ss[i*nj+j-1] ;
5824         }
5825     }
5826     else
5827     {
5828         assert( 0 && "for now only 1d or 2d implemented");
5829     }
5830     return cs ;
5831 }
5832
5833
5834 /**
5835 NP::divide_by_last
5836 --------------------
5837
5838 Normalization by last payload entry implemented for 1d, 2d and 3d arrays.
5839
5840 **/
5841
5842 template<typename T> inline void NP::divide_by_last()
5843 {
5844     unsigned ndim = shape.size() ;
5845     T* vv = values<T>();
5846     const T zero(0.);
5847
5848     if( ndim == 1 )
5849     {
5850         unsigned ni = shape[0] ;
5851         const T last = get<T>(-1) ;
5852         for(unsigned i=0 ; i < ni ; i++) vv[i] /= last  ;
5853     }
5854     else if( ndim == 2 )
5855     {
5856         unsigned ni = shape[0] ;
5857         unsigned nj = shape[1] ;
5858 #ifdef DEBUG
5859         std::cout
5860             << "NP::divide_by_last 2d "
5861             << " ni " << ni
5862             << " nj " << nj
5863             << std::endl
5864             ;
5865 #endif
5866         // 2d case ni*(domain,value) pairs : there is only one last value to divide by : like the below 3d case with ni=1, i=0
5867         const T last = get<T>(-1,-1) ;
5868         unsigned j = nj - 1 ;    // last payload slot
5869         for(unsigned i=0 ; i < ni ; i++)
5870         {
5871             if(last != zero) vv[i*nj+j] /= last ;
5872         }
5873     }
5874     else if( ndim == 3 )   // eg (1000, 100, 2)    1000(diff BetaInverse) * 100 * (energy, integral)
5875     {
5876         unsigned ni = shape[0] ;  // eg BetaInverse dimension
5877         unsigned nj = shape[1] ;  // eg energy dimension
5878         unsigned nk = shape[2] ;  // eg payload carrying  [energy,s2,s2integral]
5879         assert( nk <= 8  ) ;      // not required by the below, but restrict for understanding
5880         unsigned k = nk - 1 ;     // last payload property, eg s2integral
5881
5882         for(unsigned i=0 ; i < ni ; i++)
5883         {
5884             // get<T>(i, -1, -1 )
5885             const T last = vv[i*nj*nk+(nj-1)*nk+k] ;  // for each item i, pluck the last payload value at the last energy value
5886             for(unsigned j=0 ; j < nj ; j++) if(last != zero) vv[i*nj*nk+j*nk+k] /= last ;  // traverse energy dimension normalizing the last payload items by last energy brethren
5887         }
5888     }
5889     else
5890     {
5891         assert( 0 && "for now only ndim 1,2,3 implemented");
5892     }
5893 }
5894
5895
5896 inline void NP::fillIndexFlat()
5897 {
5898     if(uifc == 'f')
5899     {
5900         switch(ebyte)
5901         {
5902             case 4: _fillIndexFlat<float>()  ; break ;
5903             case 8: _fillIndexFlat<double>() ; break ;
5904         }
5905     }
5906     else if(uifc == 'u')
5907     {
5908         switch(ebyte)
5909         {
5910             case 1: _fillIndexFlat<unsigned char>()  ; break ;
5911             case 2: _fillIndexFlat<unsigned short>()  ; break ;
5912             case 4: _fillIndexFlat<unsigned int>() ; break ;
5913             case 8: _fillIndexFlat<unsigned long>() ; break ;
5914         }
5915     }
5916     else if(uifc == 'i')
5917     {
5918         switch(ebyte)
5919         {
5920             case 1: _fillIndexFlat<char>()  ; break ;
5921             case 2: _fillIndexFlat<short>()  ; break ;
5922             case 4: _fillIndexFlat<int>() ; break ;
5923             case 8: _fillIndexFlat<long>() ; break ;
5924         }
5925     }
5926 }
5927
5928
5929 inline void NP::dump(INT i0, INT i1, INT j0, INT j1) const
5930 {
5931     if(uifc == 'f')
5932     {
5933         switch(ebyte)
5934         {
5935             case 4: _dump<float>(i0,i1,j0,j1)  ; break ;
5936             case 8: _dump<double>(i0,i1,j0,j1) ; break ;
5937         }
5938     }
5939     else if(uifc == 'u')
5940     {
5941         switch(ebyte)
5942         {
5943             case 1: _dump<unsigned char>(i0,i1,j0,j1)  ; break ;
5944             case 2: _dump<unsigned short>(i0,i1,j0,j1)  ; break ;
5945             case 4: _dump<unsigned int>(i0,i1,j0,j1) ; break ;
5946             case 8: _dump<unsigned long>(i0,i1,j0,j1) ; break ;
5947         }
5948     }
5949     else if(uifc == 'i')
5950     {
5951         switch(ebyte)
5952         {
5953             case 1: _dump<char>(i0,i1,j0,j1)  ; break ;
5954             case 2: _dump<short>(i0,i1,j0,j1)  ; break ;
5955             case 4: _dump<int>(i0,i1,j0,j1) ; break ;
5956             case 8: _dump<long>(i0,i1,j0,j1) ; break ;
5957         }
5958     }
5959 }
5960
5961
5962
5963
5964 inline std::string NP::Brief(const NP* a)
5965 {
5966     return a ? a->sstr() : "-" ;
5967 }
5968 inline std::string NP::sstr() const
5969 {
5970     std::stringstream ss ;
5971     ss << NPS::desc(shape) ;
5972     return ss.str();
5973 }
5974 inline std::string NP::desc() const
5975 {
5976     std::stringstream ss ;
5977     ss << "NP "
5978        << " dtype " << dtype
5979        << NPS::desc(shape)
5980        << " size " << size
5981        << " uifc " << uifc
5982        << " ebyte " << ebyte
5983        << " shape.size " << shape.size()
5984        << " data.size " << data.size()
5985        << " meta.size " << meta.size()
5986        << " names.size " << names.size()
5987        ;
5988     if(nodata) ss << " NODATA " ;
5989     return ss.str();
5990 }
5991
5992
5993 inline std::string NP::brief() const
5994 {
5995     std::stringstream ss ;
5996     ss
5997        << " " << dtype
5998        << NPS::desc(shape)
5999        ;
6000     return ss.str();
6001 }
6002
6003
6004 template<typename T>
6005 inline std::string NP::repr() const
6006 {
6007     const T* vv = cvalues<T>();
6008     INT nv = num_values() ;
6009     const INT edge = 5 ;
6010
6011     std::stringstream ss ;
6012     ss << "{" ;
6013     for(INT i=0 ; i < nv ; i++)
6014     {
6015         if( i < edge || i > nv - edge )
6016         {
6017             switch(uifc)
6018             {
6019                 case 'f': ss << std::setw(10) << std::fixed << std::setprecision(5) << vv[i] << " " ; break ;
6020                 case 'u': ss << std::setw(5) << vv[i] << " " ; break ;
6021                 case 'i': ss << std::setw(5) << vv[i] << " " ; break ;
6022                 case 'c': ss << std::setw(10) << vv[i] << " " ; break ;   // TODO: check array of std::complex
6023             }
6024         }
6025         else if( i == edge )
6026         {
6027             ss << "... " ;
6028         }
6029     }
6030     ss << "}" ;
6031     return ss.str();
6032 }
6033
6034
6035 inline void NP::set_meta( const std::vector<std::string>& lines, char delim )
6036 {
6037     std::stringstream ss ;
6038     for(unsigned i=0 ; i < lines.size() ; i++) ss << lines[i] << delim  ;
6039     meta = ss.str();
6040 }
6041
6042 inline void NP::get_meta( std::vector<std::string>& lines, char delim  ) const
6043 {
6044     if(meta.empty()) return ;
6045
6046     std::stringstream ss ;
6047     ss.str(meta.c_str())  ;
6048     std::string s;
6049     while (std::getline(ss, s, delim)) lines.push_back(s) ;
6050 }
6051
6052
6053 inline void NP::set_names( const std::vector<std::string>& lines )
6054 {
6055     names.clear();
6056     for(unsigned i=0 ; i < lines.size() ; i++)
6057     {
6058          const std::string& line = lines[i] ;
6059          names.push_back(line);
6060     }
6061 }
6062
6063 inline void NP::get_names( std::vector<std::string>& lines ) const
6064 {
6065     for(unsigned i=0 ; i < names.size() ; i++)
6066     {
6067          const std::string& name = names[i] ;
6068          lines.push_back(name);
6069     }
6070 }
6071
6072 //Returns 0-based index of first matching name, or -1 if the name is not found or the name is nullptr.
6073 inline NP::INT NP::get_name_index( const char* qname ) const
6074 {
6075     unsigned count = 0 ;
6076     return NameIndex(qname, count, names);
6077 }
6078 inline NP::INT NP::get_name_index( const char* qname, unsigned& count ) const
6079 {
6080     return NameIndex(qname, count, names);
6081 }
6082
6083
6084 /**
6085 NP::NameIndex
6086 --------------------
6087
6088 Returns the index of the first listed name that exactly matches the query string.
6089 A count of the number of matches is also provided.
6090 Returns -1 if not found.
6091
6092 **/
6093
6094 inline NP::INT NP::NameIndex( const char* qname, unsigned& count, const std::vector<std::string>& names ) // static
6095 {
6096     if(names.size() == 0) return -1 ;
6097
6098     INT result(-1);
6099     count = 0 ;
6100     for(unsigned i=0 ; i < names.size() ; i++)
6101     {
6102         const std::string& k = names[i] ;
6103         if(strcmp(k.c_str(), qname) == 0 )
6104         {
6105             if(count == 0) result = i ;
6106             count += 1 ;
6107         }
6108     }
6109     return result ;
6110 }
6111
6112
6113 inline bool NP::is_named_shape() const
6114 {
6115     //return int(shape.size()) == 2 && shape[1] == 1 && shape[0] == int(names.size()) ;
6116     return INT(shape.size()) > 0 && shape[0] == INT(names.size()) ;
6117 }
6118
6119 template<typename T>
6120 inline T NP::get_named_value( const char* qname, T fallback ) const
6121 {
6122     bool is_named = is_named_shape() ;
6123
6124     if(NP::VERBOSE) std::cerr
6125         << "NP::get_named_value [" << qname << "]"
6126         << " is_named " << is_named
6127         << " sstr " << sstr()
6128         << std::endl
6129         ;
6130
6131     if(! is_named) return fallback ;
6132
6133     const T* vv = cvalues<T>() ;
6134
6135     unsigned count(0);
6136     INT idx = get_name_index(qname, count );
6137
6138     if(count != 1) return fallback ;
6139     if(idx < INT(shape[0])) return vv[idx] ;
6140     return fallback ;
6141 }
6142
6143
6144
6145
6146
6147 inline bool NP::has_meta() const
6148 {
6149     return meta.empty() == false ;
6150 }
6151
6152 /**
6153 NP::get_meta_string_
6154 ----------------------
6155
6156 Assumes metadata layout of form::
6157
6158     key1:value1
6159     key2:value2
6160
6161 With each key-value pair separated by newlines and the key and value
6162 delimited by a colon.
6163
6164 **/
6165
6166 inline std::string NP::get_meta_string_(const char* metadata, const char* key) // static
6167 {
6168     std::string value ;
6169
6170     std::stringstream ss;
6171     ss.str(metadata);
6172     std::string s;
6173     char delim = ':' ;
6174
6175     while (std::getline(ss, s))
6176     {
6177        size_t pos = s.find(delim);
6178        if( pos != std::string::npos )
6179        {
6180            std::string k = s.substr(0, pos);
6181            std::string v = s.substr(pos+1);
6182            if(strcmp(k.c_str(), key) == 0 ) value = v ;
6183 #ifdef DEBUG
6184            std::cout
6185                << "NP::get_meta_string "
6186                << " s[" << s << "]"
6187                << " k[" << k << "]"
6188                << " v[" << v << "]"
6189                << std::endl
6190                ;
6191 #endif
6192        }
6193 #ifdef DEBUG
6194        else
6195        {
6196            std::cout
6197                << "NP::get_meta_string "
6198                << "s[" << s << "] SKIP "
6199                << std::endl
6200                ;
6201        }
6202 #endif
6203     }
6204     return value ;
6205 }
6206
6207 inline std::string NP::get_meta_string(const std::string& meta, const char* key)  // static
6208 {
6209     const char* metadata = meta.empty() ? nullptr : meta.c_str() ;
6210     return get_meta_string_( metadata, key );
6211 }
6212
6213 /**
6214 NP::MakeMetaKVProfileArray
6215 ----------------------------
6216
6217 ::
6218
6219     (ok) A[blyth@localhost ALL1_Debug_Philox_ref1]$ grep Index SProf.txt
6220     A000_SEvt__setIndex:1760707886287057,7316444,1222084
6221     A000_SEvt__endIndex:1760707886541457,8373000,1334844
6222
6223 1. finds metadata lines looking like profile stamps with keys containing the ptn (eg "Index"), nullptr matches all lines
6224 2. create (N,3) int64_t array filled with the stamps (t[us],vm[kb],rs[kb])
6225
6226 **/
6227
6228 inline NP* NP::MakeMetaKVProfileArray(const std::string& meta, const char* ptn)
6229 {
6230     std::vector<std::string> keys ;
6231     std::vector<std::string> vals ;
6232     bool only_with_profile = true ;
6233     GetMetaKV(meta, &keys, &vals, only_with_profile, ptn );
6234     assert( keys.size() == vals.size() );
6235     INT num_key = keys.size();
6236
6237     INT ni = num_key ;
6238     INT nj = 3 ;
6239     bool dump = false ;
6240
6241     NP* prof = ni > 0 ? NP::Make<int64_t>(ni, nj ) : nullptr  ;
6242     int64_t* pp = prof ? prof->values<int64_t>() : nullptr ;
6243     if(prof)
6244     {
6245         prof->labels = new std::vector<std::string> {"st[us]", "vm[kb]", "rs[kb]" } ;
6246         prof->meta = meta ;
6247     }
6248
6249     for(INT i=0 ; i < ni ; i++)
6250     {
6251         const char* k = keys[i].c_str();
6252         const char* v = vals[i].c_str();
6253         bool looks_like_prof  = U::LooksLikeProfileTriplet(v);
6254         assert( looks_like_prof );
6255         if(!looks_like_prof) continue ;
6256
6257         char* end = nullptr ;
6258         int64_t st = strtoll( v,   &end, 10 ) ;
6259         int64_t vm = strtoll( end+1, &end , 10 ) ;
6260         int64_t rs = strtoll( end+1, &end , 10 ) ;
6261
6262         if(dump) std::cout
6263             << "NP::makeMetaKVProfileArray"
6264             << " k " << ( k ? k : "-" )
6265             << " v " << ( v ? v : "-" )
6266             << " st " << st
6267             << " vm " << vm
6268             << " rs " << rs
6269             << std::endl
6270             ;
6271
6272         pp[nj*i + 0 ] = st ;
6273         pp[nj*i + 1 ] = vm ;
6274         pp[nj*i + 2 ] = rs ;
6275         prof->names.push_back(k) ;
6276     }
6277     return prof ;
6278 }
6279
6280 inline void NP::GetMetaKV_(
6281     const char* metadata,
6282     std::vector<std::string>* keys,
6283     std::vector<std::string>* vals,
6284     bool only_with_profile,
6285     const char* ptn
6286     ) // static
6287 {
6288     if(metadata == nullptr) return ;
6289     std::stringstream ss;
6290     ss.str(metadata);
6291     std::string s;
6292     char delim = ':' ;
6293
6294     while (std::getline(ss, s))
6295     {
6296         size_t pos = s.find(delim);
6297         if( pos != std::string::npos )
6298         {
6299             std::string _k = s.substr(0, pos);
6300             std::string _v = s.substr(pos+1);
6301             const char* k = _k.c_str();
6302             const char* v = _v.c_str();
6303             bool match_ptn = ptn ? strstr( k, ptn ) != nullptr : true  ;
6304             bool looks_like_profile = U::LooksLikeProfileTriplet(v);
6305             bool select = only_with_profile ? looks_like_profile && match_ptn : match_ptn ;
6306             if(!select) continue ;
6307
6308             if(keys) keys->push_back(k);
6309             if(vals) vals->push_back(v);
6310         }
6311     }
6312 }
6313
6314 inline void NP::GetMetaKV(
6315     const std::string& meta,
6316     std::vector<std::string>* keys,
6317     std::vector<std::string>* vals,
6318     bool only_with_profile,
6319     const char* ptn)  // static
6320 {
6321     const char* metadata = meta.empty() ? nullptr : meta.c_str() ;
6322     return GetMetaKV_( metadata, keys, vals, only_with_profile, ptn  );
6323 }
6324
6325
6326
6327
6328 template<typename T> inline T NP::GetMeta(const std::string& mt, const char* key, T fallback) // static
6329 {
6330     if(mt.empty()) return fallback ;
6331     std::string s = get_meta_string( mt, key);
6332 #ifdef DEBUG
6333     std::cout << "NP::GetMeta[" << s << "]" << std::endl ;
6334 #endif
6335     if(s.empty()) return fallback ;
6336     return U::To<T>(s.c_str()) ;
6337 }
6338
6339
6340 template uint64_t    NP::GetMeta<uint64_t>(   const std::string& , const char*, uint64_t ) ;
6341 template int64_t     NP::GetMeta<int64_t>(    const std::string& , const char*, int64_t ) ;
6342 template int         NP::GetMeta<int>(        const std::string& , const char*, int ) ;
6343 template unsigned    NP::GetMeta<unsigned>(   const std::string& , const char*, unsigned ) ;
6344 template float       NP::GetMeta<float>(      const std::string& , const char*, float ) ;
6345 template double      NP::GetMeta<double>(     const std::string& , const char*, double ) ;
6346 template std::string NP::GetMeta<std::string>(const std::string& , const char*, std::string ) ;
6347
6348
6349
6350
6351
6352 template<typename T> inline T NP::get_meta(const char* key, T fallback) const
6353 {
6354     if(meta.empty()) return fallback ;
6355     return GetMeta<T>( meta.c_str(), key, fallback );
6356 }
6357
6358 template uint64_t NP::get_meta<uint64_t>(const char*, uint64_t ) const ;
6359 template int64_t  NP::get_meta<int64_t>(const char*, int64_t ) const ;
6360 template int      NP::get_meta<int>(const char*, int ) const ;
6361 template unsigned NP::get_meta<unsigned>(const char*, unsigned ) const  ;
6362 template float    NP::get_meta<float>(const char*, float ) const ;
6363 template double   NP::get_meta<double>(const char*, double ) const ;
6364 template std::string NP::get_meta<std::string>(const char*, std::string ) const ;
6365
6366
6367 /**
6368 NP::SetMeta
6369 -----------
6370
6371 Updates the single *mt* string
6372
6373 **/
6374
6375 template<typename T> inline void NP::SetMeta( std::string& mt, const char* key, T value ) // static
6376 {
6377     std::stringstream nn;  // stringstream for creating the updated mt string
6378
6379     std::stringstream ss;  // stringstream for parsing the initial mt string
6380     ss.str(mt);
6381
6382     std::string s;
6383     char delim = ':' ;
6384     bool changed = false ;
6385     while (std::getline(ss, s))
6386     {
6387        size_t pos = s.find(delim);
6388        if( pos != std::string::npos )  // lines has the delim, so extract (k,v)
6389        {
6390            std::string k = s.substr(0, pos);
6391            std::string v = s.substr(pos+1);
6392            if(strcmp(k.c_str(), key) == 0 )  // key already present, so change it
6393            {
6394                changed = true ;
6395                nn << key << delim << value << std::endl ;
6396            }
6397            else
6398            {
6399                nn << s << std::endl ;    // leaving line asis
6400            }
6401        }
6402        else
6403        {
6404            nn << s << std::endl ;     // leaving line as is
6405        }
6406     }
6407     if(!changed) nn << key << delim << value << std::endl ;  // didnt find the key, so add it
6408     mt = nn.str() ;
6409 }
6410
6411 template void     NP::SetMeta<int64_t>(     std::string&, const char*, int64_t );
6412 template void     NP::SetMeta<uint64_t>(    std::string&, const char*, uint64_t );
6413 template void     NP::SetMeta<int>(         std::string&, const char*, int );
6414 template void     NP::SetMeta<unsigned>(    std::string&, const char*, unsigned );
6415 template void     NP::SetMeta<float>(       std::string&, const char*, float );
6416 template void     NP::SetMeta<double>(      std::string&, const char*, double );
6417 template void     NP::SetMeta<std::string>( std::string&, const char*, std::string );
6418
6419
6420
6421
6422 /**
6423 NP::set_meta
6424 --------------
6425
6426 A preexisting keyed k:v pair is changed by this otherwise if there is no
6427 such pre-existing key a new k:v pair is added.
6428
6429 **/
6430 template<typename T> inline void NP::set_meta(const char* key, T value)
6431 {
6432     SetMeta(meta, key, value);
6433 }
6434
6435 template void     NP::set_meta<int64_t>(const char*, int64_t );
6436 template void     NP::set_meta<uint64_t>(const char*, uint64_t );
6437 template void     NP::set_meta<int>(const char*, int );
6438 template void     NP::set_meta<unsigned>(const char*, unsigned );
6439 template void     NP::set_meta<float>(const char*, float );
6440 template void     NP::set_meta<double>(const char*, double );
6441 template void     NP::set_meta<std::string>(const char*, std::string );
6442
6443
6444
6445 template<typename T> inline void NP::set_meta_kv(const std::vector<std::pair<std::string, T>>& kvs )
6446 {
6447     SetMetaKV(meta, kvs );
6448 }
6449 template<typename T> inline void NP::SetMetaKV( std::string& meta, const std::vector<std::pair<std::string, T>>& kvs ) // static
6450 {
6451     for(int i=0 ; i < int(kvs.size()); i++) SetMeta(meta, kvs[i].first.c_str(), kvs[i].second );
6452 }
6453
6454
6455 template<typename T> inline std::string NP::DescKV( const std::vector<std::pair<std::string, T>>& kvs ) // static
6456 {
6457     typedef std::pair<std::string, T> KV ;
6458     std::stringstream ss ;
6459     ss << "NP::DescKV" << std::endl ;
6460     for(INT i=0 ; i < INT(kvs.size()) ; i++)
6461     {
6462         const KV& kv = kvs[i] ;
6463         ss
6464             << std::setw(20)  << kv.first
6465             << " : "
6466             << std::setw(100) << kv.second
6467             << std::endl
6468             ;
6469     }
6470     std::string str = ss.str();
6471     return str ;
6472 }
6473
6474
6475 inline void NP::SetMetaKV_(
6476     std::string& meta,
6477     const std::vector<std::string>& keys,
6478     const std::vector<std::string>& vals ) // static
6479 {
6480     assert( keys.size() == vals.size() );
6481     for(INT i=0 ; i < INT(keys.size()); i++) SetMeta(meta, keys[i].c_str(), vals[i].c_str() );
6482 }
6483
6484 inline void NP::setMetaKV_( const std::vector<std::string>& keys,  const std::vector<std::string>& vals )
6485 {
6486     SetMetaKV_(meta, keys, vals);
6487 }
6488
6489
6490
6491
6492 inline std::string NP::descMeta() const
6493 {
6494     std::stringstream ss ;
6495     ss
6496        << "[NP::descMeta\n"
6497        << DescMetaKVS(meta, nullptr, nullptr)
6498        << "]NP::descMeta\n"
6499        ;
6500     std::string str = ss.str();
6501     return str ;
6502 }
6503
6504
6505 /**
6506 NP::GetFirstStampIndex_OLD
6507 ---------------------------
6508
6509 Return index of the first stamp that has difference to
6510 the next stamp of less than the discount. This is
6511 to avoid uninteresting large time ranges in the deltas.
6512
6513 HMM: this assumes the stamps are ascending
6514
6515 HMM: simpler to just disqualify stamps during initialization
6516
6517 **/
6518
6519 inline NP::INT NP::GetFirstStampIndex_OLD(const std::vector<int64_t>& stamps, int64_t discount ) // static
6520 {
6521     INT first = -1 ;
6522     INT i_prev = -1 ;
6523     int64_t t_prev = -1 ;
6524
6525     for(INT i=0 ; i < INT(stamps.size()) ; i++)
6526     {
6527         if(stamps[i] == 0) continue ;
6528
6529         int64_t t  = stamps[i] ;
6530         int64_t dt = t_prev > -1 ? t - t_prev : -1 ;
6531         if( dt > -1 && dt < discount && first == -1 ) first = i_prev ;
6532
6533         t_prev = t ;
6534         i_prev = i ;
6535     }
6536     return first ;
6537 }
6538
6539
6540
6541
6542
6543
6544
6545
6546
6547 /**
6548 NP::MakeMetaKVS_ranges
6549 ------------------------
6550
6551 Former NP::makeMetaKVS_ranges turned static with meta arg.
6552
6553 **/
6554
6555 inline NP* NP::MakeMetaKVS_ranges(const std::string& meta_, const char* ranges_ , std::ostream* ss) // static
6556 {
6557     std::vector<std::string> keys ;
6558     std::vector<std::string> vals ;
6559     std::vector<int64_t> tt ;
6560     bool only_with_stamp = true ;
6561     U::GetMetaKVS(meta_, &keys, &vals, &tt, only_with_stamp );
6562     assert( keys.size() == vals.size() );
6563     assert( keys.size() == tt.size() );
6564     assert( tt.size() == keys.size() );
6565     return MakeMetaKVS_ranges(keys, tt, ranges_ , ss );
6566 }
6567
6568 inline NP* NP::MakeMetaKVS_ranges2(const std::string& meta_, const char* ranges_ , std::ostream* ss) // static
6569 {
6570     std::vector<std::string> keys ;
6571     std::vector<std::string> vals ;
6572     std::vector<int64_t> tt ;
6573     bool only_with_stamp = true ;
6574     U::GetMetaKVS(meta_, &keys, &vals, &tt, only_with_stamp );
6575     assert( keys.size() == vals.size() );
6576     assert( keys.size() == tt.size() );
6577     assert( tt.size() == keys.size() );
6578     return MakeMetaKVS_ranges2(keys, tt, ranges_ , ss );
6579 }
6580
6581
6582
6583
6584
6585 /**
6586 NP::Resolve_ranges
6587 -------------------
6588
6589 Ranges are newline delimited with colon separated pairs of tags and mandatory annotation, eg::
6590
6591    SEvt__Init_RUN_META:CSGFoundry__Load_HEAD                     ## init
6592    CSGFoundry__Load_HEAD:CSGFoundry__Load_TAIL                   ## load_geom
6593    CSGOptiX__Create_HEAD:CSGOptiX__Create_TAIL                   ## upload_geom
6594    A%0.3d_QSim__simulate_HEAD:A%0.3d_QSim__simulate_PREL         ## upload_genstep
6595    A%0.3d_QSim__simulate_PREL:A%0.3d_QSim__simulate_POST         ## simulate
6596    A%0.3d_QSim__simulate_POST:A%0.3d_QSim__simulate_TAIL         ## download
6597
6598 Stamp keys are wildcarded by including strings like %0.3d
6599 so need to pre-pass looking for keys with a range of indices,
6600 so effectively are generating simple ranges without wildcard
6601 based on the keys, wildcards and idx range.
6602
6603 1. split the colon delimited range pair from the ## delimited annotation
6604 2. generate specs vector of wildcard resolved key ranges including the annotation
6605
6606 The output specs are of form::
6607
6608     A000_QSim__simulate_HEAD:A000_QSim__simulate_PREL:upload_genstep
6609
6610 **/
6611
6612 inline void NP::Resolve_ranges( std::vector<std::string>& specs, const std::vector<std::string>& keys, const char* ranges_, std::ostream* ss )
6613 {
6614     assert(ranges_ && strlen(ranges_) > 0);
6615     std::vector<std::string> ranges ;
6616     std::vector<std::string> anno ;
6617     U::LiteralAnno(ranges, anno, ranges_ , "#" );
6618     assert( ranges.size() == anno.size() ) ;
6619
6620     int num_keys = keys.size() ;
6621     int num_ranges = ranges.size() ;
6622
6623     if(ss) ss->imbue(std::locale("")) ; // commas for thousands
6624
6625     if(ss) (*ss)
6626        << "[NP::Resolve_ranges\n"
6627        << " num_keys :" << num_keys
6628        << " num_ranges :" << num_ranges
6629        << std::endl
6630        ;
6631
6632     // generate specs of wildcard resolved ranges
6633
6634     char delim = ':' ;
6635
6636     for(int i=0 ; i < num_ranges ; i++)
6637     {
6638         const std::string& range = ranges[i] ;  //
6639         size_t pos = range.find(delim);
6640         if( pos == std::string::npos ) continue ;
6641
6642         std::string _a = range.substr(0, pos);
6643         std::string _b = range.substr(pos+1);
6644         const char* a = _a.c_str();
6645         const char* b = _b.c_str();
6646
6647         // idx0 idx1 specifies the range for wildcard replacements
6648         int idx0 = 0 ;
6649         int idx1 = 30 ;
6650         for(int idx=idx0 ; idx < idx1 ; idx++)
6651         {
6652             std::string akey ;
6653             std::string bkey ;
6654             int ia = U::FormattedKeyIndex(akey, keys, a, idx, idx+1 ) ;
6655             int ib = U::FormattedKeyIndex(bkey, keys, b, idx, idx+1 ) ;
6656
6657             bool found_range_pair = !akey.empty() && !bkey.empty() && ia > -1 && ib > -1 ;
6658
6659             if(found_range_pair)
6660             {
6661                 std::stringstream mm ;
6662                 mm << akey << ":" << bkey << ":" << anno[i] ;
6663                 std::string spec = mm.str();
6664                 if(std::find(specs.begin(), specs.end(), spec) == specs.end())  specs.push_back(spec);
6665             }
6666         }
6667     }
6668     int num_specs = specs.size();
6669
6670     if(ss) (*ss)
6671        << "]NP::Resolve_ranges\n"
6672        << " num_keys :" << num_keys
6673        << " num_ranges :" << num_ranges
6674        << " num_specs :"  << num_specs
6675        << std::endl
6676        ;
6677
6678 }
6679
6680
6681 /**
6682 NP::TimeOrder_ranges
6683 ---------------------
6684
6685 1. collect lhs (start) times of the ranges into stt
6686 2. sort spec_order indices into ascending time order
6687
6688 **/
6689
6690
6691 inline void NP::TimeOrder_ranges( std::vector<int>& spec_order, const std::vector<std::string>& specs, const std::vector<std::string>& keys, const std::vector<int64_t>& tt, std::ostream* ss )
6692 {
6693     if(ss) *ss << "NP::TimeOrder_ranges\n" ;
6694     assert( spec_order.size() == specs.size() ) ;
6695     int num_specs = specs.size();
6696
6697     std::vector<int64_t> stt(num_specs);
6698
6699     for(int i=0 ; i < num_specs ; i++)
6700     {
6701         const char* spec = specs[i].c_str();
6702         std::vector<std::string> elem ;
6703         U::Split( spec, ':', elem );
6704         assert( elem.size() > 1 );
6705
6706         const char* ak = elem[0].c_str();
6707         const char* bk = elem[1].c_str();
6708
6709         int ia = U::KeyIndex( keys, ak );
6710         int ib = U::KeyIndex( keys, bk );
6711
6712         int64_t ta = ia > -1 ? tt[ia] : 0 ;
6713         int64_t tb = ib > -1 ? tt[ib] : 0 ;
6714
6715         bool expect = ta > 0 && tb > 0 ;
6716         if(!expect) std::cerr << "NP::TimeOrder_ranges MISSING KEY " << std::endl ;
6717         assert(expect );
6718
6719         stt[i] = ta ;
6720     }
6721
6722     // Sort indices into ascending start time order
6723
6724     std::iota(spec_order.begin(), spec_order.end(), 0);
6725     auto order = [&stt](const size_t& a, const size_t &b) { return stt[a] < stt[b];}  ;
6726     std::sort(spec_order.begin(), spec_order.end(), order );
6727 }
6728
6729 inline NP* NP::MakeMetaKVS_ranges_table(
6730     const std::vector<int>& spec_order,
6731     const std::vector<std::string>& specs,
6732     const std::vector<std::string>& keys,
6733     const std::vector<int64_t>& tt,
6734     std::ostream* ss )
6735 {
6736     assert( spec_order.size() == specs.size() ) ;
6737     int num_specs = specs.size();
6738
6739     int ni = num_specs ;
6740     int nj = 5 ;
6741
6742     NP* _rr = NP::Make<int64_t>( ni, nj ) ;
6743     int64_t* rr = _rr->values<int64_t>();
6744
6745     int64_t ab_total = 0 ;
6746     int wid = 30 ;
6747     _rr->labels = new std::vector<std::string> { "ta", "tb", "ab", "ia", "ib" } ;
6748
6749     for(int j=0 ; j < num_specs ; j++)
6750     {
6751         int i = spec_order[j];
6752         const char* spec = specs[i].c_str();
6753         _rr->names.push_back(spec);
6754
6755         std::vector<std::string> elem ;
6756         U::Split( spec, ':', elem );
6757         assert( elem.size() > 1 );
6758
6759         const char* ak = elem[0].c_str();
6760         const char* bk = elem[1].c_str();
6761         const char* no = elem.size() > 2 ? elem[2].c_str() : nullptr ;
6762
6763         int ia = U::KeyIndex( keys, ak );
6764         int ib = U::KeyIndex( keys, bk );
6765
6766         int64_t ta = ia > -1 ? tt[ia] : 0 ;
6767         int64_t tb = ib > -1 ? tt[ib] : 0 ;
6768         int64_t ab = tb - ta ;
6769
6770         rr[nj*j+0] = ta ;
6771         rr[nj*j+1] = tb ;
6772         rr[nj*j+2] = ab ;
6773         rr[nj*j+3] = ia ;
6774         rr[nj*j+4] = ib ;
6775
6776         ab_total += ab ;
6777
6778         if(ss) (*ss)
6779             << " " << std::setw(wid) << ak
6780             << " ==> "
6781             << " " << std::setw(wid) << bk
6782             << "      " << std::setw(16) << std::right << ab
6783             << "      " << std::setw(16) << std::right << ta
6784             << "      " << std::setw(16) << std::right << tb
6785             << ( no == nullptr ? "" : "    ## " ) << ( no ? no : "" )
6786             << std::endl
6787             ;
6788     }
6789
6790     if(ss) (*ss)
6791        << " " << std::setw(wid) << ""
6792        << "     "
6793        << " " << std::setw(wid) << "TOTAL:"
6794        << "      " << std::setw(16) << std::right << ab_total
6795        << std::endl
6796        ;
6797
6798     return _rr ;
6799 }
6800
6801 /**
6802 NP::MakeMetaKVS_ranges2_table
6803 ------------------------------
6804
6805 1. For each range lookup the a and b key indices into keys vector.
6806 2. When the number of indices for a and b matches simply collect indices and time stamps for the range into the kpp tuple vector
3. When one index is found for b and more than one for a, pick the a index closest in absolute time difference to b
6808 4. Ditto for a and b situation reversed
6809 5. sort indices of the kpp tuple into ascending a(start) time order
6810
6811 **/
6812
6813 inline NP* NP::MakeMetaKVS_ranges2_table( const std::vector<std::string>& specs, const std::vector<std::string>& keys, const std::vector<int64_t>& tt, std::ostream* ss )
6814 {
6815     int num_specs = specs.size();
6816     if(ss) (*ss) << "[NP::MakeMetaKVS_ranges2_table num_specs " << num_specs << "\n";
6817
6818     using KP = std::tuple<int,int,int64_t,int64_t,const char*, int> ;
6819
6820     std::vector<KP> kpp ;
6821
6822     for(int i=0 ; i < num_specs ; i++)
6823     {
6824         const char* spec = specs[i].c_str();
6825         std::vector<std::string> elem ;
6826         U::Split( spec, ':', elem );
6827         assert( elem.size() > 1 );
6828
6829         const char* ak = elem[0].c_str();
6830         const char* bk = elem[1].c_str();
6831         const char* no = elem.size() > 2 ? elem[2].c_str() : nullptr ;
6832         const char* uno = no ? strdup(no) : nullptr ;
6833
6834         std::vector<int> iia ;
6835         U::KeyIndices(iia, keys, ak );
6836
6837         std::vector<int> iib ;
6838         U::KeyIndices(iib, keys, bk );
6839
6840         int na = iia.size();
6841         int nb = iib.size();
6842
6843         if(na == nb)
6844         {
6845             for(int j=0 ; j < na ; j++) kpp.push_back({iia[j],iib[j],tt[iia[j]],tt[iib[j]], uno, i });
6846         }
6847         else if( na > 1 && nb == 1 )
6848         {
6849             // pick smallest absolute delta between the multiple a and single b
6850             std::vector<int64_t> att ;
6851             for(int a=0 ; a < na ; a++) att.push_back( std::abs( tt[iia[a]] - tt[iib[0]] ) );
6852             int ia = std::distance(std::begin(att), std::min_element(std::begin(att), std::end(att)));
6853             kpp.push_back( {iia[ia], iib[0], tt[iia[ia]], tt[iib[0]], uno, i } );
6854         }
6855         else if( nb > 1 && na == 1 )
6856         {
6857             // pick smallest absolute delta between the multiple b and single a
6858             std::vector<int64_t> btt ;
6859             for(int b=0 ; b < nb ; b++) btt.push_back( std::abs( tt[iib[b]] - tt[iia[0]] ) );
6860             int ib = std::distance(std::begin(btt), std::min_element(std::begin(btt), std::end(btt)));
6861             kpp.push_back( {iia[0], iib[ib], tt[iia[0]], tt[iib[ib]], uno, i } );
6862         }
6863     }
6864
6865     int num_kpp = kpp.size();
6866     if(ss) (*ss) << ".NP::MakeMetaKVS_ranges2_table kpp.size " << kpp.size() << "\n" ;
6867
6868     // Sort indices into ascending start time order
6869     std::vector<int> ikp(num_kpp);
6870     std::iota(ikp.begin(), ikp.end(), 0);
6871     auto kp_order = [&kpp](const size_t& a, const size_t &b) { return std::get<2>(kpp[a]) < std::get<2>(kpp[b]);}  ;
6872     std::sort(ikp.begin(), ikp.end(), kp_order );
6873
6874
6875     int dbg = 0 ;
6876     int ni = num_kpp ;
6877     int nj = 5 ;
6878
6879     NP* _rr = NP::Make<int64_t>( ni, nj ) ;
6880     int64_t* rr = _rr->values<int64_t>();
6881
6882     int wid = 30 ;
6883     _rr->labels = new std::vector<std::string> { "ta", "tb", "ab", "ia", "ib" } ;
6884
6885     int64_t ab_total = 0 ;
6886
6887     std::vector<std::tuple<int,int64_t>> abs ;
6888
6889     for(int j=0 ; j < num_kpp ; j++)
6890     {
6891         int i = ikp[j] ;
6892
6893         const KP& kp = kpp[i];
6894         int ia = std::get<0>(kp) ;
6895         int ib = std::get<1>(kp) ;
6896         int64_t ta = std::get<2>(kp) ;
6897         int64_t tb = std::get<3>(kp) ;
6898         const char* no = std::get<4>(kp) ;
6899         int ispec = std::get<5>(kp) ;    // when ispec stays the same its a repeated range
6900         const char* spec = specs[ispec].c_str();
6901         _rr->names.push_back(spec);
6902
6903         int64_t ab = tb - ta ;
6904         abs.push_back( {ispec, ab } );
6905
6906         int64_t ab_cumsum = 0 ;  // sum ab so far with the same spec
6907         int ab_cumsum_num = 0 ;
6908         for(int i=0 ; i < int(abs.size()) ; i++)
6909         {
6910             if( std::get<0>(abs[i]) == ispec )
6911             {
6912                 ab_cumsum += std::get<1>(abs[i]) ;
6913                 ab_cumsum_num += 1 ;
6914             }
6915         }
6916         bool rep = ab_cumsum_num > 1 ;
6917
6918
6919         ab_total += ab ;
6920
6921         rr[nj*j+0] = ta ;
6922         rr[nj*j+1] = tb ;
6923         rr[nj*j+2] = ab ;
6924         rr[nj*j+3] = ia ;
6925         rr[nj*j+4] = ib ;
6926
6927         if(ss) (*ss)
6928             << " " << std::setw(wid) << keys[ia]
6929             << " ==> "
6930             << " " << std::setw(wid) << keys[ib]
6931             << "      " << std::setw(16) << std::right << ab
6932             ;
6933
6934         if(ss && dbg > 0 ) (*ss)
6935             << "      " << std::setw(16) << std::right << ta
6936             << "      " << std::setw(16) << std::right << tb
6937             ;
6938
6939         if(ss) (*ss)
6940             << ( rep ? " REP " : "     " )
6941             ;
6942
6943         if(ss && rep) (*ss)
6944             << "      " << std::setw(16) << std::right << ab_cumsum
6945             ;
6946
6947         if(ss && !rep) (*ss)
6948             << "      " << std::setw(16) << ""
6949             ;
6950
6951         if(ss) (*ss)
6952             << ( no == nullptr ? "" : "    ## " ) << ( no ? no : "" )
6953             << std::endl
6954             ;
6955     }
6956
6957     if(ss) (*ss)
6958        << " " << std::setw(wid) << ""
6959        << "     "
6960        << " " << std::setw(wid) << "TOTAL:"
6961        << "      " << std::setw(16) << std::right << ab_total
6962        << std::endl
6963        ;
6964
6965     if(ss) (*ss)
6966        << "]NP::MakeMetaKVS_ranges2_table num_keys:" << keys.size() << "\n"
6967        ;
6968
6969     return _rr ;
6970 }
6971
6972
6973
6974
6975
6976
6977
6978
6979
6980 /**
6981 NP::MakeMetaKVS_ranges
6982 -----------------------
6983
6984 Does not handle repeated keys well, as happens with multi-launch.
6985 See ranges2 which attempts to fix that.
6986
6987 **/
6988
6989 inline NP* NP::MakeMetaKVS_ranges( const std::vector<std::string>& keys, const std::vector<int64_t>& tt, const char* ranges_, std::ostream* ss )
6990 {
6991     std::vector<std::string> specs ;
6992     Resolve_ranges(specs, keys, ranges_, ss );
6993     int num_specs = specs.size();
6994
6995     if(ss) (*ss)
6996        << ".NP::MakeMetaKVS_ranges num_specs:" << num_specs << "\n"
6997        ;
6998
6999     std::vector<int> spec_order(num_specs) ;
7000     TimeOrder_ranges( spec_order, specs, keys, tt, ss );
7001
7002     // present the ranges in order of start time
7003     NP* rr = MakeMetaKVS_ranges_table(spec_order, specs, keys, tt, ss );
7004
7005     return rr ;
7006 }
7007
7008
/**
NP::MakeMetaKVS_ranges2
-------------------------

Unlike NP::MakeMetaKVS_ranges this attempts to handle repeated keys,
which currently happens with multi-launch. NP::MakeMetaKVS_ranges2_table
looks up all indices of each key, so a repeated range gives one row per
repeat, with the running sum for the range reported alongside "REP".

HMM: it might be more meaningful to present a single summed entry
per range, with reporting on how many were summed.

**/
7019
7020 inline NP* NP::MakeMetaKVS_ranges2( const std::vector<std::string>& keys, const std::vector<int64_t>& tt, const char* ranges_, std::ostream* ss )
7021 {
7022     if(ss) (*ss)
7023        << "[NP::MakeMetaKVS_ranges2 num_keys:" << keys.size() << "\n"
7024        ;
7025
7026     std::vector<std::string> specs ;
7027     Resolve_ranges(specs, keys, ranges_, ss );
7028     int num_specs = specs.size();
7029
7030     NP* rr = MakeMetaKVS_ranges2_table(specs, keys, tt, ss );
7031
7032     if(ss) (*ss)
7033        << "]NP::MakeMetaKVS_ranges2 num_specs:" << num_specs << " rr " << ( rr ? rr->sstr() : "-" ) << "\n"
7034        ;
7035
7036     return rr ;
7037 }
7038
7039
7040
7041
7042
7043
7044
7045 inline std::string NP::DescMetaKVS_kvs(const std::vector<std::string>& keys, const std::vector<std::string>& vals, const std::vector<int64_t>& tt )  // static
7046 {
7047     int num_keys = keys.size() ;
7048     if(num_keys == 0) return "" ;
7049
7050     // sort indices into increasing time order
7051     // non-timestamped lines with placeholder timestamp zero will come first
7052     std::vector<int> ii(num_keys);
7053     std::iota(ii.begin(), ii.end(), 0);  // init to 0,1,2,3,..., num_keys-1
7054     auto order = [&tt](const size_t& a, const size_t &b) { return tt[a] < tt[b];}  ;
7055     std::sort(ii.begin(), ii.end(), order );
7056
7057     std::stringstream ss ;
7058     ss.imbue(std::locale("")) ;  // commas for thousands
7059
7060     int64_t t_first = 0 ;
7061     int64_t t_second = 0 ;
7062     int64_t t_prev  = 0 ;
7063
7064     int t_count = 0 ;
7065
7066     ss
7067         << "[NP::DescMetaKVS_kvs "
7068         << " keys.size " << keys.size()
7069         << " vals.size " << vals.size()
7070         << " tt.size "   << tt.size()
7071         << " num_keys " << num_keys
7072         << "\n"
7073         ;
7074
7075     for(int j=0 ; j < num_keys ; j++)
7076     {
7077         int i = ii[j] ;
7078         const char* k = keys[i].c_str();
7079         const char* v = vals[i].c_str();
7080         int64_t     t = tt[i] ;
7081
7082         if(t_first > 0 && t_second == 0 && t > 0 ) t_second = t  ;
7083         if(t_first == 0 && t > 0 ) t_first = t  ;
7084
7085         int64_t dt0 = t > 0 && t_first  > 0 ? t - t_first  : -1 ; // microseconds since first
7086         int64_t dt1 = t > 0 && t_second > 0 ? t - t_second : -1 ; // microseconds since second
7087         int64_t dt  = t > 0 && t_prev   > 0 ? t - t_prev   : -1 ; // microseconds since previous stamp
7088         if(t > 0) t_prev = t ;
7089         if(t > 0) t_count += 1 ;
7090         if(t_count == 1 ) ss
7091             << "\n"
7092             << std::setw(30) << "k"
7093             << " : "
7094             << std::setw(35) << "v"
7095             << "   "
7096             << std::setw(27) << (  "t:microsecond"  )
7097             << " " << std::setw(11) << "dt0:(t-t0)"
7098             << " " << std::setw(11) << "dt1:(t-t1)"
7099             << " " << std::setw(11) << "dt:(t-tpr)"
7100             << std::endl
7101             ;
7102
7103
7104         ss << std::setw(30) << k
7105            << " : "
7106            << std::setw(35) << v
7107            << "   "
7108            << std::setw(27) << (  t > 0 ? U::Format(t) : "" )
7109            << " " << std::setw(11) << U::FormatInt(dt0, 11)
7110            << " " << std::setw(11) << U::FormatInt(dt1, 11)
7111            << " " << std::setw(11) << U::FormatInt(dt , 11 )
7112            << std::endl
7113            ;
7114     }
7115     ss << "]NP::DescMetaKVS_kvs\n" ;
7116     std::string str = ss.str();
7117     return str ;
7118 }
7119
7120
7121 /**
7122 NP::DescMetaKVS_juncture
7123 -------------------------
7124
7125 Example juncture::
7126
7127     SEvt__Init_RUN_META,SEvt__BeginOfRun,SEvt__EndOfRun,SEvt__Init_RUN_META
7128
7129 1. split juncture string on comma delimiters
7130 2. loop over juncture j_key looking up the corresponding KeyIndex
7131 3. report deltas between the timestamps looked up from the juncture keys
7132
7133 Essentially this is just a selected key version of the full descMetaKVS
7134 listing that precedes it which can be easier to understand with careful
7135 choice of juncture keys appropriate for the parts of the code that
7136 take the time. The ordering is entirely from the input juncture key
7137 order with no sorting. So delta times can be negative.
7138
7139 **/
7140
7141
7142 inline std::string NP::DescMetaKVS_juncture( const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* juncture_ )
7143 {
7144     assert(juncture_ && strlen(juncture_) > 0);
7145
7146     int it_first = std::distance(std::begin(tt), std::min_element(std::begin(tt), std::end(tt)));
7147     int64_t t0 = tt[it_first];
7148
7149     std::vector<std::string> juncture ;
7150     Split(juncture, juncture_ , ',' );
7151     INT num_juncture = juncture.size() ;
7152
7153     std::stringstream ss ;
7154     ss << "[NP::DescMetaKVS_juncture\n" ;
7155     ss << "num_juncture " << num_juncture << "\n" ;
7156     ss << "juncture [" << juncture_ << "] time ranges between junctures" << std::endl ;
7157     ss.imbue(std::locale("")) ;  // commas for thousands
7158
7159
7160     ss << std::setw(30) << "k"
7161        << " : "
7162        << std::setw(12) << "dtp"
7163        << std::setw(23) << ""
7164        << " : "
7165        << std::setw(12) << "dt0"
7166        << " : "
7167        << "timestamp"
7168        << std::endl
7169        ;
7170
7171     int64_t tp = 0 ;
7172     for(int j=0 ; j < num_juncture ; j++)
7173     {
7174         const char* j_key = juncture[j].c_str() ;
7175         int i = U::KeyIndex(keys, j_key) ;
7176         if( i == -1 ) continue ;
7177
7178         const char* k = keys[i].c_str();
7179         int64_t t = tt[i] ;
7180
7181         int64_t dtp = ( t > 0 && tp > 0 ) ? t - tp : -1  ;
7182         int64_t dt0 = ( t > 0 && t0 > 0 ) ? t - t0 : -1 ;
7183
7184         ss << std::setw(30) << k
7185            << " : "
7186            << std::setw(12) << dtp
7187            << std::setw(23) << ""
7188            << " : "
7189            << std::setw(12) << dt0
7190            << " : "
7191            << U::Format(t)
7192            << " JUNCTURE"
7193            << std::endl
7194            ;
7195
7196          if( t > 0 ) tp = t ;
7197     }
7198     ss << "]NP::DescMetaKVS_juncture\n" ;
7199     std::string str = ss.str();
7200     return str ;
7201 }
7202
7203 /**
7204 NP::DescMetaKVS_ranges
7205 ------------------------
7206
7207 This does not handle repeated keys, eg from multi-launch running.
7208
7209 **/
7210
7211 inline std::string NP::DescMetaKVS_ranges( const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* ranges_ )
7212 {
7213     std::stringstream ss ;
7214     ss << "[NP::DescMetaKVS_ranges\n"
7215        << "[ranges\n"
7216        << ( ranges_ ? ranges_ : "-" )
7217        << "]ranges\n"
7218        ;
7219
7220     NP* a = MakeMetaKVS_ranges(keys, tt, ranges_ , &ss );
7221
7222     ss << "]NP::DescMetaKVS_ranges"
7223        << " a " << ( a ? a->sstr() : "-" )
7224        << "\n"
7225        ;
7226
7227     std::string str = ss.str();
7228     return str ;
7229 }
7230
7231
7232 /**
7233 NP::DescMetaKVS_ranges2
7234 ------------------------
7235
7236 This attempts to handle repeated keys reasonably, eg from multi-launch running.
7237
7238 **/
7239
7240 inline std::string NP::DescMetaKVS_ranges2( const std::vector<std::string>& keys, std::vector<int64_t>& tt, const char* ranges_ )
7241 {
7242     std::stringstream ss ;
7243     ss << "[NP::DescMetaKVS_ranges2\n"
7244        << "[ranges\n"
7245        << ( ranges_ ? ranges_ : "-" )
7246        << "]ranges\n"
7247        ;
7248
7249     NP* a = MakeMetaKVS_ranges2(keys, tt, ranges_ , &ss );
7250
7251     ss << "]NP::DescMetaKVS_ranges2"
7252        << " a " << ( a ? a->sstr() : "-" )
7253        << "\n"
7254        ;
7255
7256     std::string str = ss.str();
7257     return str ;
7258 }
7259
7260
7261
7262
7263
7264 /**
7265 NP::DescMetaKVS
7266 ----------------
7267
7268 1. GetMetaKVS extracting key, val pairs and microsecond timestamps,
7269    lines without 16 digit timestamps have placeholder timestamps of zero
7270 2. std::iota create vector of indices 0,1,2,3...num_key - 1
7271 3. sort indices into increasing timestamp order, all placeholder zeros will be at start
7272 4. report time stamps with deltas from 1st, 2nd and previous
7273 5. DescMetaKVS_juncture
7274 6. DescMetaKVS_ranges
7275
7276 **/
7277
7278
7279 inline std::string NP::DescMetaKVS(const std::string& meta, const char* juncture_ , const char* ranges_ )  // static
7280 {
7281     VS keys ;
7282     VS vals ;
7283     VT tt ;
7284
7285     bool only_with_stamp = false ;
7286
7287     U::GetMetaKVS(meta, &keys, &vals, &tt, only_with_stamp );
7288
7289     assert( keys.size() == vals.size() );
7290     assert( keys.size() == tt.size() );
7291     assert( tt.size() == keys.size() );
7292
7293     std::stringstream ss ;
7294     ss << "[NP::DescMetaKVS only_with_stamp : " << ( only_with_stamp ? "YES" : "NO " ) << "\n" ;
7295
7296     ss << DescMetaKVS_kvs( keys, vals, tt ) ;
7297     if(juncture_ && strlen(juncture_) > 0 ) ss << DescMetaKVS_juncture(keys, tt, juncture_ );
7298     if(ranges_ && strlen(ranges_) > 0 )     ss << DescMetaKVS_ranges2(keys, tt, ranges_ );
7299
7300     ss << "]NP::DescMetaKVS\n" ;
7301
7302     std::string str = ss.str();
7303     return str ;
7304 }
7305
7306
7307 inline std::string NP::descMetaKVS(const char* juncture_, const char* ranges_) const
7308 {
7309
7310     bool dump_meta = false ;
7311
7312     std::stringstream ss ;
7313     ss << "[NP::descMetaKVS\n" ;
7314
7315     if(dump_meta) ss
7316        << "[meta\n"
7317        << meta
7318        << "]meta\n"
7319        ;
7320
7321     ss
7322        << DescMetaKVS(meta, juncture_, ranges_)
7323        << "]NP::descMetaKVS\n"
7324        ;
7325     std::string str = ss.str();
7326     return str ;
7327 }
7328
7329
7330
7331
7332
7333
7334
7335
7336 inline std::string NP::DescMetaKV(const std::string& meta, const char* juncture_, const char* ranges_ )  // static
7337 {
7338     std::vector<std::string> keys ;
7339     std::vector<std::string> vals ;
7340     bool only_with_profile = false ;
7341     GetMetaKV(meta, &keys, &vals, only_with_profile );
7342     assert( keys.size() == vals.size() );
7343     INT num_keys = keys.size();
7344
7345     int64_t t0 = std::numeric_limits<int64_t>::max() ;
7346     std::vector<int64_t> tt ;
7347     std::vector<INT> ii ;
7348
7349     // collect times and indices of all entries
7350     // time is set to zero for entries without time stamps
7351     for(INT i=0 ; i < num_keys ; i++)
7352     {
7353         const char* v = vals[i].c_str();
7354         bool looks_like_stamp = U::LooksLikeStampInt(v);
7355         bool looks_like_prof  = U::LooksLikeProfileTriplet(v);
7356         int64_t t = 0 ;
7357         if(looks_like_stamp) t = U::To<int64_t>(v) ;
7358         if(looks_like_prof)  t = strtoll(v, nullptr, 10);
7359         tt.push_back(t);
7360         ii.push_back(i);
7361         if(t > 0 && t < t0) t0 = t ;
7362     }
7363
7364     // sort the indices into time increasing order
7365     auto order = [&tt](const size_t& a, const size_t &b) { return tt[a] < tt[b];}  ;
7366     std::sort( ii.begin(), ii.end(), order );
7367
7368
7369     std::stringstream ss ;
7370     ss.imbue(std::locale("")) ;  // commas for thousands
7371
7372     // use the time sorted indices to output in time order
7373     // entries without time info at t=0 appear first
7374     for(INT j=0 ; j < num_keys ; j++)
7375     {
7376         INT i = ii[j] ;
7377         const char* k = keys[i].c_str();
7378         const char* v = vals[i].c_str();
7379         int64_t t = tt[i] ;
7380
7381         ss << std::setw(30) << k
7382            << " : "
7383            << std::setw(35) << v
7384            << " : "
7385            << std::setw(12) << ( t > 0 ? t - t0 : -1 )
7386            << " : "
7387            << ( t > 0 ? U::Format(t) : "" )
7388            << std::endl
7389            ;
7390     }
7391
7392
7393
7394     if(juncture_ && strlen(juncture_) > 0)
7395     {
7396         std::vector<std::string> juncture ;
7397         Split(juncture, juncture_ , ',' );
7398         INT num_juncture = juncture.size() ;
7399         ss << "juncture:" << num_juncture << " [" << juncture_ << "] time ranges between junctures" << std::endl ;
7400
7401         int64_t tp = 0 ;
7402         for(INT j=0 ; j < num_juncture ; j++)
7403         {
7404             const char* j_key = juncture[j].c_str() ;
7405             INT i = std::distance( keys.begin(), std::find(keys.begin(), keys.end(), j_key )) ;
7406             if( i == INT(keys.size()) ) continue ;
7407
7408             const char* k = keys[i].c_str();
7409             //const char* v = vals[i].c_str();
7410             int64_t t = tt[i] ;
7411
7412             ss << std::setw(30) << k
7413                << " : "
7414                << std::setw(12) << ( t > 0 && tp > 0 ? t - tp : -1 )
7415                << std::setw(23) << ""
7416                << " : "
7417                << std::setw(12) << ( t > 0 && t0 > 0 ? t - t0 : -1 )
7418                << " : "
7419                << U::Format(t)
7420                << " JUNCTURE"
7421                << std::endl
7422                ;
7423
7424              if( t > 0 ) tp = t ;
7425         }
7426     }
7427     ss << " ranges_[" << ( ranges_ ? ranges_ : "-" ) << "]\n";
7428     std::string str = ss.str();
7429     return str ;
7430 }
7431
7432 inline std::string NP::descMetaKV(const char* juncture, const char* ranges) const
7433 {
7434     std::stringstream ss ;
7435     ss << "NP::descMetaKV"
7436        << std::endl
7437        << DescMetaKV(meta, juncture, ranges)
7438        ;
7439     std::string str = ss.str();
7440     return str ;
7441 }
7442
7443
7444
7445 inline const char* NP::get_lpath() const
7446 {
7447     return lpath.c_str() ? lpath.c_str() : "-" ;
7448 }
7449
7450
7451 template<typename T>
7452 inline NP::INT NP::DumpCompare( const NP* a, const NP* b , unsigned a_column, unsigned b_column, const T epsilon ) // static
7453 {
7454     const T* aa = a->cvalues<T>();
7455     const T* bb = b->cvalues<T>();
7456
7457     unsigned a_ndim = a->shape.size() ;
7458     unsigned a_ni = a->shape[0] ;
7459     unsigned a_nj = a_ndim == 1 ? 1 : a->shape[1] ;
7460     assert( a_column < a_nj );
7461
7462     unsigned b_ndim = b->shape.size() ;
7463     unsigned b_ni = b->shape[0] ;
7464     unsigned b_nj = b_ndim == 1 ? 1 : b->shape[1] ;
7465     assert( b_column < b_nj );
7466
7467     assert( a_ni == b_ni );
7468
7469     T av_sum = 0. ;
7470     T bv_sum = 0. ;
7471     INT mismatch = 0 ;
7472
7473     for(unsigned i=0 ; i < a_ni ; i++)
7474     {
7475         const T av = aa[a_nj*i+a_column] ;
7476         const T bv = bb[b_nj*i+b_column] ;
7477         av_sum += av ;
7478         bv_sum += bv ;
7479
7480         bool is_diff = std::abs(av-bv) > epsilon ;
7481         if(is_diff) std::cout
7482             << std::setw(4) << i
7483             << " a " << std::setw(10) << std::fixed << std::setprecision(4) << av
7484             << " b " << std::setw(10) << std::fixed << std::setprecision(4) << bv
7485             << " a-b " << std::setw(10) << std::fixed << std::setprecision(4) << av-bv
7486             << std::endl
7487             ;
7488         if(is_diff) mismatch += 1 ;
7489     }
7490     if(mismatch > 0) std::cout
7491         << "NP::DumpCompare "
7492         << std::setw(4) << "sum"
7493         << " a " << std::setw(10) << std::fixed << std::setprecision(4) << av_sum
7494         << " b " << std::setw(10) << std::fixed << std::setprecision(4) << bv_sum
7495         << " a-b " << std::setw(10) << std::fixed << std::setprecision(4) << av_sum-bv_sum
7496         << " mismatch " << mismatch
7497         << std::endl
7498         ;
7499     return mismatch ;
7500 }
7501
7502 /**
7503 NP::Memcmp
7504 -----------
7505
7506 * -1: array lengths differ
7507 * 0:bytes of the two arrays match
7508 * other value indicating the array bytes differ
7509
7510 **/
7511
7512 inline NP::INT NP::Memcmp(const NP* a, const NP* b ) // static
7513 {
7514     unsigned a_bytes = a ? a->arr_bytes() : 0 ;
7515     unsigned b_bytes = b ? b->arr_bytes() : 0 ;
7516     return a_bytes == b_bytes && a_bytes > 0 ? memcmp(a->bytes(), b->bytes(), a_bytes) : -1 ;
7517 }
7518
7519 inline bool NP::SameData( const NP* a, const NP* b )
7520 {
7521     return 0 == Memcmp(a, b );
7522 }
7523
7524 /**
7525 NP::Concatenate
7526 ----------------
7527
7528 Load the named NP arrays from directory and concatenate them in name order
7529
7530 **/
7531
7532
7533 inline NP* NP::Concatenate(const char* dir, const std::vector<std::string>& names) // static
7534 {
7535     std::vector<NP*> aa ;
7536     for(unsigned i=0 ; i < names.size() ; i++)
7537     {
7538          const char* name = names[i].c_str();
7539          NP* a = Load(dir, name);
7540          aa.push_back(a);
7541     }
7542     NP* concat = NP::Concatenate(aa);
7543     return concat ;
7544 }
7545
7546 /**
7547 NP::Concatenate
7548 ----------------
7549
7550 * template allows same code to work with both "NP" and "const NP"
7551 * arrays must have the same number of itemvalues, ie values after first dimension
7552
7553
7554 **/
7555
7556 template<typename T>
7557 inline NP* NP::Concatenate(const std::vector<T*>& aa )  // static
7558 {
7559     [[maybe_unused]] INT num_a = aa.size();
7560     assert( num_a > 0 );
7561     auto a0 = aa[0] ;
7562
7563     unsigned nv0 = a0->num_itemvalues() ;
7564     const char* dtype0 = a0->dtype ;
7565
7566     for(unsigned i=0 ; i < aa.size() ; i++)
7567     {
7568         auto a = aa[i] ;
7569
7570         unsigned nv = a->num_itemvalues() ;   // values after first dimension, eg 16 for (n,4,4)
7571         bool compatible = nv == nv0 && strcmp(dtype0, a->dtype) == 0 ;
7572         if(!compatible)
7573             std::cout
7574                 << "NP::Concatenate FATAL expecting equal itemsize"
7575                 << " nv " << nv
7576                 << " nv0 " << nv0
7577                 << " a.dtype " << a->dtype
7578                 << " dtype0 " << dtype0
7579                 << std::endl
7580                 ;
7581         assert(compatible);
7582
7583         if(VERBOSE) std::cout << "NP::Concatenate " << std::setw(3) << i << " " << a->desc() << " nv " << nv << std::endl ;
7584     }
7585
7586     UINT ni_total = 0 ;
7587     for(unsigned i=0 ; i < aa.size() ; i++) ni_total += aa[i]->shape[0] ;
7588     if(VERBOSE) std::cout << "NP::Concatenate ni_total " << ni_total << std::endl ;
7589
7590     std::vector<INT> comb_shape ;
7591     NPS::copy_shape( comb_shape, a0->shape );
7592     comb_shape[0] = ni_total ;
7593
7594     if(VERBOSE) std::cout << "NP::Concatenate c = new NP " << std::endl ;
7595     NP* c = new NP(a0->dtype);
7596     if(VERBOSE) std::cout << "NP::Concatenate c.set_shape " << std::endl ;
7597     c->set_shape(comb_shape);
7598     if(VERBOSE) std::cout << "NP::Concatenate c " << c->desc() << std::endl ;
7599
7600     UINT offset_bytes = 0 ;   // uint64_t needed here to avoid clocking offset_bytes for large array handling
7601     for(unsigned i=0 ; i < aa.size() ; i++)
7602     {
7603         auto a = aa[i];
7604         UINT a_bytes = a->uarr_bytes() ;
7605         memcpy( c->data.data() + offset_bytes ,  a->data.data(),  a_bytes );
7606         offset_bytes += a_bytes ;
7607         // clocking offset_bytes here (when used only 32 bit unsigned) resulted in the tail of the array
7608         // being unfilled (left as zero) and the addressed portion of the array being overwritten
7609         // potentially multiple times
7610     }
7611     return c ;
7612 }
7613
7614 /**
7615 NP::Combine
7616 ------------
7617
7618 Combines 2d arrays with different item counts using the largest item count plus one
7619 for the middle dimension of the resulting 3d array.
7620
7621 For example a combination of 2d arrays with shapes: (n0,m) (n1,m) (n2,m) (n3,m) (n4,m)
7622 yields an output 3d array with shape: (5, 1+max(n0,n1,n2,n3,n4), m )
7623 The extra "1+" column is used for including annotation of the n0, n1, n2, n3, n4  values
7624 within the output array.
7625
7626 The canonical usage is for combination of paired properties with m=2 however
7627 the implementation could easily be generalized to work with higher dimensions if necessary.
7628
7629 Note that if the n0,n1,n2,... dimensions are very different then the combined array will
7630 be inefficient with lots of padding so it makes sense to avoid large differences.
7631 When all the n are equal the annotation and padding could be disabled by setting annotate=false.
7632
7633 See also:
7634
7635 tests/NPInterp.py:np_irregular_combine
7636     python prototype
7637
7638 test/NPCombineTest.cc
7639     testing this NP::Combine and NP::interp on the combined array
7640
7641
7642 annotate:true && parasite!=nullptr
7643     parasite array:
7644
7645     * must be 1d
7646     * same length as aa vector : ie one value is provided per input array
7647     * same dtype as the arrays
7648
7649     The single parasitic values per input array are incorporated
7650     into the -2 item slot in the combined array.
7651
7652 **/
7653 inline NP* NP::Combine(const std::vector<const NP*>& aa, bool annotate, const NP* parasite)  // static
7654 {
7655     assert( aa.size() > 0 );
7656     const NP* a0 = aa[0] ;
7657
7658     const char* dtype0 = a0->dtype ;
7659     INT ebyte0 = a0->ebyte ;
7660     unsigned ndim0 = a0->shape.size() ;
7661     unsigned ldim0 = a0->shape[ndim0-1] ;
7662     unsigned fdim_mx = a0->shape[0] ;
7663
7664     for(unsigned i=1 ; i < aa.size() ; i++)
7665     {
7666         const NP* a = aa[i];
7667         bool dtype_expect = strcmp( a->dtype, dtype0 ) == 0  ;
7668         if(!dtype_expect) std::cerr << "NP::Combine : input arrays must all have same dtype " << std::endl;
7669         assert( dtype_expect );
7670
7671         unsigned ndim = a->shape.size() ;
7672         bool ndim_expect = ndim == ndim0  ;
7673         if(!ndim_expect) std::cerr << "NP::Combine : input arrays must all have an equal number of dimensions " << std::endl;
7674         assert( ndim_expect );
7675
7676         unsigned ldim = a->shape[ndim-1] ;
7677         bool ldim_expect = ldim == ldim0 ;
7678         if(!ldim_expect) std::cerr << "NP::Combine : last dimension of the input arrays must be equal " << std::endl ;
7679         assert( ldim_expect );
7680
7681         unsigned fdim = a->shape[0] ;
7682         if( fdim > fdim_mx ) fdim_mx = fdim ;
7683     }
7684
7685
7686     if(parasite)
7687     {
7688         assert( parasite->shape.size() == 1 && parasite->shape[0] == INT(aa.size()) );
7689         assert( strcmp( parasite->dtype, dtype0) == 0 && "parasite arrays must have same dtype as those being combined" );
7690     }
7691
7692     unsigned width = fdim_mx + unsigned(annotate) ;
7693     assert( ldim0 == 2 && "last dimension must currently be 2");
7694
7695     NP* c = new NP(a0->dtype, aa.size(), width, ldim0 );
7696     unsigned item_bytes = c->item_bytes();
7697
7698     if(VERBOSE) std::cout
7699         << "NP::Combine"
7700         << " ebyte0 " << ebyte0
7701         << " item_bytes " << item_bytes
7702         << " aa.size " << aa.size()
7703         << " width " << width
7704         << " ldim0 " << ldim0
7705         << " c " << c->desc()
7706         << std::endl
7707         ;
7708
7709     assert( item_bytes % ebyte0 == 0 );
7710     unsigned item_values = item_bytes/ebyte0 ;
7711
7712     unsigned offset_bytes = 0 ;
7713     for(unsigned i=0 ; i < aa.size() ; i++)
7714     {
7715         const NP* a = aa[i];
7716         unsigned a_bytes = a->arr_bytes() ;
7717
7718         memcpy( c->data.data() + offset_bytes ,  a->data.data(),  a_bytes );
7719
7720         // NB: a_bytes may be less than item_bytes
7721         // effectively are padding to allow ragged arrays to be handled together
7722
7723         offset_bytes += item_bytes ;
7724     }
7725
7726     if( annotate )
7727     {
7728         if( ebyte0 == 4 )
7729         {
7730             float* cc = c->values<float>();
7731             const float* pp = parasite ? parasite->cvalues<float>() : nullptr ;
7732
7733             UIF32 uif32 ;
7734             for(unsigned i=0 ; i < aa.size() ; i++)
7735             {
7736                 const NP* a = aa[i];
7737                 uif32.u = a->shape[0] ;
7738                 if(VERBOSE) std::cout << "NP::Combine annotate " << i << " uif32.u  " << uif32.u  << std::endl ;
7739                 *(cc + (i+1)*item_values - 1) = uif32.f ;
7740                 if(pp) *(cc + (i+1)*item_values - 2) = pp[i] ;
7741                 // (i+1)*item_bytes/ebyte0 is off the edge, then -1 to be the last value
7742             }
7743         }
7744         else if( ebyte0 == 8 )
7745         {
7746             double* cc = c->values<double>();
7747             const double* pp = parasite ? parasite->cvalues<double>() : nullptr ;
7748
7749             UIF64 uif64 ;
7750             for(unsigned i=0 ; i < aa.size() ; i++)
7751             {
7752                 const NP* a = aa[i];
7753                 uif64.u = a->shape[0] ;
7754                 if(VERBOSE) std::cout << "NP::Combine annotate " << i << " uif64.u  " << uif64.u  << std::endl ;
7755                 *(cc + (i+1)*item_values - 1) = uif64.f ;
7756                 if(pp) *(cc + (i+1)*item_values - 2) = pp[i] ;
7757             }
7758
7759             c->set_preserve_last_column_integer_annotation() ;
7760             // make the annotation survive MakeNarrow
7761             // (currently annotation is scrubbed by MakeWide but could be easily be implented)
7762         }
7763     }
7764     return c ;
7765 }
7766
7767 template<typename... Args> inline NP* NP::Combine_(Args ... args)  // Combine_ellipsis
7768 {
7769     std::vector<const NP*> aa = {args...};
7770     bool annotate = true ;
7771     return Combine(aa, annotate);
7772 }
7773
7774
7775 inline bool NP::Exists(const char* base, const char* rel,  const char* name) // static
7776 {
7777     std::string path = U::form_path(base, rel, name);
7778     return Exists(path.c_str());
7779 }
7780 inline bool NP::Exists(const char* dir, const char* name) // static
7781 {
7782     std::string path = U::form_path(dir, name);
7783     return Exists(path.c_str());
7784 }
7785 inline bool NP::Exists(const char* path_) // static
7786 {
7787     const char* path = U::Resolve(path_);
7788     std::ifstream fp(path, std::ios::in|std::ios::binary);
7789     return fp.fail() ? false : true ;
7790 }
7791
7792 inline bool NP::ExistsSidecar( const char* path, const char* ext ) // static
7793 {
7794     std::string vstr_path = U::ChangeExt(path, ".npy", ext );
7795     return Exists(vstr_path.c_str()) ;
7796 }
7797
7798
7799
7800 inline bool NP::IsNoData(const char* path) // static
7801 {
7802     return path && strlen(path) > 0 && path[0] == NODATA_PREFIX ;
7803 }
7804
7805 inline const char* NP::PathWithNoDataPrefix(const char* path) // static
7806 {
7807     if(path == nullptr) return nullptr ;
7808     if(IsNoData(path)) return path ;   // dont add prefix if one already present
7809
7810     std::stringstream ss ;
7811     ss << NODATA_PREFIX << path ;
7812     std::string str = ss.str() ;
7813     return strdup(str.c_str());
7814 }
7815
7816
7817
7818
7819 /**
7820 NP::load(const char*, const char*)
7821 -------------------------------------
7822
7823 Formerly used this signature for dir/name loading but as that
7824 is now done at static level are repurposing to do both ordinary
7825 and slice loading.
7826
7827 Formerly read an arbitrary initial buffer size,
7828 are now reading up to first newline, which marks the
7829 end of the header, then adding the newline to the
7830 header string for correctness as getline consumes the
7831 newline from the stream without returning it.
7832
7833 **/
7834
7835 inline int NP::load(const char* _path, const char* _sli )
7836 {
7837     if(VERBOSE) std::cerr << "[ NP::load [" << ( _path ? _path : "-" ) << "]\n" ;
7838
7839     std::ifstream* fp = load_header(_path, _sli);
7840     if( fp == nullptr )
7841     {
7842         std::cerr << "NP::load Failed to load from path [" << ( _path ? _path : "-" ) << "]\n" ;
7843         //std::raise(SIGINT);
7844         return 1 ; // SIGINT might have a handler
7845     }
7846     load_data( fp, _sli );
7847     delete fp ;
7848
7849     const char* path = lpath.c_str();
7850     load_meta( path );
7851     load_names( path );
7852     load_labels( path );
7853
7854     if(VERBOSE) std::cerr << "] NP::load [" << ( _path ? _path : "-" ) << "]\n" ;
7855     return 0 ;
7856 }
7857
7858 inline int NP::load_from_buffer(const char* buffer, size_t size)
7859 {
7860     size_t loaded = 0 ;
7861     loaded = load_header_from_buffer(  buffer, size);
7862     loaded = load_data_from_buffer( buffer, size, loaded );
7863     loaded = load_meta_from_buffer( buffer, size, loaded );
7864     return loaded == size ? 0 : 1  ;
7865 }
7866
7867
7868
7869
7870
7871 inline std::ifstream* NP::load_header(const char* _path, const char* _sli)
7872 {
7873     nodata = IsNoData(_path) ;  // _path starting with NODATA_PREFIX currently '@'
7874     const char* path = nodata ? _path + 1 : _path ;
7875
7876     lpath = path ;  // loadpath
7877     lfold = U::DirName(path);
7878
7879     std::ifstream* fp = new std::ifstream(path, std::ios::in|std::ios::binary);
7880     if(fp->fail())
7881     {
7882         std::cerr << "NP::load_header std::ifstream FAIL for path [" << ( path ? path : "-" ) << "]\n" ;
7883         delete fp ;
7884         return nullptr ;
7885     }
7886
7887     std::getline(*fp, _hdr );
7888     _hdr += '\n' ;
7889
7890     bool data_resize = !nodata && _sli == nullptr ; // DEFER data resize when active slice
7891     decode_header(data_resize);
7892
7893     return fp ;
7894 }
7895
7896 inline size_t NP::load_header_from_buffer(const char* buffer, size_t size)
7897 {
7898     char q = '\n' ;
7899     size_t pos = FindChar(buffer, size, q);
7900     if( pos == 0 || pos + 1 == size ) return 0 ;
7901
7902     nodata = false ;
7903     lpath = "load_from_buffer" ;
7904     lfold = "" ;
7905
7906     _hdr.resize( pos + 1 ); // include '\n' in the hdr
7907     _hdr.assign( buffer, pos+1 );
7908
7909     if(0) std::cout << "NP::load_header_from_buffer _hdr[\n" << HexDump(_hdr) << "]\n" ;
7910
7911     bool data_resize = true ;
7912     decode_header( data_resize );
7913
7914     return pos + 1 ;
7915 }
7916
7917 inline size_t NP::load_data_from_buffer( const char* buffer, size_t /*size*/, size_t pos )
7918 {
7919      size_t data_size = arr_bytes() ; // available after parsing header
7920      memcpy( bytes(),  buffer + pos, data_size );
7921      return pos + data_size ;
7922 }
7923
7924 inline size_t NP::load_meta_from_buffer( const char* buffer, size_t size, size_t pos )
7925 {
7926      size_t meta_size = size - uhdr_bytes() - arr_bytes() ; // available after parsing header
7927      if(pos > size )             throw std::out_of_range("Invalid buffer pos");
7928      if(meta_size + pos > size ) throw std::out_of_range("Invalid meta_size");
7929      /*
7930      meta.resize( meta_size );
7931      memcpy( meta.data(),  buffer + pos, meta_size );
7932      */
7933
7934      meta.assign(buffer+pos, meta_size);
7935
7936      return pos + meta_size ;
7937 }
7938
7939
7940
7941
7942
7943
7944
7945 inline bool NP::HasChar(const char* buffer, size_t size, char q)  // static
7946 {
7947     const char* qptr = (const char*)memchr(buffer, q, size);
7948     return qptr != nullptr ;
7949 }
7950 inline size_t NP::FindChar(const char* buffer, size_t size, char q)  // static
7951 {
7952     const char* qptr = (const char*)memchr(buffer, q, size);
7953     return qptr ? (size_t)(qptr - buffer) : std::numeric_limits<size_t>::max() ;
7954 }
7955
7956 /**
7957 NP::FindChar_
7958 ----------------
7959
7960 Usage::
7961
7962      auto result = NP::FindChar("hello", 5, 'l')
7963      if( result.has_value() ) std::cout << " HAS VALUE : " << result.value()  << "\n";
7964      else                     std::cout << "Character not found\n";
7965
7966
7967 **/
7968
7969 inline std::optional<size_t> NP::FindChar_(const char* buffer, size_t size, char q)
7970 {
7971     const char* qptr = (const char*)memchr(buffer, q, size);
7972     return qptr ? std::optional<size_t>(qptr - buffer) : std::nullopt;
7973 }
7974
7975
7976
7977
7978
7979
7980 /**
7981 NP::load_data
7982 ---------------
7983
7984 Invoked by NP::load
7985
7986 **/
7987
7988
7989 inline void NP::load_data( std::ifstream* fp, const char* _sli )
7990 {
7991     if(nodata && VERBOSE) std::cerr << "NP::load_data SKIP reading data as nodata:true : data.size() " << data.size() << "\n" ;
7992     if(nodata) return ;
7993
7994     if(LooksLikeSliceIndexStringIsEmpty(_sli) )  // eg nullptr OR "" OR "[]"
7995     {
7996         fp->read(bytes(), arr_bytes() );
7997     }
7998     else
7999     {
8000         if(LooksLikeSliceIndexString(_sli ))  // eg _sli "[0:10]"
8001         {
8002             load_data_sliced( fp, _sli );
8003         }
8004         else                                  // eg _sli "/tmp/w54.npy[0:1]"
8005         {
8006             load_data_where( fp, _sli );
8007         }
8008     }
8009 }
8010
8011
8012
8013
8014
8015 /**
8016 NP::load_data_sliced
8017 ----------------------
8018
8019 1. parse *_sli* into NP_slice : (start,stop,step)
8020 2. determine number of array items that will be present after slicing
8021 3. change array shape to conform to slicing
8022 4. read only the slice specified items into the data vector
8023
8024 **/
8025
8026 inline void NP::load_data_sliced( std::ifstream* fp, const char* _sli )
8027 {
8028     NP_slice<INT> sli = {} ;
8029     parse_slice<INT>(sli, _sli);
8030
8031     INT count0 = 0 ;
8032     for(INT idx=sli.start ; idx < sli.stop ; idx += sli.step ) count0 += 1 ;
8033     INT sliced_ni = count0 ;
8034
8035     std::string sstr_0 = sstr();
8036     bool data_resize = true ;
8037     _change_shape_ni(sliced_ni, data_resize);
8038     std::string sstr_1 = sstr();
8039
8040     // read only the slice specified items
8041
8042     INT hdrsize = hdr_bytes() ;  // NB not same as  strlen(_hdr.c_str())
8043     INT itemsize = item_bytes();
8044
8045     if(VERBOSE)
8046     std::cout
8047         << "NP::load_data_sliced"
8048         << " hdrsize " << hdrsize
8049         << " strlen(_hdr.c_str() " << strlen(_hdr.c_str())
8050         << " itemsize " << itemsize
8051         << "\n"
8052         ;
8053
8054     INT count = 0 ;
8055     for(INT idx=sli.start ; idx < sli.stop ; idx += sli.step )
8056     {
8057         fp->seekg( hdrsize + idx*itemsize );  // move file pointer to *idx* item
8058         fp->read( bytes() + count*itemsize, itemsize );
8059         count += 1 ;
8060     }
8061     assert( count == sliced_ni );
8062
8063     if(VERBOSE)
8064     std::cout
8065         << "NP::load_data_sliced\n"
8066         << " _sli " << _sli << "\n"
8067         << " sli " << sli.desc() << "\n"
8068         << " sstr_0 " << sstr_0 << "\n"
8069         << " sstr_1 " << sstr_1 << "\n"
8070         << " sliced_ni  " << sliced_ni << "\n"
8071         << "\n"
8072         ;
8073 }
8074
8075
8076 /**
8077 NP::load_data_where
8078 --------------------
8079
8080 Example spec that would cause this to be called::
8081
8082     /tmp/w54.npy           ## first loads array of indices that controls which items to load
8083     /tmp/w54.npy[0:1]      ## first loads slice of indices array that controls which items to load
8084
8085 1. load the where array
8086 2. count *sliced_ni* indices from where array that are less than ni0
8087 3. change this array shape to fit *sliced_ni* items with data_resize:true
8088 4. seekg read the items selected by the where array
8089
8090 **/
8091
8092
8093 inline void NP::load_data_where( std::ifstream* fp, const char* spec )
8094 {
8095     char* path = nullptr ;
8096     char* sli = nullptr ;
8097     bool with_suffix = LooksLikeSliceIndexStringSuffix(spec, &path, &sli );  // ends with eg "[0:5]"
8098
8099     NP* w = LoadSlice_(path, sli );
8100
8101     if(VERBOSE)
8102     std::cout
8103        << "NP::load_data_where\n"
8104        << " spec {" << ( spec ? spec : "-" ) << "}\n"
8105        << " with_suffix " << ( with_suffix ? "YES" : "NO " ) << "\n"
8106        << " path {" << ( path ? path : "-" ) << "}\n"
8107        << " sli {" << ( sli ? sli : "-" ) << "}\n"
8108        << " w " << ( w ? w->sstr() : "-" ) << "\n"
8109        ;
8110
8111
8112     assert( w );
8113     assert( w->uifc == 'i' );
8114     assert( w->ebyte == 4 || w->ebyte == 8 );
8115     assert( w->shape.size() == 1 );
8116
8117     const int* ww4 = w->cvalues<int>();
8118     const INT* ww8 = w->cvalues<INT>();
8119
8120     INT wni = w->num_items() ;
8121     INT ni0 = shape[0] ;
8122
8123     // count valid indices
8124     INT sliced_ni = 0 ;
8125     for(INT i = 0 ; i < wni ; i++ )
8126     {
8127         INT idx = w->ebyte == 4 ? ww4[i] : ww8[i] ;
8128         bool valid_idx =  idx >= 0 && idx < ni0 ;
8129         if(valid_idx) sliced_ni += 1 ;
8130     }
8131
8132
8133     std::string sstr_0 = sstr();
8134     bool data_resize = true ;
8135     _change_shape_ni(sliced_ni, data_resize);
8136     std::string sstr_1 = sstr();
8137
8138     // read only the slice specified items
8139
8140     INT hdrsize = hdr_bytes() ;  // NB not same as  strlen(_hdr.c_str())
8141     INT itemsize = item_bytes();
8142
8143     if(VERBOSE)
8144     std::cout
8145         << "NP::load_data_where"
8146         << " wni " << wni
8147         << " ni0 " << ni0
8148         << " hdrsize " << hdrsize
8149         << " strlen(_hdr.c_str() " << strlen(_hdr.c_str())
8150         << " itemsize " << itemsize
8151         << "\n"
8152         ;
8153
8154
8155     INT count = 0 ;
8156     for(INT i = 0 ; i < wni ; i++ )
8157     {
8158         INT idx = w->ebyte == 4 ? ww4[i] : ww8[i] ;
8159         bool valid_idx =  idx >= 0 && idx < ni0 ;
8160         if(!valid_idx) continue ;
8161         fp->seekg( hdrsize + idx*itemsize );  // move file pointer to *idx* item
8162         fp->read( bytes() + count*itemsize, itemsize );
8163         count += 1 ;
8164     }
8165     assert( count == sliced_ni );
8166
8167     if(VERBOSE)
8168     std::cout
8169         << "NP::load_data_where\n"
8170         << " spec " << spec << "\n"
8171         << " sstr_0 " << sstr_0 << "\n"
8172         << " sstr_1 " << sstr_1 << "\n"
8173         << " sliced_ni  " << sliced_ni << "\n"
8174         << "\n"
8175         ;
8176
8177 }
8178
8179
8180
8181
8182
8183
8184 inline int NP::load_string_( const char* path, const char* ext, std::string& str )
8185 {
8186     std::string str_path = U::ChangeExt(path, ".npy", ext );
8187     std::ifstream fp(str_path.c_str(), std::ios::in);
8188     if(fp.fail()) return 1 ;
8189
8190     std::stringstream ss ;
8191     std::string line ;
8192     while (std::getline(fp, line))
8193     {
8194         ss << line << std::endl ;   // getline swallows new lines
8195     }
8196     str = ss.str();
8197     return 0 ;
8198 }
8199
8200 inline int NP::load_strings_( const char* path, const char* ext, std::vector<std::string>* vstr )
8201 {
8202     if(vstr == nullptr) return 1 ;
8203     std::string vstr_path = U::ChangeExt(path, ".npy", ext );
8204     std::ifstream fp(vstr_path.c_str(), std::ios::in);
8205     int rc = fp.fail() ? 1 : 0 ;
8206
8207     if(false) std::cout
8208         << "NP::load_strings_" << std::endl
8209         << " path " << ( path ? path : "-" ) << std::endl
8210         << " vstr_path " << vstr_path << std::endl
8211         << " rc " << rc << std::endl
8212         ;
8213
8214     std::string line ;
8215     while (std::getline(fp, line)) vstr->push_back(line);  // getline swallows new lines
8216     return 0 ;
8217 }
8218
8219
8220 inline int NP::load_meta(  const char* path ){  return load_string_( path, "_meta.txt",  meta  ) ; }
8221 inline int NP::load_names( const char* path ){  return load_strings_( path, "_names.txt", &names ) ; }
8222 inline int NP::load_labels( const char* path )
8223 {
8224     labels = ExistsSidecar(path, "_labels.txt") ? new std::vector<std::string> : nullptr ;
8225     return load_strings_( path, "_labels.txt", labels ) ;
8226 }
8227
8228
8229 inline void NP::save_string_(const char* path, const char* ext, const std::string& str ) const
8230 {
8231     if(str.empty()) return ;
8232     std::string str_path = U::ChangeExt(path, ".npy", ext );
8233     if(VERBOSE) std::cout << "NP::save_string_ str_path [" << str_path  << "]" << std::endl ;
8234     std::ofstream fps(str_path.c_str(), std::ios::out);
8235     fps << str ;
8236 }
8237
8238 inline void NP::save_strings_(const char* path, const char* ext, const std::vector<std::string>& vstr ) const
8239 {
8240     if(vstr.size() == 0) return ;
8241     std::string vstr_path = U::ChangeExt(path, ".npy", ext );
8242     if(VERBOSE) std::cout << "NP::save_strings_ vstr_path [" << vstr_path  << "]" << std::endl ;
8243
8244     char delim = '\n' ;
8245     std::ofstream fps(vstr_path.c_str(), std::ios::out);
8246     for(unsigned i=0 ; i < vstr.size() ; i++)
8247     {
8248         const std::string& str = vstr[i] ;
8249         fps << str << delim ;
8250     }
8251 }
8252
8253
8254 inline void NP::save_meta(  const char* path) const { save_string_(path, "_meta.txt",  meta  );  }
8255 inline void NP::save_names( const char* path) const { save_strings_(path, "_names.txt", names );  }
8256 inline void NP::save_labels(const char* path) const { if(labels) save_strings_(path, "_labels.txt", *labels );  }
8257
8258
8259 inline void NP::save_header(const char* path)
8260 {
8261     update_headers();
8262     std::ofstream stream(path, std::ios::out|std::ios::binary);
8263     stream << _hdr ;
8264 }
8265
8266 inline void NP::old_save(const char* path)  // non-const due to update_headers
8267 {
8268     std::cout << "NP::save path [" << path  << "]" << std::endl ;
8269     update_headers();
8270     std::ofstream stream(path, std::ios::out|std::ios::binary);
8271     stream << _hdr ;
8272     stream.write( bytes(), arr_bytes() );
8273 }
8274
8275 inline void NP::save(const char* path_) const
8276 {
8277     const char* path = U::Resolve(path_);  // path is nullptr with unexpanded envvar token
8278     if(path == nullptr) std::cerr << "NP::save failed to U::Resolve path_ " << ( path_ ? path_ : "-" ) << std::endl ;
8279     if(path == nullptr) return ;
8280
8281     int rc = U::MakeDirsForFile(path);
8282     const char* _save_VERBOSE = "NP__save_VERBOSE" ;
8283     bool save_VERBOSE = getenv(_save_VERBOSE) != nullptr ;
8284
8285     if(VERBOSE||save_VERBOSE) std::cout
8286           << "NP::save"
8287           << " " << _save_VERBOSE << ":" << ( save_VERBOSE ? "YES" : "NO " )
8288           << " path [" << ( path ? path : "-" ) << "]"
8289           << " rc:" << rc
8290           << "\n"
8291           ;
8292
8293     assert( rc == 0 );
8294
8295     std::string hdr = make_header();
8296     std::ofstream fpa(path, std::ios::out|std::ios::binary);
8297     fpa << hdr ;
8298     fpa.write( bytes(), arr_bytes() );
8299
8300     save_meta( path);
8301     save_names(path);
8302     save_labels(path);
8303 }
8304
8305 inline void NP::save(const char* dir, const char* reldir, const char* name) const
8306 {
8307     if(VERBOSE) std::cout << "NP::save dir [" << ( dir ? dir : "-" )  << "] reldir [" << ( reldir ? reldir : "-" )  << "] name [" << name << "]" << std::endl ;
8308     std::string path = U::form_path(dir, reldir, name);
8309     save(path.c_str());
8310 }
8311
8312 inline void NP::save(const char* dir, const char* name) const
8313 {
8314     if(dir == nullptr || name == nullptr) std::cerr << "NP::save FAIL dir OR name arg is null " << std::endl ;
8315     if(dir == nullptr || name == nullptr) return ;
8316
8317     std::string path = U::form_path(dir, name);
8318     save(path.c_str());
8319 }
8320
8321 inline void NP::save_jsonhdr(const char* path) const
8322 {
8323     std::string json = make_jsonhdr();
8324     std::ofstream stream(path, std::ios::out|std::ios::binary);
8325     stream << json ;
8326 }
8327
8328 inline void NP::save_jsonhdr(const char* dir, const char* name) const
8329 {
8330     std::string path = U::form_path(dir, name);
8331     save_jsonhdr(path.c_str());
8332 }
8333
8334 inline std::string NP::get_jsonhdr_path() const
8335 {
8336     assert( lpath.empty() == false );
8337     assert( U::EndsWith(lpath.c_str(), ".npy" ) );
8338     std::string path = U::ChangeExt(lpath.c_str(), ".npy", ".npj");
8339     return path ;
8340 }
8341
8342 inline void NP::save_jsonhdr() const
8343 {
8344     std::string path = get_jsonhdr_path() ;
8345     std::cout << "NP::save_jsonhdr to " << path << std::endl  ;
8346     save_jsonhdr(path.c_str());
8347 }
8348
8349
8350 template <typename T> inline std::string NP::_present(T v) const
8351 {
8352     std::stringstream ss ;
8353     ss << " " << std::fixed << std::setw(8) << v  ;
8354     return ss.str();
8355 }
8356
8357 // needs specialization to _present char as an int rather than a character
8358 template<>  inline std::string NP::_present(char v) const
8359 {
8360     std::stringstream ss ;
8361     ss << " " << std::fixed << std::setw(8) << int(v)  ;
8362     return ss.str();
8363 }
8364 template<>  inline std::string NP::_present(unsigned char v) const
8365 {
8366     std::stringstream ss ;
8367     ss << " " << std::fixed << std::setw(8) << unsigned(v)  ;
8368     return ss.str();
8369 }
8370 template<>  inline std::string NP::_present(float v) const
8371 {
8372     std::stringstream ss ;
8373     ss << " " << std::setw(10) << std::fixed << std::setprecision(3) << v ;
8374     return ss.str();
8375 }
8376 template<>  inline std::string NP::_present(double v) const
8377 {
8378     std::stringstream ss ;
8379     ss << " " << std::setw(10) << std::fixed << std::setprecision(3) << v ;
8380     return ss.str();
8381 }
8382
8383 /**
8384 NP::_dump
8385 -----------
8386
8387 **/
8388 template <typename T> inline void NP::_dump(INT i0_, INT i1_, INT j0_, INT j1_ ) const
8389 {
8390     INT ni = NPS::ni_(shape) ;  // ni_ nj_ nk_ returns shape dimension size or 1 if no such dimension
8391     INT nj = NPS::nj_(shape) ;
8392     INT nk = NPS::nk_(shape) ;
8393
8394     INT i0 = i0_ == -1 ? 0                : i0_ ;
8395     INT i1 = i1_ == -1 ? std::min(ni, TEN) : i1_ ;
8396
8397     INT j0 = j0_ == -1 ? 0                : j0_ ;
8398     INT j1 = j1_ == -1 ? std::min(nj, TEN) : j1_ ;
8399
8400
8401     std::cout
8402        << desc()
8403        << std::endl
8404        << " array dimensions "
8405        << " ni " << ni
8406        << " nj " << nj
8407        << " nk " << nk
8408        << " item range  "
8409        << " i0 " << i0
8410        << " i1 " << i1
8411        << " j0 " << j0
8412        << " j1 " << j1
8413        << std::endl
8414        ;
8415
8416     const T* vv = cvalues<T>();
8417
8418     for(INT i=i0 ; i < i1 ; i++){
8419         std::cout << "[" << std::setw(4) << i  << "] " ;
8420         for(INT j=j0 ; j < j1 ; j++){
8421             for(INT k=0 ; k < nk ; k++)
8422             {
8423                 INT index = i*nj*nk + j*nk + k ;
8424                 T v = *(vv + index) ;
8425                 if(k%4 == 0 ) std::cout << " : " ;
8426                 std::cout << _present<T>(v)  ;
8427
8428             }
8429             //std::cout << std::endl ;
8430         }
8431         std::cout << std::endl ;
8432     }
8433
8434
8435     std::cout
8436         << "meta:[" << meta << "]"
8437         << std::endl
8438         ;
8439 }
8440
8441
8442 template <typename T> inline void NP::read(const T* src)
8443 {
8444     T* v = values<T>();
8445
8446     NPS sh(shape);
8447     for(INT i=0 ; i < sh.ni_() ; i++ )
8448     for(INT j=0 ; j < sh.nj_() ; j++ )
8449     for(INT k=0 ; k < sh.nk_() ; k++ )
8450     for(INT l=0 ; l < sh.nl_() ; l++ )
8451     for(INT m=0 ; m < sh.nm_() ; m++ )
8452     for(INT o=0 ; o < sh.no_() ; o++ )
8453     {
8454         INT index = sh.idx(i,j,k,l,m,o);
8455         *(v + index) = *(src + index ) ;
8456     }
8457 }
8458
8459 template <typename T> inline void NP::read2(const T* src)
8460 {
8461     bool consistent = sizeof(T) == ebyte ;
8462     if(!consistent) std::cout << "NP::read2 FAIL not consistent sizeof(T): " << sizeof(T) << " and ebyte: " << ebyte << std::endl ;
8463     assert( consistent );
8464     memcpy( bytes(), src, arr_bytes() );
8465 }
8466
8467 inline void NP::read_bytes(char* src)
8468 {
8469     memcpy( bytes(), src, arr_bytes() );
8470 }
8471
8472
8473
8474
8475
8476 template <typename T>
8477 inline void NP::write(T* dst) const
8478 {
8479     assert( sizeof(T) == ebyte );
8480     memcpy( dst, bytes(), arr_bytes() );
8481 }
8482
8483
8484
8485
8486 template <typename T> inline void NP::Write(const char* dir, const char* reldir, const char* name, const T* data, INT ni_, INT nj_, INT nk_, INT nl_, INT nm_, INT no_ ) // static
8487 {
8488     std::string path = U::form_path(dir, reldir, name);
8489     Write( path.c_str(), data, ni_, nj_, nk_, nl_, nm_, no_ );
8490 }
8491
8492 template <typename T> inline void NP::Write(const char* dir, const char* name, const T* data, INT ni_, INT nj_, INT nk_, INT nl_, INT nm_, INT no_ ) // static
8493 {
8494     std::string path = U::form_path(dir, name);
8495     Write( path.c_str(), data, ni_, nj_, nk_, nl_, nm_, no_ );
8496 }
8497
8498 template <typename T> inline void NP::Write(const char* path, const T* data, INT ni_, INT nj_, INT nk_, INT nl_, INT nm_, INT no_ ) // static
8499 {
8500     std::string dtype = descr_<T>::dtype() ;
8501     if(VERBOSE) std::cout
8502         << "NP::Write"
8503         << " dtype " << dtype
8504         << " ni  " << std::setw(7) << ni_
8505         << " nj  " << nj_
8506         << " nk  " << nk_
8507         << " nl  " << nl_
8508         << " nm  " << nm_
8509         << " no  " << no_
8510         << " path " << path
8511         << std::endl
8512         ;
8513
8514     if(ni_ == 0) return ;
8515     NP a(dtype.c_str(), ni_,nj_,nk_,nl_,nm_,no_) ;
8516     a.read(data);
8517     a.save(path);
8518 }
8519
8520
8521
8522
// Explicit instantiations of the NP::Write overload taking two const char* arguments,
// a data pointer and six INT dimensions, for float, double, int and unsigned elements.
template void NP::Write<float>(   const char*, const char*, const float*,        INT, INT, INT, INT, INT, INT );
template void NP::Write<double>(  const char*, const char*, const double*,       INT, INT, INT, INT, INT, INT );
template void NP::Write<int>(     const char*, const char*, const int*,          INT, INT, INT, INT, INT, INT );
template void NP::Write<unsigned>(const char*, const char*, const unsigned*,     INT, INT, INT, INT, INT, INT );
8527
8528
8529 template<typename T> void NP::Write(const char* dir, const char* name, const std::vector<T>& values )
8530 {
8531     if(values.size() > 0) NP::Write(dir, name, values.data(), values.size() );
8532 }
8533
// Explicit instantiations of the std::vector taking NP::Write overload
// for float, double, int and unsigned elements.
template void NP::Write<float>(   const char*, const char*, const std::vector<float>& );
template void NP::Write<double>(  const char*, const char*, const std::vector<double>&  );
template void NP::Write<int>(     const char*, const char*, const std::vector<int>& );
template void NP::Write<unsigned>(const char*, const char*, const std::vector<unsigned>& );
8538
8539
8540
8541 inline void NP::WriteNames(
8542     const char* dir,
8543     const char* name,
8544     const std::vector<std::string>& names,
8545     unsigned num_names_,
8546     bool append )
8547 {
8548     std::string _path = U::form_path(dir, name);
8549     const char* path = _path.c_str();
8550     WriteNames(path, names, num_names_, append  );
8551 }
8552
8553
8554 inline void NP::WriteNames(
8555     const char* dir,
8556     const char* reldir,
8557     const char* name,
8558     const std::vector<std::string>& names,
8559     unsigned num_names_,
8560     bool append )
8561 {
8562     std::string _path = U::form_path(dir, reldir, name);
8563     const char* path = _path.c_str();
8564     WriteNames(path, names, num_names_, append );
8565 }
8566
8567 /**
8568 NP::WriteNames
8569 ----------------
8570
8571 https://stackoverflow.com/questions/12929378/what-is-the-difference-between-iosapp-and-iosate
8572
8573 app : 'append'
8574     all output will be added (appended) to the end of the file.
8575     In other words you cannot write anywhere else in the file but at the end.
8576
8577 ate : 'at end'
8578     sets the stream position at the end of the file when you open it,
8579     but you are free to move it around (seek) and write wherever it pleases you.
8580
8581 num_names_
8582     when different from default of zero this restricts
8583     the number of names written (HMM: Why? What needs that?)
8584
8585
8586 **/
8587
8588 inline void NP::WriteNames(
8589     const char* path,
8590     const std::vector<std::string>& names,
8591     unsigned num_names_,
8592     bool append )
8593 {
8594     // if(names.size() == 0) return ;   DONT EARLY EXIT AS MORE REASONABLE TO TRUNCATE THE FILE WHEN THERE ARE NO NAMES
8595     int rc = U::MakeDirsForFile(path);
8596     if( rc != 0 ) std::cerr << "NP::WriteNames ERR creating dirs " << std::endl ;
8597     assert( rc == 0 );
8598
8599     unsigned names_size = names.size() ;
8600     unsigned num_names = num_names_ == 0 ? names_size : num_names_ ;
8601     assert( num_names <= names_size );
8602
8603     std::ios_base::openmode mode = std::ios::out|std::ios::binary ;
8604     if(append) mode |= std::ios::app ;
8605
8606     std::ofstream stream(path, mode );
8607     for( unsigned i=0 ; i < num_names ; i++) stream << names[i] << std::endl ;
8608     stream.close();
8609 }
8610
8611
8612
8613 inline void NP::WriteNames_Simple(
8614     const char* dir,
8615     const char* name,
8616     const std::vector<std::string>& names )
8617 {
8618     std::string _path = U::form_path(dir, name);
8619     const char* path = _path.c_str();
8620
8621     WriteNames_Simple(path, names );
8622
8623 }
8624
8625 inline void NP::WriteNames_Simple(
8626     const char* path,
8627     const std::vector<std::string>& names )
8628 {
8629     int rc = U::MakeDirsForFile(path);
8630     if( rc != 0 ) std::cerr << "NP::WriteNames_Simple ERR creating dirs " << std::endl ;
8631     assert( rc == 0 );
8632
8633     INT num_names = names.size();
8634     std::ios_base::openmode mode = std::ios::out|std::ios::binary ;
8635     std::ofstream fp(path, mode );
8636     for( INT i=0 ; i < num_names ; i++) fp << names[i] << std::endl ;
8637     fp.close();
8638 }
8639
8640
8641
8642
8643 inline void NP::WriteString(const char* dir, const char* name_, const char* ext, const std::string& str, bool append ) // static
8644 {
8645     std::string name = U::form_name( name_, ext );
8646     std::string path_ = U::form_path(dir, name.c_str() );
8647     const char* path = path_.c_str();
8648     const char* xpath = U::Resolve(path);
8649
8650     if(VERBOSE) std::cout
8651        << "NP::WriteString"
8652        << " path " << ( path ? path : "-" )
8653        << " xpath " << ( xpath ? xpath : "-" )
8654        << " str.size " << str.size()
8655        << std::endl
8656        ;
8657
8658     std::ios_base::openmode mode = std::ios::out|std::ios::binary ;
8659     if(append) mode |= std::ios::app ;
8660     std::ofstream stream(xpath, mode );
8661     stream << str << std::endl ;
8662     stream.close();
8663 }
8664
8665
8666 inline void NP::ReadNames(const char* dir, const char* name, std::vector<std::string>& names )
8667 {
8668     std::stringstream ss ;
8669     ss << dir << "/" << name ;
8670     std::string path = ss.str() ;
8671     ReadNames(path.c_str(), names);
8672 }
8673 inline void NP::ReadNames(const char* path, std::vector<std::string>& names )
8674 {
8675     std::ifstream ifs(path);
8676     std::string line;
8677     while(std::getline(ifs, line)) names.push_back(line) ;
8678
8679 }
8680
8681 template<typename T>
8682 inline std::string NP::DescKV(const std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extras)
8683 {
8684     std::stringstream ss ;
8685     assert( keys.size() == vals.size() );
8686     if(extras) assert( extras->size() == keys.size() );
8687     for(unsigned i=0 ; i < keys.size() ; i++)
8688     {
8689          ss
8690             << std::setw(20) << keys[i]
8691             << " : "
8692             << std::scientific << std::setw(10) << std::setprecision(5) << vals[i]
8693             << " : "
8694             << ( extras ? (*extras)[i] : "" )
8695             << std::endl
8696             ;
8697     }
8698     std::string s = ss.str();
8699     return s ;
8700 }
8701
8702
8703 template<typename T>
8704 inline void NP::ReadKV(const char* dir, const char* name, std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extras )
8705 {
8706     std::stringstream ss ;
8707     ss << dir << "/" << name ;
8708     std::string path = ss.str() ;
8709     ReadKV(path.c_str(), keys, vals, extras);
8710 }
8711
8712 template<typename T>
8713 inline void NP::ReadKV(const char* path, std::vector<std::string>& keys, std::vector<T>& vals, std::vector<std::string>* extras )
8714 {
8715     std::ifstream ifs(path);
8716     std::string line;
8717     while(std::getline(ifs, line))
8718     {
8719         std::string key ;
8720         T val ;
8721         std::string extra ;
8722
8723         std::istringstream iss(line);
8724         iss >> key >> val >> extra ;
8725
8726         if(VERBOSE) std::cout
8727             << "NP::ReadKV"
8728             << " key[" <<  key << "]"
8729             << " val[" <<  val << "]"
8730             << " extra[" <<  extra << "]"
8731             << std::endl ;
8732
8733         keys.push_back(key);
8734         vals.push_back(val);
8735         if(extras) extras->push_back(extra);
8736     }
8737 }
8738
8739 template<typename T>
8740 inline T NP::ReadKV_Value(const char* dir, const char* name, const char* key )
8741 {
8742     std::stringstream ss ;
8743     ss << dir << "/" << name ;
8744     std::string path = ss.str() ;
8745     return NP::ReadKV_Value<T>(path.c_str(), key );
8746 }
8747
8748 template<typename T>
8749 inline T NP::ReadKV_Value(const char* spec_or_path, const char* key)
8750 {
8751     const char* path = Resolve(spec_or_path);
8752
8753     std::vector<std::string> keys ;
8754     std::vector<T> vals ;
8755     std::vector<std::string> extras ;
8756
8757     ReadKV<T>(path, keys, vals, &extras );
8758
8759     std::vector<std::string>::iterator it = std::find(keys.begin(), keys.end(), key ) ;
8760
8761     if(it == keys.end())
8762     {
8763         std::cout
8764             << "NP::ReadKV_Value"
8765             << " FATAL "
8766             << " failed to find key " << key
8767             << std::endl
8768             ;
8769         std::cout << NP::DescKV<T>(keys, vals, &extras ) << std::endl ;
8770         assert(0);
8771     }
8772
8773     unsigned idx = std::distance( keys.begin(), it );
8774     return vals[idx] ;
8775 }
8776
8777
8778
8779 template <typename T>
8780 inline NP* NP::LoadFromTxtFile(const char* base, const char* relp )  // static
8781 {
8782     std::string path = U::form_path(base, relp);
8783     NP* a = LoadFromTxtFile<T>( path.c_str());
8784     a->lpath = path ;
8785     return a ;
8786 }
8787
8788
8789 /**
8790 NP::LoadFromTxtFile
8791 ----------------------
8792
8793 1. resolves spec_or_path into path
8794 2. reads txt from the file into str
8795 3. creates array with NP::LoadFromString
8796
8797 **/
8798
8799 template <typename T>
8800 inline NP* NP::LoadFromTxtFile(const char* spec_or_path )  // static
8801 {
8802     const char* path = Resolve(spec_or_path ) ;
8803
8804
8805
8806     if(!Exists(path))
8807     {
8808         std::cerr
8809             << "NP::ArrayFromTxtFile"
8810             << " FATAL path does not EXIST "
8811             << " spec_or_path [" << spec_or_path << "]"
8812             << " path [" << path << "]"
8813             << std::endl
8814             ;
8815         assert(0);
8816     }
8817
8818     const char* str = U::ReadString2(path);
8819     NP* a = LoadFromString<T>(str, path);
8820     a->lpath = path ;
8821     return a ;
8822 }
8823
8824
8825
8826
8827
8828 /**
8829 NP::FindUnit
8830 --------------
8831
8832 Each unit string is looked for within the line,
8833 the last in the units list that matches is returned.
8834
8835 **/
8836
8837 inline char* NP::FindUnit(const char* line, const std::vector<std::string>& units  ) // static
8838 {
8839     char* upos = nullptr ;
8840     for(unsigned i=0 ; i < units.size() ; i++)
8841     {
8842         const char* u = units[i].c_str();
8843         upos = (char*)strstr(line, u) ;
8844     }
8845     return upos ;
8846 }
8847
8848 inline void NP::Split(std::vector<std::string>& elems, const char* str, char delim)
8849 {
8850     std::stringstream uss(str) ;
8851     std::string elem ;
8852     while(std::getline(uss,elem,delim)) elems.push_back(elem) ;
8853 }
8854 inline void NP::GetUnits(std::vector<std::string>& units ) // static
8855 {
8856     Split(units, UNITS, ' ');
8857 }
8858 inline bool NP::IsListed(const std::vector<std::string>& ls, const char* str) // static
8859 {
8860     return std::find(ls.begin(), ls.end(), str ) != ls.end() ;
8861 }
8862 inline std::string NP::StringConcat(const std::vector<std::string>& ls, char delim ) // static
8863 {
8864     unsigned num = ls.size() ;
8865     std::stringstream ss ;
8866     for(unsigned i=0 ; i < num ; i++ )
8867     {
8868         ss << ls[i] ;
8869         if( i < num - 1) ss << delim ;
8870     }
8871     std::string cls = ss.str() ;
8872     return cls ;
8873 }
8874
8875
8876
8877 template <typename T>
8878 inline NP* NP::ZEROProp(T dscale)  // static
8879 {
8880     NP* a = NP::LoadFromString<T>(R"(
8881     1.55     *eV    0.0
8882     15.5     *eV    0.0
8883 )" );
8884
8885    a->pscale(dscale, 0);
8886    return a ;
8887 }
8888
8889
8890 /**
8891 NP::LoadFromString
8892 ----------------------
8893
8894 String format example::
8895
8896    ScintillationYield   9846/MeV
8897    BirksConstant1  12.05e-3*g/cm2/MeV
8898
8899 Each line is cleaned to correct the poor file format
8900 regaining whitespace between fields:
8901
8902 1. '/' prior to recognized unit strings are changed to ' '
8903 2. all '*' are changed to ' '
8904
8905 After cleanup, the number of fields on each line must be consistent
8906 for all lines of the string. Also the number of fields that
8907 can be converted to type T must be consistent for all lines.
8908
8909 So for the above example an array of shape (2,1) would be created
8910 with a names vector containing the non-unit strings, which
8911 allowed named access to values with NP::get_named_value
8912
8913 Another example, the below input txt with type "float" or "double"::
8914
8915     1.55     *eV    2.72832
8916     2.69531  *eV    2.7101
8917     2.7552   *eV    2.5918
8918     3.17908  *eV    1.9797
8919     15.5     *eV    1.9797
8920
8921 would yield an array of shape (5,2) with metadata key "unit" of "eV"
8922
8923 **/
8924
8925
8926 template <typename T>
8927 inline NP* NP::LoadFromString(const char* str, const char* path)  // static
8928 {
8929     // path is optional for debug messages
8930     //std::cout << "NP::LoadFromString " << ( path ? path : "-" ) << std::endl ;
8931
8932     std::vector<std::string> recognized_units ;
8933     GetUnits(recognized_units);
8934
8935     unsigned UNSET = ~0u ;
8936     unsigned num_field = UNSET ;
8937     unsigned num_column = UNSET ;
8938
8939     std::vector<std::string> units ;
8940     std::vector<std::string> other ;
8941     std::vector<T> value ;
8942
8943     std::string line ;
8944     std::stringstream fss(str) ;
8945     while(std::getline(fss, line))
8946     {
8947         char* l = (char*)line.c_str() ;
8948
8949         if(strlen(l) == 0) continue ;
8950         if(strlen(l) > 0 && l[0] == '#') continue ;
8951
8952         // if a unit string is found which is preceeded by '/' remove that
8953         // to regain whitespace between fields
8954         //
8955         char* upos = FindUnit(l, recognized_units) ;
8956         if(upos && (upos - l) > 0)
8957         {
8958             if(*(upos-1) == '/') *(upos-1) = ' ' ;
8959         }
8960
8961         ReplaceCharInsitu( l, '*', ' ', false );
8962
8963
8964         std::vector<std::string> fields ;
8965         std::string field ;
8966         std::istringstream iss(line);
8967         while( iss >> field )
8968         {
8969             const char* f = field.c_str();
8970             if(IsListed(recognized_units, f))
8971             {
8972                 if(!IsListed(units, f)) units.push_back(f);
8973             }
8974             else
8975             {
8976                 fields.push_back(field) ;
8977             }
8978         }
8979
8980         if(fields.size() == 0) continue ;
8981
8982         if( num_field == UNSET )
8983         {
8984             num_field = fields.size() ;
8985         }
8986         else if( fields.size() != num_field )
8987         {
8988             std::cerr
8989                 << "NP::LoadFromString"
8990                 << " WARNING : INCONSISTENT NUMBER OF FIELDS " << std::endl
8991                 << " [" << line << "]" << std::endl
8992                 << " fields.size : " << fields.size()
8993                 << " num_field : " << num_field
8994                 << " path " << ( path ? path : "-" )
8995                 << std::endl
8996                 ;
8997             assert(0);
8998         }
8999         assert( num_field != UNSET );
9000
9001         //std::cout << "[" << line << "] num_field " << num_field << std::endl;
9002
9003         unsigned line_column = 0u ;
9004         for(unsigned i=0 ; i < num_field ; i++)
9005         {
9006             const char* fstr = fields[i].c_str();
9007             if(U::ConvertsTo<T>(fstr))
9008             {
9009                 value.push_back(U::To<T>(fstr)) ;
9010                 line_column += 1 ;
9011             }
9012             else
9013             {
9014                 if(!IsListed(other, fstr)) other.push_back(fstr);
9015             }
9016         }
9017
9018         if( num_column == UNSET )
9019         {
9020             num_column = line_column ;
9021         }
9022         else if( line_column != num_column )
9023         {
9024             std::cerr
9025                 << "NP::LoadFromString"
9026                 << " FATAL : INCONSISTENT NUMBER OF VALUES " << std::endl
9027                 << " [" << line << "]" << std::endl
9028                 << " fields.size : " << fields.size()
9029                 << " num_field : " << num_field
9030                 << " num_column : " << num_column
9031                 << " line_column : " << line_column
9032                 << " path " << ( path ? path : "-" )
9033                 << std::endl
9034                 ;
9035             assert(0);
9036         }
9037     }
9038
9039     unsigned num_value = value.size() ;
9040     assert( num_value % num_column == 0 );
9041
9042     unsigned num_row = num_value/num_column ;
9043     assert( num_row*num_column == num_value );
9044
9045     NP* a = NP::Make<T>( num_row, num_column );
9046     a->read2( value.data() );
9047
9048
9049     if(units.size() > 0)
9050     {
9051         //for(unsigned i=0 ; i < units.size() ; i++ ) std::cout << "units[" << units[i] << "]" << std::endl  ;
9052         std::string u_units = StringConcat(units, ' ');
9053         a->set_meta<std::string>("units", u_units );
9054     }
9055
9056     if(other.size() > 0)
9057     {
9058         //for(unsigned i=0 ; i < other.size() ; i++ ) std::cout << "other[" << other[i] << "]" << std::endl  ;
9059         std::string u_other = StringConcat(other, ' ');
9060         a->set_meta<std::string>("other", u_other );
9061
9062         if( num_column == 1 && other.size() == num_row ) a->set_names(other) ;
9063     }
9064     return a ;
9065 }
9066
9067
9068
9069
9070
9071
9072
9073
9074
9075
9076 inline unsigned NP::CountChar(const char* str, char q )
9077 {
9078     unsigned count = 0 ;
9079     char* c = (char*)str ;
9080     while(*c)
9081     {
9082         if(*c == q) count += 1 ;
9083         c++ ;
9084     }
9085     return count ;
9086 }
9087
9088 inline void NP::ReplaceCharInsitu(char* str, char q, char n, bool first )
9089 {
9090     unsigned count = 0 ;
9091     char* c = str ;
9092     while(*c)
9093     {
9094         if(*c == q)
9095         {
9096            if((first && count == 0) || first == false ) *c = n ;
9097            count += 1 ;
9098         }
9099         c++ ;
9100     }
9101 }
9102 inline const char* NP::ReplaceChar(const char* str, char q, char n, bool first  )
9103 {
9104     char* s = strdup(str);
9105     ReplaceCharInsitu(s, q, n, first );
9106     return s ;
9107 }
9108
9109 inline const char* NP::Resolve( const char* spec)  // TODO: rename or eliminate this as same as U::Resolve
9110 {
9111     return CountChar(spec, '.') > 1 ? ResolveProp(spec) : spec ;
9112 }
9113
9114 inline const char* NP::ResolveProp(const char* spec)
9115 {
9116     assert(CountChar(spec, '.') > 1);
9117
9118     char* s = strdup(spec) ;
9119     while(*s && *s == ' ') s++ ;  // skip any leading whitespace
9120     char* c = s ;
9121     while(*c)    // terminate when hit end of spec or trailing whitespace
9122     {
9123         if(*c == '.') *c = '/' ;
9124         c++ ;
9125         if(*c == ' ') *c = '\0' ;  // terminate at first trailing space
9126     }
9127     const char* base = getenv("NP_PROP_BASE") ;
9128     std::stringstream ss ;
9129     ss << ( base ? base : "/tmp" ) << "/" << s  ;
9130
9131     std::string path = ss.str();
9132     return strdup(path.c_str()) ;
9133 }
9134
9135
9136 /**
9137 operator<< NP : NOT a member function
9138 ---------------------------------------
9139
9140 Write array into output stream
9141
9142 **/
9143
9144 inline std::ostream& operator<<(std::ostream &os,  const NP& a)
9145 {
9146     os << a.make_prefix() ;
9147     os << a.make_header() ;
9148     os.write(a.bytes(), a.arr_bytes());
9149     os << a.meta ;
9150     return os ;
9151 }
9152
9153 /**
9154 operator>> NP : NOT a member function
9155 ---------------------------------------
9156
9157 Direct input stream into NP array
9158
9159 **/
9160
9161 inline std::istream& operator>>(std::istream& is, NP& a)
9162 {
9163     is.read( (char*)a._prefix.data(), net_hdr::LENGTH ) ;
9164
9165     unsigned hdr_bytes_nh = a.prefix_size(0);
9166     unsigned arr_bytes_nh = a.prefix_size(1);
9167     unsigned meta_bytes_nh = a.prefix_size(2);
9168
9169     if(NP::VERBOSE) std::cout
9170         << " hdr_bytes_nh " << hdr_bytes_nh
9171         << " arr_bytes_nh " << arr_bytes_nh
9172         << " meta_bytes_nh " << meta_bytes_nh
9173         << std::endl
9174         ;
9175
9176     std::getline( is, a._hdr );
9177     a._hdr += '\n' ;     // getline consumes newline ending header but does not return it
9178     assert( hdr_bytes_nh == a._hdr.length() );
9179
9180     bool data_resize = true ;
9181     a.decode_header(data_resize);
9182
9183     assert( a.arr_bytes() == arr_bytes_nh );
9184     is.read(a.bytes(), a.arr_bytes() );
9185
9186     a.meta.resize(meta_bytes_nh);
9187     is.read( (char*)a.meta.data(), meta_bytes_nh );
9188
9189     //is.setstate(std::ios::failbit);
9190     return is;
9191 }
9192
9193 #endif