Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:29:41

0001 //Copyright (C) 2011  Carl Rogers
0002 //Released under MIT License
0003 //license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
0004 
0005 #ifndef LIBCNPY_H_
0006 #define LIBCNPY_H_
0007 
0008 #include<string>
0009 #include<stdexcept>
0010 #include<sstream>
0011 #include<vector>
0012 #include<cstdio>
0013 #include<typeinfo>
0014 #include<iostream>
0015 #include<cassert>
0016 #include<zlib.h>
0017 #include<map>
0018 #include<memory>
0019 #include<stdint.h>
0020 #include<numeric>
0021 
0022 namespace cnpy {
0023 
0024     struct NpyArray {
0025         NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order) :
0026             shape(_shape), word_size(_word_size), fortran_order(_fortran_order)
0027         {
0028             num_vals = 1;
0029             for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
0030             data_holder = std::shared_ptr<std::vector<char>>(
0031                 new std::vector<char>(num_vals * word_size));
0032         }
0033 
0034         NpyArray() : shape(0), word_size(0), fortran_order(0), num_vals(0) { }
0035 
0036         template<typename T>
0037         T* data() {
0038             return reinterpret_cast<T*>(&(*data_holder)[0]);
0039         }
0040 
0041         template<typename T>
0042         const T* data() const {
0043             return reinterpret_cast<T*>(&(*data_holder)[0]);
0044         }
0045 
0046         template<typename T>
0047         std::vector<T> as_vec() const {
0048             const T* p = data<T>();
0049             return std::vector<T>(p, p+num_vals);
0050         }
0051 
0052         size_t num_bytes() const {
0053             return data_holder->size();
0054         }
0055 
0056         std::shared_ptr<std::vector<char>> data_holder;
0057         std::vector<size_t> shape;
0058         size_t word_size;
0059         bool fortran_order;
0060         size_t num_vals;
0061     };
0062    
0063     using npz_t = std::map<std::string, NpyArray>; 
0064 
0065     char BigEndianTest();
0066     char map_type(const std::type_info& t);
0067     template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape);
0068     void parse_npy_header(FILE* fp,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
0069     void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
0070     void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
0071     npz_t npz_load(std::string fname);
0072     NpyArray npz_load(std::string fname, std::string varname);
0073     NpyArray npy_load(std::string fname);
0074 
0075     template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
0076         //write in little endian
0077         for(size_t byte = 0; byte < sizeof(T); byte++) {
0078             char val = *((char*)&rhs+byte); 
0079             lhs.push_back(val);
0080         }
0081         return lhs;
0082     }
0083 
0084     template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
0085     template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
0086 
0087 
0088     template<typename T> void npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w") {
0089         FILE* fp = NULL;
0090         std::vector<size_t> true_data_shape; //if appending, the shape of existing + new data
0091 
0092         if(mode == "a") fp = fopen(fname.c_str(),"r+b");
0093 
0094         if(fp) {
0095             //file exists. we need to append to it. read the header, modify the array size
0096             size_t word_size;
0097             bool fortran_order;
0098             parse_npy_header(fp,word_size,true_data_shape,fortran_order);
0099             assert(!fortran_order);
0100 
0101             if(word_size != sizeof(T)) {
0102                 std::cout<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
0103                 assert( word_size == sizeof(T) );
0104             }
0105             if(true_data_shape.size() != shape.size()) {
0106                 std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
0107                 assert(true_data_shape.size() != shape.size());
0108             }
0109 
0110             for(size_t i = 1; i < shape.size(); i++) {
0111                 if(shape[i] != true_data_shape[i]) {
0112                     std::cout<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
0113                     assert(shape[i] == true_data_shape[i]);
0114                 }
0115             }
0116             true_data_shape[0] += shape[0];
0117         }
0118         else {
0119             fp = fopen(fname.c_str(),"wb");
0120             true_data_shape = shape;
0121         }
0122 
0123         std::vector<char> header = create_npy_header<T>(true_data_shape);
0124         size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
0125 
0126         fseek(fp,0,SEEK_SET);
0127         fwrite(&header[0],sizeof(char),header.size(),fp);
0128         fseek(fp,0,SEEK_END);
0129         fwrite(data,sizeof(T),nels,fp);
0130         fclose(fp);
0131     }
0132 
0133     template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape, std::string mode = "w")
0134     {
0135         //first, append a .npy to the fname
0136         fname += ".npy";
0137 
0138         //now, on with the show
0139         FILE* fp = NULL;
0140         uint16_t nrecs = 0;
0141         size_t global_header_offset = 0;
0142         std::vector<char> global_header;
0143 
0144         if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
0145 
0146         if(fp) {
0147             //zip file exists. we need to add a new npy file to it.
0148             //first read the footer. this gives us the offset and size of the global header
0149             //then read and store the global header.
0150             //below, we will write the the new data at the start of the global header then append the global header and footer below it
0151             size_t global_header_size;
0152             parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
0153             fseek(fp,global_header_offset,SEEK_SET);
0154             global_header.resize(global_header_size);
0155             size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
0156             if(res != global_header_size){
0157                 throw std::runtime_error("npz_save: header read error while adding to existing zip");
0158             }
0159             fseek(fp,global_header_offset,SEEK_SET);
0160         }
0161         else {
0162             fp = fopen(zipname.c_str(),"wb");
0163         }
0164 
0165         std::vector<char> npy_header = create_npy_header<T>(shape);
0166 
0167         size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
0168         size_t nbytes = nels*sizeof(T) + npy_header.size();
0169 
0170         //get the CRC of the data to be added
0171         uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
0172         crc = crc32(crc,(uint8_t*)data,nels*sizeof(T));
0173 
0174         //build the local header
0175         std::vector<char> local_header;
0176         local_header += "PK"; //first part of sig
0177         local_header += (uint16_t) 0x0403; //second part of sig
0178         local_header += (uint16_t) 20; //min version to extract
0179         local_header += (uint16_t) 0; //general purpose bit flag
0180         local_header += (uint16_t) 0; //compression method
0181         local_header += (uint16_t) 0; //file last mod time
0182         local_header += (uint16_t) 0;     //file last mod date
0183         local_header += (uint32_t) crc; //crc
0184         local_header += (uint32_t) nbytes; //compressed size
0185         local_header += (uint32_t) nbytes; //uncompressed size
0186         local_header += (uint16_t) fname.size(); //fname length
0187         local_header += (uint16_t) 0; //extra field length
0188         local_header += fname;
0189 
0190         //build global header
0191         global_header += "PK"; //first part of sig
0192         global_header += (uint16_t) 0x0201; //second part of sig
0193         global_header += (uint16_t) 20; //version made by
0194         global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
0195         global_header += (uint16_t) 0; //file comment length
0196         global_header += (uint16_t) 0; //disk number where file starts
0197         global_header += (uint16_t) 0; //internal file attributes
0198         global_header += (uint32_t) 0; //external file attributes
0199         global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
0200         global_header += fname;
0201 
0202         //build footer
0203         std::vector<char> footer;
0204         footer += "PK"; //first part of sig
0205         footer += (uint16_t) 0x0605; //second part of sig
0206         footer += (uint16_t) 0; //number of this disk
0207         footer += (uint16_t) 0; //disk where footer starts
0208         footer += (uint16_t) (nrecs+1); //number of records on this disk
0209         footer += (uint16_t) (nrecs+1); //total number of records
0210         footer += (uint32_t) global_header.size(); //nbytes of global headers
0211         footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
0212         footer += (uint16_t) 0; //zip file comment length
0213 
0214         //write everything
0215         fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
0216         fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
0217         fwrite(data,sizeof(T),nels,fp);
0218         fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
0219         fwrite(&footer[0],sizeof(char),footer.size(),fp);
0220         fclose(fp);
0221     }
0222 
0223     template<typename T> void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w") {
0224         std::vector<size_t> shape;
0225         shape.push_back(data.size());
0226         npy_save(fname, &data[0], shape, mode);
0227     }
0228 
0229     template<typename T> void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w") {
0230         std::vector<size_t> shape;
0231         shape.push_back(data.size());
0232         npz_save(zipname, fname, &data[0], shape, mode);
0233     }
0234 
0235     template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape) {  
0236 
0237         std::vector<char> dict;
0238         dict += "{'descr': '";
0239         dict += BigEndianTest();
0240         dict += map_type(typeid(T));
0241         dict += std::to_string(sizeof(T));
0242         dict += "', 'fortran_order': False, 'shape': (";
0243         dict += std::to_string(shape[0]);
0244         for(size_t i = 1;i < shape.size();i++) {
0245             dict += ", ";
0246             dict += std::to_string(shape[i]);
0247         }
0248         if(shape.size() == 1) dict += ",";
0249         dict += "), }";
0250         //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
0251         int remainder = 16 - (10 + dict.size()) % 16;
0252         dict.insert(dict.end(),remainder,' ');
0253         dict.back() = '\n';
0254 
0255         std::vector<char> header;
0256         header += (char) 0x93;
0257         header += "NUMPY";
0258         header += (char) 0x01; //major version of numpy format
0259         header += (char) 0x00; //minor version of numpy format
0260         header += (uint16_t) dict.size();
0261         header.insert(header.end(),dict.begin(),dict.end());
0262 
0263         return header;
0264     }
0265 
0266 
0267 }
0268 
0269 #endif