Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-21 09:58:12

0001 // SPDX-License-Identifier: MIT
0002 // Copyright 2015,2018-2019 Moritz Kiehn
0003 //
0004 // Permission is hereby granted, free of charge, to any person obtaining a copy
0005 // of this software and associated documentation files (the "Software"), to deal
0006 // in the Software without restriction, including without limitation the rights
0007 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
0008 // copies of the Software, and to permit persons to whom the Software is
0009 // furnished to do so, subject to the following conditions:
0010 //
0011 // The above copyright notice and this permission notice shall be included in
0012 // all copies or substantial portions of the Software.
0013 //
0014 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0017 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0018 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
0019 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0020 // SOFTWARE.
0021 
0022 /// \file
0023 /// \brief   Write numpy-compatible .npy binary files
0024 /// \author  Moritz Kiehn <msmk@cern.ch>
0025 /// \date    2019-09-08, Split numpy i/o from the namedtuple library
0026 
0027 #pragma once
0028 
0029 #include <array>
0030 #include <cstdint>
0031 #include <fstream>
0032 #include <string>
0033 #include <tuple>
0034 #include <utility>
0035 
0036 namespace dfe {
0037 
/// Write records into a binary NumPy-compatible `.npy` file.
///
/// Records are appended one at a time. The element count stored in the file
/// header is only finalized when the writer is destroyed or when another
/// writer is move-assigned onto it.
///
/// See
/// https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html
/// for an explanation of the file format.
///
/// \tparam NamedTuple Record type; must provide a nested `Tuple` type, be
///                    default-constructible, and expose `names()`, `tuple()`
///                    and `get<I>(record)` as used by the implementation.
template<typename NamedTuple>
class NamedTupleNumpyWriter {
public:
  NamedTupleNumpyWriter() = delete;
  NamedTupleNumpyWriter(const NamedTupleNumpyWriter&) = delete;
  NamedTupleNumpyWriter(NamedTupleNumpyWriter&&) = default;
  ~NamedTupleNumpyWriter();
  NamedTupleNumpyWriter& operator=(const NamedTupleNumpyWriter&) = delete;

  /// Take over the file of another writer.
  ///
  /// A file that is currently open in this writer is finalized first, i.e.
  /// its header is rewritten with the number of appended records before it
  /// is closed. A defaulted move assignment would let the stream close the
  /// file with the initial zero-length header still in place.
  NamedTupleNumpyWriter& operator=(NamedTupleNumpyWriter&& other) {
    if (this != &other) {
      if (m_file.is_open()) {
        write_header(m_num_tuples);
        m_file.close();
      }
      m_file = std::move(other.m_file);
      m_fixed_header_length = other.m_fixed_header_length;
      m_num_tuples = other.m_num_tuples;
    }
    return *this;
  }

  /// Create a npy file at the given path. Overwrites existing data.
  NamedTupleNumpyWriter(const std::string& path);

  /// Append a record to the end of the file.
  void append(const NamedTuple& record);

private:
  // the equivalent std::tuple-like type
  using Tuple = typename NamedTuple::Tuple;

  // output stream for the file being written
  std::ofstream m_file;
  // size of the header (without trailing newline) fixed by the first write
  std::size_t m_fixed_header_length;
  // number of records appended so far
  std::size_t m_num_tuples;

  // (re)write the file header at the beginning of the file
  void write_header(std::size_t num_tuples);
  // write all elements of a record, one after the other
  template<std::size_t... I>
  void write_record(const NamedTuple& record, std::index_sequence<I...>);
  // write the raw bytes of a single value
  template<typename T>
  void write_bytes(const T* ptr);
};
0073 
0074 // implementation helpers
0075 namespace io_npy_impl {
0076 
// Map a C++ type to its NumPy dtype code (without byte order modifier).
//
// The primary template has an invalid type on purpose so that only the
// explicitly specialized types below can be used.
template<typename T>
constexpr typename std::enable_if<false, T>::type kNumpyDtypeCode;

// unsigned integers
template<> constexpr char const* kNumpyDtypeCode<std::uint8_t> = "u1";
template<> constexpr char const* kNumpyDtypeCode<std::uint16_t> = "u2";
template<> constexpr char const* kNumpyDtypeCode<std::uint32_t> = "u4";
template<> constexpr char const* kNumpyDtypeCode<std::uint64_t> = "u8";

// signed integers
template<> constexpr char const* kNumpyDtypeCode<std::int8_t> = "i1";
template<> constexpr char const* kNumpyDtypeCode<std::int16_t> = "i2";
template<> constexpr char const* kNumpyDtypeCode<std::int32_t> = "i4";
template<> constexpr char const* kNumpyDtypeCode<std::int64_t> = "i8";

// floating point numbers
template<> constexpr char const* kNumpyDtypeCode<float> = "f4";
template<> constexpr char const* kNumpyDtypeCode<double> = "f8";

// boolean
template<> constexpr char const* kNumpyDtypeCode<bool> = "?";
0101 
0102 template<typename... Types>
0103 constexpr std::array<const char*, sizeof...(Types)>
0104 dtypes_codes(const std::tuple<Types...>&) {
0105   return {kNumpyDtypeCode<typename std::decay<Types>::type>...};
0106 }
0107 
// Determine the host endianness and return the corresponding dtype code
// modifier: '<' for little endian and '>' otherwise.
//
// Derived from:
// https://stackoverflow.com/questions/1001307/detecting-endianness-programmatically-in-a-c-program
inline char
dtype_endianness_modifier() {
  // Inspect the object representation through an `unsigned char` pointer.
  // This is always allowed, whereas reading the inactive member of a union
  // is undefined behaviour in C++.
  const uint32_t probe = 0x0A0B0C0D;
  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&probe);
  const bool is_little_endian = (bytes[0] == 0x0D) && (bytes[1] == 0x0C) &&
                                (bytes[2] == 0x0B) && (bytes[3] == 0x0A);
  // TODO this assumes that only little and big endian exists and checks only
  // for little. maybe verify that it always is one or the other?
  return is_little_endian ? '<' : '>';
}
0124 
0125 template<typename NamedTuple>
0126 inline std::string
0127 dtypes_description(const NamedTuple& nt) {
0128   std::string descr;
0129   std::size_t n = std::tuple_size<typename NamedTuple::Tuple>::value;
0130   auto names = nt.names();
0131   auto codes = dtypes_codes(nt.tuple());
0132   auto endianness_modifier = dtype_endianness_modifier();
0133   descr += '[';
0134   for (decltype(n) i = 0; i < n; ++i) {
0135     descr += "('";
0136     descr += names[i];
0137     descr += "', '";
0138     descr += endianness_modifier;
0139     descr += codes[i];
0140     descr += "')";
0141     if ((i + 1) < n) {
0142       descr += ", ";
0143     }
0144   }
0145   descr += ']';
0146   return descr;
0147 }
0148 
0149 } // namespace io_npy_impl
0150 
0151 // implementation
0152 
0153 template<typename NamedTuple>
0154 inline NamedTupleNumpyWriter<NamedTuple>::NamedTupleNumpyWriter(
0155   const std::string& path)
0156   : m_fixed_header_length(0), m_num_tuples(0) {
0157   // make our life easier. always throw on error
0158   m_file.exceptions(std::ofstream::badbit | std::ofstream::failbit);
0159   m_file.open(
0160     path, std::ios_base::binary | std::ios_base::out | std::ios_base::trunc);
0161   // write a header that uses the maximum amount of space, i.e. biggest
0162   // possible number of ntuples, so that we have enough space when we
0163   // overwrite it w/ the actual number of tuples at closing time.
0164   write_header(SIZE_MAX);
0165   write_header(0);
0166 }
0167 
0168 template<typename NamedTuple>
0169 inline NamedTupleNumpyWriter<NamedTuple>::~NamedTupleNumpyWriter() {
0170   if (!m_file.is_open()) {
0171     return;
0172   }
0173   write_header(m_num_tuples);
0174   m_file.close();
0175 }
0176 
0177 template<typename NamedTuple>
0178 inline void
0179 NamedTupleNumpyWriter<NamedTuple>::append(const NamedTuple& record) {
0180   write_record(
0181     record, std::make_index_sequence<std::tuple_size<Tuple>::value>{});
0182   m_num_tuples += 1;
0183 }
0184 
0185 template<typename NamedTuple>
0186 inline void
0187 NamedTupleNumpyWriter<NamedTuple>::write_header(std::size_t num_tuples) {
0188   std::string header;
0189   // magic
0190   header += "\x93NUMPY";
0191   // fixed version number (major, minor), 1byte unsigned each
0192   header += static_cast<char>(0x1);
0193   header += static_cast<char>(0x0);
0194   // placeholder value for the header length, 2byte little endian unsigned
0195   header += static_cast<char>(0xAF);
0196   header += static_cast<char>(0xFE);
0197   // python dict w/ data type and size information
0198   header += "{'descr': ";
0199   header += io_npy_impl::dtypes_description(NamedTuple());
0200   header += ", 'fortran_order': False";
0201   header += ", 'shape': (";
0202   header += std::to_string(num_tuples);
0203   header += ",)}";
0204   // padd w/ spaces for 16 byte alignment of the whole header
0205   while (((header.size() + 1) % 16) != 0) {
0206     header += ' ';
0207   }
0208   // the initial header fixes the available header size. updated headers
0209   // must always occupy the same space and might require additional
0210   // padding spaces
0211   if (m_fixed_header_length == 0) {
0212     m_fixed_header_length = header.size();
0213   } else {
0214     while (header.size() < m_fixed_header_length) {
0215       header += ' ';
0216     }
0217   }
0218   header += '\n';
0219   // replace the header length place holder
0220   std::size_t header_length = header.size() - 10;
0221   header[8] = static_cast<char>(header_length >> 0);
0222   header[9] = static_cast<char>(header_length >> 8);
0223   m_file.seekp(0);
0224   m_file.write(header.data(), header.size());
0225 }
0226 
0227 template<typename NamedTuple>
0228 template<std::size_t... I>
0229 inline void
0230 NamedTupleNumpyWriter<NamedTuple>::write_record(
0231   const NamedTuple& record, std::index_sequence<I...>) {
0232   // see namedtuple_impl::print_tuple for explanation
0233   using std::get;
0234   using Vacuum = int[];
0235   (void)Vacuum{(write_bytes(&get<I>(record)), 0)...};
0236 }
0237 
0238 template<typename NamedTuple>
0239 template<typename T>
0240 inline void
0241 NamedTupleNumpyWriter<NamedTuple>::write_bytes(const T* ptr) {
0242   m_file.write(reinterpret_cast<const char*>(ptr), sizeof(T));
0243 }
0244 
0245 } // namespace dfe