Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-19 08:55:32

0001 /*
0002  *  Copyright (c) 2022 Blue Brain Project
0003  *
0004  *  Distributed under the Boost Software License, Version 1.0.
0005  *    (See accompanying file LICENSE_1_0.txt or copy at
0006  *          http://www.boost.org/LICENSE_1_0.txt)
0007  *
0008  */
0009 #pragma once
0010 
#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "H5Inspector_misc.hpp"
#include "../H5DataType.hpp"
0015 
0016 namespace HighFive {
0017 namespace details {
0018 
0019 template <class T>
0020 struct is_std_string {
0021     static constexpr bool value =
0022         std::is_same<typename inspector<T>::base_type, std::string>::value;
0023 };
0024 
0025 template <class T, class V = void>
0026 struct enable_shallow_copy
0027     : public std::enable_if<!is_std_string<T>::value && inspector<T>::is_trivially_copyable, V> {};
0028 
0029 template <class T, class V = void>
0030 struct enable_deep_copy
0031     : public std::enable_if<!is_std_string<T>::value && !inspector<T>::is_trivially_copyable, V> {};
0032 
0033 template <class T, class V = void>
0034 struct enable_string_copy: public std::enable_if<is_std_string<T>::value, V> {};
0035 
0036 
0037 template <typename T, bool IsReadOnly>
0038 struct ShallowCopyBuffer {
0039     using type = unqualified_t<T>;
0040     using hdf5_type =
0041         typename std::conditional<IsReadOnly,
0042                                   typename std::add_const<typename inspector<T>::hdf5_type>::type,
0043                                   typename inspector<T>::hdf5_type>::type;
0044 
0045     ShallowCopyBuffer() = delete;
0046 
0047     explicit ShallowCopyBuffer(typename std::conditional<IsReadOnly, const T&, T&>::type val)
0048         : ptr(inspector<T>::data(val)){};
0049 
0050     hdf5_type* getPointer() const {
0051         return ptr;
0052     }
0053 
0054     hdf5_type* begin() const {
0055         return getPointer();
0056     }
0057 
0058     void unserialize(T& /* val */) const {
0059         /* nothing to do. */
0060     }
0061 
0062   private:
0063     hdf5_type* ptr;
0064 };
0065 
0066 template <class T>
0067 struct DeepCopyBuffer {
0068     using type = unqualified_t<T>;
0069     using hdf5_type = typename inspector<type>::hdf5_type;
0070 
0071     explicit DeepCopyBuffer(const std::vector<size_t>& _dims)
0072         : buffer(inspector<T>::getSize(_dims))
0073         , dims(_dims) {}
0074 
0075     hdf5_type* getPointer() {
0076         return buffer.data();
0077     }
0078 
0079     hdf5_type const* getPointer() const {
0080         return buffer.data();
0081     }
0082 
0083     hdf5_type* begin() {
0084         return getPointer();
0085     }
0086 
0087     hdf5_type const* begin() const {
0088         return getPointer();
0089     }
0090 
0091     void unserialize(T& val) const {
0092         inspector<type>::unserialize(buffer.data(), dims, val);
0093     }
0094 
0095   private:
0096     std::vector<hdf5_type> buffer;
0097     std::vector<size_t> dims;
0098 };
0099 
/// \brief Mode tag for `StringBuffer`. With `Write` the buffer's list of
///        variable-length string pointers holds `const char*`; with `Read`
///        it holds `char*`.
enum class BufferMode {
    Read,
    Write
};
0101 
0102 
///
/// \brief String length in bytes excluding the `\0`, capped at
///        `max_string_length`.
///
/// Bytes `str[0]` through `str[max_string_length]` (inclusive) are examined
/// in order. The index of the first `\0` found is returned; if none of those
/// bytes is `\0`, `max_string_length` is returned.
///
inline size_t char_buffer_size(char const* const str, size_t max_string_length) {
    size_t n_bytes = 0;
    while (n_bytes <= max_string_length && str[n_bytes] != '\0') {
        ++n_bytes;
    }

    // Leaving the loop without hitting a `\0` gives `max_string_length + 1`;
    // clamp that case back to the cap.
    return n_bytes < max_string_length ? n_bytes : max_string_length;
}
0115 
0116 
0117 ///
0118 /// \brief A buffer for reading/writing strings.
0119 ///
0120 /// A string in HDF5 can be represented as a fixed or variable length string.
0121 /// The important difference for this buffer is that `H5D{read,write}` expects
0122 /// different input depending on whether the strings are fixed or variable length.
0123 /// For fixed length strings, it expects an array of chars, i.e. one string
0124 /// packed after the other contiguously. While for variable length strings it
0125 /// expects a list of pointers to the beginning of each string. Variable length
0126 /// string must be null-terminated; because that's how their length is
0127 /// determined.
0128 ///
0129 /// This buffer hides the difference between fixed and variable length strings
0130 /// by having internal data structures available for both cases at compile time.
0131 /// The choice which internal buffer to use is made at runtime.
0132 ///
0133 /// Consider an HDF5 dataset with N fixed-length strings, each of which is M
0134 /// characters long. Then the in-memory strings are copied into an internal
0135 /// buffer of size N*M. If null- or space-padded the buffer should be filled
0136 /// with the appropriate character. This is important if the in-memory strings
0137 /// are less than M characters long.
0138 ///
0139 /// An HDF5 dataset with N variable-length strings (all null-terminated) uses
0140 /// the internal list of pointers to the beginning of each string. Those
0141 /// pointers can either point to the in-memory strings themselves, if those
0142 /// strings are known to be null-terminated. Otherwise the in-memory strings are
0143 /// copied to an internal buffer of null-terminated strings; and the pointer
0144 /// points to the start of the string in the internal buffer.
0145 ///
0146 /// This class is responsible for arranging the strings properly before passing
0147 /// the buffers to HDF5. To keep this class generic, it provides a generic
0148 /// read/write interface to the internal strings, i.e. a pointer with a size.
0149 /// For reading from the buffer the proxy is called `StringConstView`. This
0150 /// proxy object is to be used by the `inspector` to copy from the buffer into
0151 /// the final destination, e.g. an `std::string`.  Similarly, there's a proxy
0152 /// object for serializing into the buffer, i.e. the `StringView`. Again the
0153 /// `inspector` is responsible for obtaining the pointer, size and padding of
0154 /// the string.
0155 ///
0156 /// Nomenclature:
0157 ///   - size of a string is the number of bytes required to store the string,
0158 ///     including the null character for null-terminated strings.
0159 ///
0160 ///   - length of a string is the number of bytes without the null character.
0161 ///
0162 /// Note: both 'length' and 'size' are counted in number of bytes, not number
0163 ///   of symbols or characters. Even for UTF8 strings.
0164 template <typename T, BufferMode buffer_mode>
0165 struct StringBuffer {
0166     using type = unqualified_t<T>;
0167     using hdf5_type = typename inspector<type>::hdf5_type;
0168 
0169     class StringView {
0170       public:
0171         StringView(StringBuffer<T, buffer_mode>& _buffer, size_t _i)
0172             : buffer(_buffer)
0173             , i(_i) {}
0174 
0175         ///
0176         /// \brief Assign the in-memory string to the buffer.
0177         ///
0178         /// This method copies the in-memory string to the appropriate
0179         /// internal buffer as needed.
0180         ///
0181         /// The `length` is the length of the string in bytes.
0182         void assign(char const* data, size_t length, StringPadding pad) {
0183             if (buffer.isVariableLengthString()) {
0184                 if (pad == StringPadding::NullTerminated) {
0185                     buffer.variable_length_pointers[i] = data;
0186                 } else {
0187                     buffer.variable_length_buffer[i] = std::string(data, length);
0188                     buffer.variable_length_pointers[i] = buffer.variable_length_buffer[i].data();
0189                 }
0190             } else if (buffer.isFixedLengthString()) {
0191                 // If the buffer is fixed-length and null-terminated, then
0192                 // `buffer.string_length` doesn't include the null-character.
0193                 if (length > buffer.string_length) {
0194                     throw std::invalid_argument("String length too big.");
0195                 }
0196 
0197                 memcpy(&buffer.fixed_length_buffer[i * buffer.string_size], data, length);
0198             }
0199         }
0200 
0201       private:
0202         StringBuffer<T, buffer_mode>& buffer;
0203         size_t i;
0204     };
0205 
0206 
0207     class StringConstView {
0208       public:
0209         StringConstView(const StringBuffer<T, buffer_mode>& _buffer, size_t _i)
0210             : buffer(_buffer)
0211             , i(_i) {}
0212 
0213         /// \brief Pointer to the first byte of the string.
0214         ///
0215         /// The valid indices for this pointer are: 0, ..., length() - 1.
0216         char const* data() const {
0217             if (buffer.isVariableLengthString()) {
0218                 return buffer.variable_length_pointers[i];
0219             } else {
0220                 return &buffer.fixed_length_buffer[i * buffer.string_size];
0221             }
0222         }
0223 
0224         /// \brief Length of the string in bytes.
0225         ///
0226         /// Note that for null-terminated strings the "length" doesn't include
0227         /// the null character. Hence, if storing this string as a
0228         /// null-terminated string, the destination buffer needs to be at least
0229         /// `length() + 1` bytes long.
0230         size_t length() const {
0231             if (buffer.isNullTerminated()) {
0232                 return char_buffer_size(data(), buffer.string_length);
0233             } else {
0234                 return buffer.string_length;
0235             }
0236         }
0237 
0238       private:
0239         const StringBuffer<T, buffer_mode>& buffer;
0240         size_t i;
0241     };
0242 
0243 
0244     class Iterator {
0245       public:
0246         Iterator(StringBuffer<T, buffer_mode>& _buffer, size_t _pos)
0247             : buffer(_buffer)
0248             , pos(_pos) {}
0249 
0250         Iterator operator+(size_t n_strings) const {
0251             return Iterator(buffer, pos + n_strings);
0252         }
0253 
0254         void operator+=(size_t n_strings) {
0255             pos += n_strings;
0256         }
0257 
0258         StringView operator*() {
0259             return StringView(buffer, pos);
0260         }
0261 
0262         StringConstView operator*() const {
0263             return StringConstView(buffer, pos);
0264         }
0265 
0266       private:
0267         StringBuffer<T, buffer_mode>& buffer;
0268         size_t pos;
0269     };
0270 
0271     StringBuffer(std::vector<size_t> _dims, const DataType& _file_datatype)
0272         : file_datatype(_file_datatype.asStringType())
0273         , padding(file_datatype.getPadding())
0274         , string_size(file_datatype.isVariableStr() ? size_t(-1) : file_datatype.getSize())
0275         , string_length(string_size - size_t(isNullTerminated()))
0276         , dims(_dims) {
0277         if (string_size == 0 && isNullTerminated()) {
0278             throw DataTypeException(
0279                 "Fixed-length, null-terminated need at least one byte to store the "
0280                 "null-character.");
0281         }
0282 
0283         auto n_strings = compute_total_size(dims);
0284         if (isVariableLengthString()) {
0285             variable_length_buffer.resize(n_strings);
0286             variable_length_pointers.resize(n_strings);
0287         } else {
0288             char pad = padding == StringPadding::SpacePadded ? ' ' : '\0';
0289             fixed_length_buffer.assign(n_strings * string_size, pad);
0290         }
0291     }
0292 
0293     bool isVariableLengthString() const {
0294         return file_datatype.isVariableStr();
0295     }
0296 
0297     bool isFixedLengthString() const {
0298         return file_datatype.isFixedLenStr();
0299     }
0300 
0301     bool isNullTerminated() const {
0302         return file_datatype.getPadding() == StringPadding::NullTerminated;
0303     }
0304 
0305 
0306     void* getPointer() {
0307         if (file_datatype.isVariableStr()) {
0308             return variable_length_pointers.data();
0309         } else {
0310             return fixed_length_buffer.data();
0311         }
0312     }
0313 
0314     Iterator begin() {
0315         return Iterator(*this, 0ul);
0316     }
0317 
0318     void unserialize(T& val) {
0319         inspector<type>::unserialize(begin(), dims, val);
0320     }
0321 
0322   private:
0323     StringType file_datatype;
0324     StringPadding padding;
0325     size_t string_size;    // Size of buffer required to store the string.
0326                            // Meaningful for fixed length strings only.
0327     size_t string_length;  // Semantic length of string.
0328     std::vector<size_t> dims;
0329 
0330     std::vector<char> fixed_length_buffer;
0331     std::vector<std::string> variable_length_buffer;
0332     std::vector<
0333         typename std::conditional<buffer_mode == BufferMode::Write, const char, char>::type*>
0334         variable_length_pointers;
0335 };
0336 
0337 
0338 template <typename T, typename Enable = void>
0339 struct Writer;
0340 
0341 template <typename T>
0342 struct Writer<T, typename enable_shallow_copy<T>::type>: public ShallowCopyBuffer<T, true> {
0343   private:
0344     using super = ShallowCopyBuffer<T, true>;
0345 
0346   public:
0347     explicit Writer(const T& val, const DataType& /* file_datatype */)
0348         : super(val){};
0349 };
0350 
0351 template <typename T>
0352 struct Writer<T, typename enable_deep_copy<T>::type>: public DeepCopyBuffer<T> {
0353     explicit Writer(const T& val, const DataType& /* file_datatype */)
0354         : DeepCopyBuffer<T>(inspector<T>::getDimensions(val)) {
0355         inspector<T>::serialize(val, this->begin());
0356     }
0357 };
0358 
0359 template <typename T>
0360 struct Writer<T, typename enable_string_copy<T>::type>: public StringBuffer<T, BufferMode::Write> {
0361     explicit Writer(const T& val, const DataType& _file_datatype)
0362         : StringBuffer<T, BufferMode::Write>(inspector<T>::getDimensions(val), _file_datatype) {
0363         inspector<T>::serialize(val, this->begin());
0364     }
0365 };
0366 
0367 template <typename T, typename Enable = void>
0368 struct Reader;
0369 
0370 template <typename T>
0371 struct Reader<T, typename enable_shallow_copy<T>::type>: public ShallowCopyBuffer<T, false> {
0372   private:
0373     using super = ShallowCopyBuffer<T, false>;
0374     using type = typename super::type;
0375 
0376   public:
0377     Reader(const std::vector<size_t>&, type& val, const DataType& /* file_datatype */)
0378         : super(val) {}
0379 };
0380 
0381 template <typename T>
0382 struct Reader<T, typename enable_deep_copy<T>::type>: public DeepCopyBuffer<T> {
0383   private:
0384     using super = DeepCopyBuffer<T>;
0385     using type = typename super::type;
0386 
0387   public:
0388     Reader(const std::vector<size_t>& _dims, type&, const DataType& /* file_datatype */)
0389         : super(_dims) {}
0390 };
0391 
0392 
0393 template <typename T>
0394 struct Reader<T, typename enable_string_copy<T>::type>: public StringBuffer<T, BufferMode::Write> {
0395   public:
0396     explicit Reader(const std::vector<size_t>& _dims,
0397                     const T& /* val */,
0398                     const DataType& _file_datatype)
0399         : StringBuffer<T, BufferMode::Write>(_dims, _file_datatype) {}
0400 };
0401 
0402 struct data_converter {
0403     template <typename T>
0404     static Writer<T> serialize(const typename inspector<T>::type& val,
0405                                const DataType& file_datatype) {
0406         return Writer<T>(val, file_datatype);
0407     }
0408 
0409     template <typename T>
0410     static Reader<T> get_reader(const std::vector<size_t>& dims,
0411                                 T& val,
0412                                 const DataType& file_datatype) {
0413         // TODO Use bufferinfo for recursive_ndim
0414         auto effective_dims = details::squeezeDimensions(dims, inspector<T>::recursive_ndim);
0415         inspector<T>::prepare(val, effective_dims);
0416         return Reader<T>(effective_dims, val, file_datatype);
0417     }
0418 };
0419 
0420 }  // namespace details
0421 }  // namespace HighFive