Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-25 08:14:02

0001 /*
0002  *  Copyright (c), 2017, Adrien Devresse <adrien.devresse@epfl.ch>
0003  *
0004  *  Distributed under the Boost Software License, Version 1.0.
0005  *    (See accompanying file LICENSE_1_0.txt or copy at
0006  *          http://www.boost.org/LICENSE_1_0.txt)
0007  *
0008  */
0009 #pragma once
0010 
0011 #include <cstdlib>
0012 #include <vector>
0013 
0014 #include "H5_definitions.hpp"
0015 #include "H5Utils.hpp"
0016 #include "convert_size_vector.hpp"
0017 
0018 #include "../H5PropertyList.hpp"
0019 #include "h5s_wrapper.hpp"
0020 
0021 namespace HighFive {
0022 
0023 class ElementSet {
0024   public:
0025     ///
0026     /// \brief Create a list of points of N-dimension for selection.
0027     ///
0028     /// \param list List of continuous coordinates (e.g.: in 2 dimensions space
0029     /// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
0030     ElementSet(std::initializer_list<std::size_t> list);
0031     ///
0032     /// \brief Create a list of points of N-dimension for selection.
0033     ///
0034     /// \param list List of N-dim points.
0035     ElementSet(std::initializer_list<std::vector<std::size_t>> list);
0036     ///
0037     /// \brief Create a list of points of N-dimension for selection.
0038     ///
0039     /// \param element_ids List of continuous coordinates (e.g.: in 2 dimensions space
0040     /// `ElementSet{1, 2, 3 ,4}` creates points `(1, 2)` and `(3, 4)`).
0041     explicit ElementSet(const std::vector<std::size_t>& element_ids);
0042     ///
0043     /// \brief Create a list of points of N-dimension for selection.
0044     ///
0045     /// \param element_ids List of N-dim points.
0046     explicit ElementSet(const std::vector<std::vector<std::size_t>>& element_ids);
0047 
0048   private:
0049     std::vector<std::size_t> _ids;
0050 
0051     template <typename Derivate>
0052     friend class SliceTraits;
0053 };
0054 
0055 inline std::vector<hsize_t> toHDF5SizeVector(const std::vector<size_t>& from) {
0056     return detail::convertSizeVector<hsize_t>(from);
0057 }
0058 
0059 inline std::vector<size_t> toSTLSizeVector(const std::vector<hsize_t>& from) {
0060     return detail::convertSizeVector<size_t>(from);
0061 }
0062 
0063 ///
0064 /// \brief A CRTP base class for hyper slab-like objects.
0065 ///
0066 template <typename Impl>
0067 class HyperSlabInterface {
0068   public:
0069     DataSpace apply(const DataSpace& space) const {
0070         return static_cast<const Impl&>(*this).apply(space);
0071     }
0072 };
0073 
0074 struct RegularHyperSlab {
0075     RegularHyperSlab() = default;
0076 
0077     explicit RegularHyperSlab(const std::vector<size_t>& offset_,
0078                               const std::vector<size_t>& count_ = {},
0079                               const std::vector<size_t>& stride_ = {},
0080                               const std::vector<size_t>& block_ = {})
0081         : offset(toHDF5SizeVector(offset_))
0082         , count(toHDF5SizeVector(count_))
0083         , stride(toHDF5SizeVector(stride_))
0084         , block(toHDF5SizeVector(block_)) {}
0085 
0086     static RegularHyperSlab fromHDF5Sizes(std::vector<hsize_t> offset_,
0087                                           std::vector<hsize_t> count_ = {},
0088                                           std::vector<hsize_t> stride_ = {},
0089                                           std::vector<hsize_t> block_ = {}) {
0090         RegularHyperSlab slab;
0091         slab.offset = std::move(offset_);
0092         slab.count = std::move(count_);
0093         slab.stride = std::move(stride_);
0094         slab.block = std::move(block_);
0095 
0096         return slab;
0097     }
0098 
0099     size_t rank() const {
0100         return std::max(std::max(offset.size(), count.size()),
0101                         std::max(stride.size(), block.size()));
0102     }
0103 
0104     /// Dimensions when all gaps are removed.
0105     std::vector<size_t> packedDims() const {
0106         auto n_dims = rank();
0107         auto dims = std::vector<size_t>(n_dims, 0);
0108 
0109         for (size_t i = 0; i < n_dims; ++i) {
0110             dims[i] = count[i] * (block.empty() ? 1 : block[i]);
0111         }
0112 
0113         return dims;
0114     }
0115 
0116     std::vector<hsize_t> offset;
0117     std::vector<hsize_t> count;
0118     std::vector<hsize_t> stride;
0119     std::vector<hsize_t> block;
0120 };
0121 
0122 class HyperSlab: public HyperSlabInterface<HyperSlab> {
0123   public:
0124     HyperSlab() {
0125         selects.emplace_back(RegularHyperSlab{}, Op::None);
0126     };
0127 
0128     explicit HyperSlab(const RegularHyperSlab& sel) {
0129         selects.emplace_back(sel, Op::Set);
0130     }
0131 
0132     HyperSlab operator|(const RegularHyperSlab& sel) const {
0133         auto ret = *this;
0134         ret |= sel;
0135         return ret;
0136     }
0137 
0138     HyperSlab& operator|=(const RegularHyperSlab& sel) {
0139         selects.emplace_back(sel, Op::Or);
0140         return *this;
0141     }
0142 
0143     HyperSlab operator&(const RegularHyperSlab& sel) const {
0144         auto ret = *this;
0145         ret &= sel;
0146         return ret;
0147     }
0148 
0149     HyperSlab& operator&=(const RegularHyperSlab& sel) {
0150         selects.emplace_back(sel, Op::And);
0151         return *this;
0152     }
0153 
0154     HyperSlab operator^(const RegularHyperSlab& sel) const {
0155         auto ret = *this;
0156         ret ^= sel;
0157         return ret;
0158     }
0159 
0160     HyperSlab& operator^=(const RegularHyperSlab& sel) {
0161         selects.emplace_back(sel, Op::Xor);
0162         return *this;
0163     }
0164 
0165     HyperSlab& notA(const RegularHyperSlab& sel) {
0166         selects.emplace_back(sel, Op::NotA);
0167         return *this;
0168     }
0169 
0170     HyperSlab& notB(const RegularHyperSlab& sel) {
0171         selects.emplace_back(sel, Op::NotB);
0172         return *this;
0173     }
0174 
0175     DataSpace apply(const DataSpace& space_) const {
0176         return apply_impl(space_);
0177     }
0178 
0179   private:
0180     enum class Op {
0181         Noop,
0182         Set,
0183         Or,
0184         And,
0185         Xor,
0186         NotB,
0187         NotA,
0188         Append,
0189         Prepend,
0190         Invalid,
0191         None,
0192     };
0193 
0194     H5S_seloper_t convert(Op op) const {
0195         switch (op) {
0196         case Op::Noop:
0197             return H5S_SELECT_NOOP;
0198         case Op::Set:
0199             return H5S_SELECT_SET;
0200         case Op::Or:
0201             return H5S_SELECT_OR;
0202         case Op::And:
0203             return H5S_SELECT_AND;
0204         case Op::Xor:
0205             return H5S_SELECT_XOR;
0206         case Op::NotB:
0207             return H5S_SELECT_NOTB;
0208         case Op::NotA:
0209             return H5S_SELECT_NOTA;
0210         case Op::Append:
0211             return H5S_SELECT_APPEND;
0212         case Op::Prepend:
0213             return H5S_SELECT_PREPEND;
0214         case Op::Invalid:
0215             return H5S_SELECT_INVALID;
0216         default:
0217             throw DataSpaceException("Invalid HyperSlab operation.");
0218         }
0219     }
0220 
0221     struct Select_: public RegularHyperSlab {
0222         Select_(const RegularHyperSlab& sel, Op op_)
0223             : RegularHyperSlab(sel)
0224             , op(op_) {}
0225 
0226         Op op;
0227     };
0228 
0229     std::vector<Select_> selects;
0230 
0231   protected:
0232     DataSpace select_none(const DataSpace& outer_space) const {
0233         auto space = outer_space.clone();
0234         detail::h5s_select_none(space.getId());
0235         return space;
0236     }
0237 
0238     void select_hyperslab(DataSpace& space, const Select_& sel) const {
0239         detail::h5s_select_hyperslab(space.getId(),
0240                                      convert(sel.op),
0241                                      sel.offset.empty() ? nullptr : sel.offset.data(),
0242                                      sel.stride.empty() ? nullptr : sel.stride.data(),
0243                                      sel.count.empty() ? nullptr : sel.count.data(),
0244                                      sel.block.empty() ? nullptr : sel.block.data());
0245     }
0246 
0247 #if H5_VERSION_GE(1, 10, 6)
0248     /// The length of a stream of `Op::Or` starting at `begin`.
0249     size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const {
0250         assert(op == Op::Or);
0251         auto const* it =
0252             std::find_if(begin, end, [op](const Select_& sel) { return sel.op != op; });
0253         return static_cast<size_t>(it - begin);
0254     }
0255 
0256     DataSpace combine_selections(const DataSpace& left_space,
0257                                  Op op,
0258                                  const DataSpace& right_space) const {
0259         assert(op == Op::Or);
0260 
0261         auto left_type = detail::h5s_get_select_type(left_space.getId());
0262         auto right_type = detail::h5s_get_select_type(right_space.getId());
0263 
0264         // Since HDF5 doesn't allow `combine_selections` with a None
0265         // selection, we need to avoid the issue:
0266         if (left_type == H5S_SEL_NONE) {
0267             return right_space;
0268         } else if (right_type == H5S_SEL_NONE) {
0269             return left_space;
0270         } else if (left_type == H5S_SEL_ALL) {
0271             return left_space;
0272         } else if (right_type == H5S_SEL_ALL) {
0273             return right_space;
0274         } else {
0275             return detail::make_data_space(
0276                 detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId()));
0277         }
0278     }
0279 
0280     /// Reduce a sequence of `Op::Or` efficiently.
0281     ///
0282     /// The issue is that `H5Sselect_hyperslab` runs in time that linear of the
0283     /// number of block in the existing selection. Therefore, a loop that adds
0284     /// slab-by-slab has quadratic runtime in the number of slabs.
0285     ///
0286     /// Fortunately, `H5Scombine_select` doesn't suffer from the same problem.
0287     /// However, it's only available in 1.10.6 and newer.
0288     ///
0289     /// The solution is to use divide-and-conquer to reduce (long) streaks of
0290     /// `Op::Or` in what seems to be log-linear time.
0291     DataSpace reduce_streak(const DataSpace& outer_space,
0292                             Select_ const* begin,
0293                             Select_ const* end,
0294                             Op op) const {
0295         assert(op == Op::Or);
0296 
0297         if (begin == end) {
0298             throw std::runtime_error("Broken logic in 'DataSpace::reduce_streak'.");
0299         }
0300 
0301         std::ptrdiff_t distance = end - begin;
0302         if (distance == 1) {
0303             auto space = select_none(outer_space);
0304             select_hyperslab(space, *begin);
0305             return space;
0306         }
0307 
0308         Select_ const* mid = begin + distance / 2;
0309         auto right_space = reduce_streak(outer_space, begin, mid, op);
0310         auto left_space = reduce_streak(outer_space, mid, end, op);
0311 
0312         return combine_selections(left_space, op, right_space);
0313     }
0314 
0315     DataSpace apply_impl(const DataSpace& space_) const {
0316         auto space = space_.clone();
0317         auto n_selects = selects.size();
0318         for (size_t i = 0; i < n_selects; ++i) {
0319             auto const* const begin = selects.data() + i;
0320             auto const* const end = selects.data() + n_selects;
0321 
0322             auto n_ors = detect_streak(begin, end, Op::Or);
0323 
0324             if (n_ors > 1) {
0325                 auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or);
0326                 space = combine_selections(space, Op::Or, right_space);
0327                 i += n_ors - 1;
0328             } else if (selects[i].op == Op::None) {
0329                 detail::h5s_select_none(space.getId());
0330             } else {
0331                 select_hyperslab(space, selects[i]);
0332             }
0333         }
0334         return space;
0335     }
0336 #else
0337     DataSpace apply_impl(const DataSpace& space_) const {
0338         auto space = space_.clone();
0339         for (const auto& sel: selects) {
0340             if (sel.op == Op::None) {
0341                 detail::h5s_select_none(space.getId());
0342             } else {
0343                 select_hyperslab(space, sel);
0344             }
0345         }
0346         return space;
0347     }
0348 #endif
0349 };
0350 
0351 ///
0352 /// \brief Selects the Cartesian product of slices.
0353 ///
0354 /// Given a one-dimensional dataset one might want to select the union of
0355 /// multiple, non-overlapping slices. For example,
0356 ///
0357 ///     using Slice = std::array<size_t, 2>;
0358 ///     using Slices = std::vector<Slice>;
0359 ///     auto slices = Slices{{0, 2}, {4, 10}};
0360 ///     dset.select(ProductSet(slices);
0361 ///
0362 /// to select elements `0`, `1` and `4`, ..., `9` (inclusive).
0363 ///
0364 /// For a two-dimensional array one which to select the row specified above,
0365 /// but only columns `2`, `3` and `4`:
0366 ///
0367 ///    dset.select(ProductSet(slices, Slice{2, 5}));
0368 ///    // Analogues with the roles of columns and rows reversed:
0369 ///    dset.select(ProductSet(Slice{2, 5}, slices));
0370 ///
0371 /// One can generalize once more and allow the unions of slices in both x- and
0372 /// y-dimension:
0373 ///
0374 ///     auto yslices = Slices{{1, 5}, {7, 8}};
0375 ///     auto xslices = Slices{{0, 3}, {6, 8}};
0376 ///     dset.select(ProductSet(yslices, xslices));
0377 ///
0378 /// which selects the following from a 11x8 dataset:
0379 ///
0380 ///     . . . . . . . .
0381 ///     x x x . . . x x
0382 ///     x x x . . . x x
0383 ///     x x x . . . x x
0384 ///     x x x . . . x x
0385 ///     . . . . . . . .
0386 ///     . . . . . . . .
0387 ///     x x x . . . x x
0388 ///     . . . . . . . .
0389 ///     . . . . . . . .
0390 ///     . . . . . . . .
0391 ///
0392 /// Final twist, the selection along and axis may be discrete indices, from
0393 /// which a vector of, possibly single-element, slices can be constructed. The
0394 /// corresponding types are `std::vector<size_t>` and `size_t` for multiple or
0395 /// just a single values. Note, looping over rows or columns one-by-one can be
0396 /// a very serious performance problem. In particular,
0397 ///
0398 ///     // Avoid:
0399 ///     for(auto i : indices) {
0400 ///         dset.select(i).read<double>();
0401 ///     }
0402 ///
0403 ///     // Use:
0404 ///     std::vector<size_t> tmp(indices.begin(), indices.end());
0405 ///     dset.select(tmp).read<std::vector<double>>();
0406 ///
0407 /// obvious omit the copy if `indices` already has the correct type.
0408 ///
0409 /// The solution works analogous in higher dimensions. A selection `sk` along
0410 /// axis `k` can be interpreted as a subset `S_k` of the natural numbers. The
0411 /// index `i` is in `S_k` if it's selected by `sk`.  The `ProductSet` of `s0`,
0412 /// ..., `sN` selects the Cartesian product `S_0 x ... x S_N`.
0413 ///
0414 /// Note that the selections along each axis must be sorted and non-overlapping.
0415 ///
0416 /// \since 3.0
0417 class ProductSet {
0418   public:
0419     template <class... Slices>
0420     explicit ProductSet(const Slices&... slices);
0421 
0422   private:
0423     HyperSlab slab;
0424     std::vector<size_t> shape;
0425 
0426     template <typename Derivate>
0427     friend class SliceTraits;
0428 };
0429 
0430 template <typename Derivate>
0431 class SliceTraits {
0432   public:
0433     ///
0434     /// \brief Select an \p hyper_slab in the current Slice/Dataset.
0435     ///
0436     /// HyperSlabs can be either regular or irregular. Irregular hyperslabs are typically generated
0437     /// by taking the union of regular hyperslabs. An irregular hyperslab, in general, does not fit
0438     /// nicely into a multi-dimensional array, but only a subset of such an array.
0439     ///
0440     /// Therefore, the only memspaces supported for general hyperslabs are one-dimensional arrays.
0441     template <class Impl>
0442     Selection select(const HyperSlabInterface<Impl>& hyper_slab) const;
0443 
0444     ///
0445     /// \brief Select an \p hyper_slab in the current Slice/Dataset.
0446     ///
0447     /// If the selection can be read into a simple, multi-dimensional dataspace,
0448     /// then this overload enable specifying the shape of the memory dataspace
0449     /// with `memspace`. Note, that simple implies no offsets, strides or
0450     /// number of blocks, just the size of the block in each dimension.
0451     Selection select(const HyperSlab& hyper_slab, const DataSpace& memspace) const;
0452 
0453     ///
0454     /// \brief Select a region in the current Slice/Dataset of \p count points at
0455     /// \p offset separated by \p stride. If strides are not provided they will
0456     /// default to 1 in all dimensions.
0457     ///
0458     /// vector offset and count have to be from the same dimension
0459     ///
0460     Selection select(const std::vector<size_t>& offset,
0461                      const std::vector<size_t>& count,
0462                      const std::vector<size_t>& stride = {},
0463                      const std::vector<size_t>& block = {}) const;
0464 
0465     ///
0466     /// \brief Select a set of columns in the last dimension of this dataset.
0467     ///
0468     /// The column indices must be smaller than the dimension size.
0469     ///
0470     Selection select(const std::vector<size_t>& columns) const;
0471 
0472     ///
0473     /// \brief Select a region in the current Slice/Dataset out of a list of elements.
0474     ///
0475     Selection select(const ElementSet& elements) const;
0476 
0477     ///
0478     /// \brief Select a region consisting of a product of slices.
0479     ///
0480     /// \since 3.0
0481     ///
0482     Selection select(const ProductSet& product_set) const;
0483 
0484     template <typename T>
0485     T read(const DataTransferProps& xfer_props = DataTransferProps()) const;
0486 
0487     ///
0488     /// Read the entire dataset into a buffer
0489     ///
0490     /// An exception is raised is if the numbers of dimension of the buffer and
0491     /// of the dataset are different.
0492     ///
0493     /// The array type can be a N-pointer or a N-vector. For plain pointers
0494     /// not dimensionality checking will be performed, it is the user's
0495     /// responsibility to ensure that the right amount of space has been
0496     /// allocated.
0497     template <typename T>
0498     void read(T& array, const DataTransferProps& xfer_props = DataTransferProps()) const;
0499 
0500     ///
0501     /// Read the entire dataset into a raw buffer
0502     ///
0503     /// No dimensionality checks will be performed, it is the user's
0504     /// responsibility to ensure that the right amount of space has been
0505     /// allocated.
0506     /// \param array: A buffer containing enough space for the data
0507     /// \param mem_datatype: The type of the data in memory. This prevents deducing it from T.
0508     /// \param xfer_props: Data Transfer properties
0509     template <typename T>
0510     void read_raw(T* array,
0511                   const DataType& mem_datatype,
0512                   const DataTransferProps& xfer_props = DataTransferProps()) const;
0513 
0514     ///
0515     /// Read the entire dataset into a raw buffer
0516     ///
0517     /// Same as `read(T*, const DataType&, const DataTransferProps&)`. However,
0518     /// this overload deduces the HDF5 datatype of the element of `array` from
0519     /// `T`. Note, that the file datatype is already fixed.
0520     ///
0521     /// \param array: A buffer containing enough space for the data
0522     /// \param xfer_props: Data Transfer properties
0523     template <typename T>
0524     void read_raw(T* array, const DataTransferProps& xfer_props = DataTransferProps()) const;
0525 
0526 
0527     ///
0528     /// Write the integrality N-dimension buffer to this dataset
0529     /// An exception is raised is if the numbers of dimension of the buffer and
0530     /// of the dataset are different
0531     ///
0532     /// The array type can be a N-pointer or a N-vector ( e.g int** integer two
0533     /// dimensional array )
0534     template <typename T>
0535     void write(const T& buffer, const DataTransferProps& xfer_props = DataTransferProps());
0536 
0537     ///
0538     /// Write from a raw pointer into this dataset.
0539     ///
0540     /// No dimensionality checks will be performed, it is the user's
0541     /// responsibility to ensure that the buffer holds the right amount of
0542     /// elements. For n-dimensional matrices the buffer layout follows H5
0543     /// default conventions.
0544     ///
0545     /// Note, this is the shallowest wrapper around `H5Dwrite` and should
0546     /// be used if full control is needed. Generally prefer `write`.
0547     ///
0548     /// \param buffer: A buffer containing the data to be written
0549     /// \param dtype: The datatype of `buffer`, i.e. the memory data type.
0550     /// \param xfer_props: The HDF5 data transfer properties, e.g. collective MPI-IO.
0551     template <typename T>
0552     void write_raw(const T* buffer,
0553                    const DataType& mem_datatype,
0554                    const DataTransferProps& xfer_props = DataTransferProps());
0555 
0556     ///
0557     /// Write from a raw pointer into this dataset.
0558     ///
0559     /// Same as `write_raw(const T*, const DataTransferProps&)`. However, this
0560     /// overload attempts to guess the data type of `buffer`, i.e. the memory
0561     /// datatype. Note that the file datatype is already fixed.
0562     ///
0563     template <typename T>
0564     void write_raw(const T* buffer, const DataTransferProps& xfer_props = DataTransferProps());
0565 
0566     ///
0567     /// \brief Return a `Selection` with `axes` squeezed from the memspace.
0568     ///
0569     /// Returns a selection in which the memspace has been modified
0570     /// to not include the axes listed in `axes`.
0571     ///
0572     /// Throws if any axis to be squeezes has a dimension other than `1`.
0573     ///
0574     /// \since 3.0
0575     Selection squeezeMemSpace(const std::vector<size_t>& axes) const;
0576 
0577     ///
0578     /// \brief Return a `Selection` with a simple memspace with `dims`.
0579     ///
0580     /// Returns a selection in which the memspace has been modified
0581     /// to be a simple dataspace with dimensions `dims`.
0582     ///
0583     /// Throws if the number of elements changes.
0584     ///
0585     /// \since 3.0
0586     Selection reshapeMemSpace(const std::vector<size_t>& dims) const;
0587 };
0588 
0589 }  // namespace HighFive