Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:38:11

0001 // Copyright 2019 Hans Dembinski
0002 //
0003 // Distributed under the Boost Software License, Version 1.0.
0004 // (See accompanying file LICENSE_1_0.txt
0005 // or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 
0007 #ifndef BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
0008 #define BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
0009 
0010 #include <algorithm>
0011 #include <boost/core/make_span.hpp>
0012 #include <boost/core/span.hpp>
0013 #include <boost/histogram/axis/option.hpp>
0014 #include <boost/histogram/axis/traits.hpp>
0015 #include <boost/histogram/detail/axes.hpp>
0016 #include <boost/histogram/detail/detect.hpp>
0017 #include <boost/histogram/detail/fill.hpp>
0018 #include <boost/histogram/detail/linearize.hpp>
0019 #include <boost/histogram/detail/nonmember_container_access.hpp>
0020 #include <boost/histogram/detail/optional_index.hpp>
0021 #include <boost/histogram/detail/static_if.hpp>
0022 #include <boost/histogram/fwd.hpp>
0023 #include <boost/mp11/algorithm.hpp>
0024 #include <boost/mp11/bind.hpp>
0025 #include <boost/mp11/utility.hpp>
0026 #include <boost/throw_exception.hpp>
0027 #include <boost/variant2/variant.hpp>
0028 #include <cassert>
0029 #include <initializer_list>
0030 #include <stdexcept>
0031 #include <type_traits>
0032 #include <utility>
0033 
0034 namespace boost {
0035 namespace histogram {
0036 namespace detail {
0037 
0038 namespace dtl = boost::histogram::detail;
0039 
0040 template <class Axes, class T>
0041 using is_convertible_to_any_value_type =
0042     mp11::mp_any_of_q<value_types<Axes>, mp11::mp_bind_front<std::is_convertible, T>>;
0043 
0044 template <class T>
0045 auto to_ptr_size(const T& x) {
0046   return static_if<std::is_scalar<T>>(
0047       [](const auto& x) { return std::make_pair(&x, static_cast<std::size_t>(0)); },
0048       [](const auto& x) { return std::make_pair(dtl::data(x), dtl::size(x)); }, x);
0049 }
0050 
0051 template <class F, class V>
0052 decltype(auto) maybe_visit(F&& f, V&& v) {
0053   return static_if<is_variant<std::decay_t<V>>>(
0054       [](auto&& f, auto&& v) {
0055         return variant2::visit(std::forward<F>(f), std::forward<V>(v));
0056       },
0057       [](auto&& f, auto&& v) { return std::forward<F>(f)(std::forward<V>(v)); },
0058       std::forward<F>(f), std::forward<V>(v));
0059 }
0060 
0061 template <class Index, class Axis, class IsGrowing>
0062 struct index_visitor {
0063   using index_type = Index;
0064   using pointer = index_type*;
0065   using value_type = axis::traits::value_type<Axis>;
0066   using Opt = axis::traits::get_options<Axis>;
0067 
0068   Axis& axis_;
0069   const std::size_t stride_, start_, size_; // start and size of value collection
0070   const pointer begin_;
0071   axis::index_type* shift_;
0072 
0073   index_visitor(Axis& a, std::size_t& str, const std::size_t& sta, const std::size_t& si,
0074                 const pointer it, axis::index_type* shift)
0075       : axis_(a), stride_(str), start_(sta), size_(si), begin_(it), shift_(shift) {}
0076 
0077   template <class T>
0078   void call_2(std::true_type, pointer it, const T& x) const {
0079     // must use this code for all axes if one of them is growing
0080     axis::index_type shift;
0081     linearize_growth(*it, shift, stride_, axis_,
0082                      try_cast<value_type, std::invalid_argument>(x));
0083     if (shift > 0) { // shift previous indices, because axis zero-point has changed
0084       while (it != begin_) *--it += static_cast<std::size_t>(shift) * stride_;
0085       *shift_ += shift;
0086     }
0087   }
0088 
0089   template <class T>
0090   void call_2(std::false_type, pointer it, const T& x) const {
0091     // no axis is growing
0092     linearize(*it, stride_, axis_, try_cast<value_type, std::invalid_argument>(x));
0093   }
0094 
0095   template <class T>
0096   void call_1(std::false_type, const T& iterable) const {
0097     // T is iterable; fill N values
0098     const auto* tp = dtl::data(iterable) + start_;
0099     for (auto it = begin_; it != begin_ + size_; ++it) call_2(IsGrowing{}, it, *tp++);
0100   }
0101 
0102   template <class T>
0103   void call_1(std::true_type, const T& value) const {
0104     // T is compatible value; fill single value N times
0105 
0106     // Optimization: We call call_2 only once and then add the index shift onto the
0107     // whole array of indices, because it is always the same. This also works if the
0108     // axis grows during this operation. There are no shifts to apply if the zero-point
0109     // changes.
0110     const auto before = *begin_;
0111     call_2(IsGrowing{}, begin_, value);
0112     if (is_valid(*begin_)) {
0113       // since index can be std::size_t or optional_index, must do conversion here
0114       const auto delta =
0115           static_cast<std::intptr_t>(*begin_) - static_cast<std::intptr_t>(before);
0116       for (auto it = begin_ + 1; it != begin_ + size_; ++it) *it += delta;
0117     } else
0118       std::fill(begin_, begin_ + size_, invalid_index);
0119   }
0120 
0121   template <class T>
0122   void operator()(const T& iterable_or_value) const {
0123     call_1(mp11::mp_bool<(std::is_convertible<T, value_type>::value ||
0124                           !is_iterable<T>::value)>{},
0125            iterable_or_value);
0126   }
0127 };
0128 
0129 template <class Index, class S, class Axes, class T>
0130 void fill_n_indices(Index* indices, const std::size_t start, const std::size_t size,
0131                     const std::size_t offset, S& storage, Axes& axes, const T* viter) {
0132   axis::index_type extents[buffer_size<Axes>::value];
0133   axis::index_type shifts[buffer_size<Axes>::value];
0134   for_each_axis(axes, [eit = extents, sit = shifts](const auto& a) mutable {
0135     *sit++ = 0;
0136     *eit++ = axis::traits::extent(a);
0137   }); // LCOV_EXCL_LINE: gcc-8 is missing this line for no reason
0138 
0139   // TODO this seems to always take the path for growing axes, even if Axes is vector
0140   // of variant and types actually held are not growing axes?
0141   // index offset must be zero for growing axes
0142   using IsGrowing = has_growing_axis<Axes>;
0143   std::fill(indices, indices + size, IsGrowing::value ? 0 : offset);
0144   for_each_axis(axes, [&, stride = static_cast<std::size_t>(1),
0145                        pshift = shifts](auto& axis) mutable {
0146     using Axis = std::decay_t<decltype(axis)>;
0147     maybe_visit(
0148         index_visitor<Index, Axis, IsGrowing>{axis, stride, start, size, indices, pshift},
0149         *viter++);
0150     stride *= static_cast<std::size_t>(axis::traits::extent(axis));
0151     ++pshift;
0152   });
0153 
0154   bool update_needed = false;
0155   for_each_axis(axes, [&update_needed, eit = extents](const auto& a) mutable {
0156     update_needed |= *eit++ != axis::traits::extent(a);
0157   });
0158   if (update_needed) {
0159     storage_grower<Axes> g(axes);
0160     g.from_extents(extents);
0161     g.apply(storage, shifts);
0162   }
0163 }
0164 
0165 template <class S, class Index, class... Ts>
0166 void fill_n_storage(S& s, const Index idx, Ts&&... p) noexcept {
0167   if (is_valid(idx)) {
0168     assert(idx < s.size());
0169     fill_storage_element(s[idx], *p.first...);
0170   }
0171   // operator folding emulation
0172   (void)std::initializer_list<int>{(p.second ? (++p.first, 0) : 0)...};
0173 }
0174 
0175 template <class S, class Index, class T, class... Ts>
0176 void fill_n_storage(S& s, const Index idx, weight_type<T>&& w, Ts&&... ps) noexcept {
0177   if (is_valid(idx)) {
0178     assert(idx < s.size());
0179     fill_storage_element(s[idx], weight(*w.value.first), *ps.first...);
0180   }
0181   if (w.value.second) ++w.value.first;
0182   // operator folding emulation
0183   (void)std::initializer_list<int>{(ps.second ? (++ps.first, 0) : 0)...};
0184 }
0185 
0186 // general Nd treatment
0187 template <class Index, class S, class A, class T, class... Ts>
0188 void fill_n_nd(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
0189                const T* values, Ts&&... ts) {
0190   constexpr std::size_t buffer_size = 1ul << 14;
0191   Index indices[buffer_size];
0192 
0193   /*
0194     Parallelization options.
0195 
0196     A) Run the whole fill2 method in parallel, each thread fills its own buffer of
0197     indices, synchronization (atomics) are needed to synchronize the incrementing of
0198     the storage cells. This leads to a lot of congestion for small histograms.
0199 
0200     B) Run only fill_n_indices in parallel, subsections of the indices buffer
0201     can be filled by different threads. The final loop that fills the storage runs
0202     in the main thread, this requires no synchronization for the storage, cells do
0203     not need to support atomic operations.
0204 
0205     C) Like B), then sort the indices in the main thread and fill the
0206     storage in parallel, where each thread uses a disjunct set of indices. This
0207     should create less congestion and requires no synchronization for the storage.
0208 
0209     Note on C): Let's say we have an axis with 5 bins (with *flow to simplify).
0210     Then after filling 10 values, converting to indices and sorting, the index
0211     buffer may look like this: 0 0 0 1 2 2 2 4 4 5. Let's use two threads to fill
0212     the storage. Still in the main thread, we compute an iterator to the middle of
0213     the index buffer and move it to the right until the pointee changes. Now we have
0214     two ranges which contain disjunct sets of indices. We pass these ranges to the
0215     threads which then fill the storage. Since the threads by construction do not
0216     compete to increment the same cell, no further synchronization is required.
0217 
0218     In all cases, growing axes cannot be parallelized.
0219   */
0220 
0221   for (std::size_t start = 0; start < vsize; start += buffer_size) {
0222     const std::size_t n = (std::min)(buffer_size, vsize - start);
0223     // fill buffer of indices...
0224     fill_n_indices(indices, start, n, offset, storage, axes, values);
0225     // ...and fill corresponding storage cells
0226     for (auto&& idx : make_span(indices, n))
0227       fill_n_storage(storage, idx, std::forward<Ts>(ts)...);
0228   }
0229 }
0230 
0231 template <class S, class... As, class T, class... Us>
0232 void fill_n_1(const std::size_t offset, S& storage, std::tuple<As...>& axes,
0233               const std::size_t vsize, const T* values, Us&&... us) {
0234   using index_type =
0235       mp11::mp_if<has_non_inclusive_axis<std::tuple<As...>>, optional_index, std::size_t>;
0236   fill_n_nd<index_type>(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
0237 }
0238 
0239 template <class S, class A, class T, class... Us>
0240 void fill_n_1(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
0241               const T* values, Us&&... us) {
0242   bool all_inclusive = true;
0243   for_each_axis(axes,
0244                 [&](const auto& ax) { all_inclusive &= axis::traits::inclusive(ax); });
0245   if (axes_rank(axes) == 1) {
0246     // Optimization: benchmark shows that this makes filling dynamic 1D histogram faster
0247     axis::visit(
0248         [&](auto& ax) {
0249           std::tuple<decltype(ax)> axes{ax};
0250           fill_n_1(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
0251         },
0252         axes[0]);
0253   } else {
0254     if (all_inclusive)
0255       fill_n_nd<std::size_t>(offset, storage, axes, vsize, values,
0256                              std::forward<Us>(us)...);
0257     else
0258       fill_n_nd<optional_index>(offset, storage, axes, vsize, values,
0259                                 std::forward<Us>(us)...);
0260   }
0261 }
0262 
0263 template <class A, class T, std::size_t N>
0264 std::size_t get_total_size(const A& axes, const span<const T, N>& values) {
0265   // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant):
0266   // - span<CT, N>: for any histogram, N == rank
0267   // - span<V<T, CT>, N>: for any histogram, N == rank
0268   assert(axes_rank(axes) == values.size());
0269   constexpr auto unset = static_cast<std::size_t>(-1);
0270   std::size_t size = unset;
0271   for_each_axis(axes, [&size, vit = values.begin()](const auto& ax) mutable {
0272     using AV = axis::traits::value_type<std::decay_t<decltype(ax)>>;
0273     maybe_visit(
0274         [&size](const auto& v) {
0275           // v is either convertible to value or a sequence of values
0276           using V = std::remove_const_t<std::remove_reference_t<decltype(v)>>;
0277           static_if_c<(std::is_convertible<decltype(v), AV>::value ||
0278                        !is_iterable<V>::value)>(
0279               [](const auto&) {},
0280               [&size](const auto& v) {
0281                 const auto n = dtl::size(v);
0282                 // must repeat this here for msvc :(
0283                 constexpr auto unset = static_cast<std::size_t>(-1);
0284                 if (size == unset)
0285                   size = dtl::size(v);
0286                 else if (size != n)
0287                   BOOST_THROW_EXCEPTION(
0288                       std::invalid_argument("spans must have compatible lengths"));
0289               },
0290               v);
0291         },
0292         *vit++);
0293   });
0294   // if all arguments are not iterables, return size of 1
0295   return size == unset ? 1 : size;
0296 }
0297 
0298 inline void fill_n_check_extra_args(std::size_t) noexcept {}
0299 
0300 template <class T, class... Ts>
0301 void fill_n_check_extra_args(std::size_t size, T&& x, Ts&&... ts) {
0302   // sequences must have same lengths, but sequences of length 0 are broadcast
0303   if (x.second != 0 && x.second != size)
0304     BOOST_THROW_EXCEPTION(std::invalid_argument("spans must have compatible lengths"));
0305   fill_n_check_extra_args(size, std::forward<Ts>(ts)...);
0306 }
0307 
0308 template <class T, class... Ts>
0309 void fill_n_check_extra_args(std::size_t size, weight_type<T>&& w, Ts&&... ts) {
0310   fill_n_check_extra_args(size, w.value, std::forward<Ts>(ts)...);
0311 }
0312 
0313 template <class S, class A, class T, std::size_t N, class... Us>
0314 void fill_n(std::true_type, const std::size_t offset, S& storage, A& axes,
0315             const span<const T, N> values, Us&&... us) {
0316   // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant):
0317   // - span<T, N>: only valid for 1D histogram, N > 1 allowed
0318   // - span<CT, N>: for any histogram, N == rank
0319   // - span<V<T, CT>, N>: for any histogram, N == rank
0320   static_if<is_convertible_to_any_value_type<A, T>>(
0321       [&](const auto& values, auto&&... us) {
0322         // T matches one of the axis value types, must be 1D special case
0323         if (axes_rank(axes) != 1)
0324           BOOST_THROW_EXCEPTION(
0325               std::invalid_argument("number of arguments must match histogram rank"));
0326         fill_n_check_extra_args(values.size(), std::forward<Us>(us)...);
0327         fill_n_1(offset, storage, axes, values.size(), &values, std::forward<Us>(us)...);
0328       },
0329       [&](const auto& values, auto&&... us) {
0330         // generic ND case
0331         if (axes_rank(axes) != values.size())
0332           BOOST_THROW_EXCEPTION(
0333               std::invalid_argument("number of arguments must match histogram rank"));
0334         const auto vsize = get_total_size(axes, values);
0335         fill_n_check_extra_args(vsize, std::forward<Us>(us)...);
0336         fill_n_1(offset, storage, axes, vsize, values.data(), std::forward<Us>(us)...);
0337       },
0338       values, std::forward<Us>(us)...);
0339 }
0340 
0341 // empty implementation for bad arguments to stop compiler from showing internals
0342 template <class... Ts>
0343 void fill_n(std::false_type, Ts...) {}
0344 
0345 } // namespace detail
0346 } // namespace histogram
0347 } // namespace boost
0348 
0349 #endif // BOOST_HISTOGRAM_DETAIL_FILL_N_HPP