Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 09:11:27

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011 
0012 #ifndef XSIMD_GENERIC_LOGICAL_HPP
0013 #define XSIMD_GENERIC_LOGICAL_HPP
0014 
0015 #include "./xsimd_generic_details.hpp"
0016 
0017 #include <climits>
0018 
0019 namespace xsimd
0020 {
0021 
0022     namespace kernel
0023     {
0024 
0025         using namespace types;
0026 
0027         // count
0028         template <class A, class T>
0029         XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
0030         {
0031             uint64_t m = self.mask();
0032             XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 14)
0033             {
0034                 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
0035                 return (m * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
0036             }
0037             else
0038             {
0039 #if defined __has_builtin
0040 #if __has_builtin(__builtin_popcountg)
0041 #define builtin_popcount(v) __builtin_popcountg(v)
0042 #endif
0043 #endif
0044 
0045 #ifdef builtin_popcount
0046                 return builtin_popcount(m);
0047 #else
0048                 // FIXME: we could do better by dispatching to the appropriate
0049                 // popcount instruction depending on the arch...
0050                 XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size <= 32)
0051                 {
0052                     uint32_t m32 = m;
0053                     // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
0054                     m32 = m32 - ((m32 >> 1) & 0x55555555); // reuse input as temporary
0055                     m32 = (m32 & 0x33333333) + ((m32 >> 2) & 0x33333333); // temp
0056                     return (((m32 + (m32 >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; // count
0057                 }
0058                 else
0059                 {
0060                     // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
0061                     m = m - ((m >> 1) & (uint64_t) ~(uint64_t)0 / 3); // temp
0062                     m = (m & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3); // temp
0063                     m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
0064                     return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
0065                 }
0066 #endif
0067             }
0068         }
0069 
0070         // from  mask
0071         template <class A, class T>
0072         XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<generic>) noexcept
0073         {
0074             alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
0075             // This is inefficient but should never be called. It's just a
0076             // temporary implementation until arm support is added.
0077             for (size_t i = 0; i < batch_bool<T, A>::size; ++i)
0078                 buffer[i] = mask & (1ull << i);
0079             return batch_bool<T, A>::load_aligned(buffer);
0080         }
0081 
0082         // ge
0083         template <class A, class T>
0084         XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0085         {
0086             return other <= self;
0087         }
0088 
0089         // gt
0090         template <class A, class T>
0091         XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0092         {
0093             return other < self;
0094         }
0095 
0096         // is_even
0097         template <class A, class T>
0098         XSIMD_INLINE batch_bool<T, A> is_even(batch<T, A> const& self, requires_arch<generic>) noexcept
0099         {
0100             return is_flint(self * T(0.5));
0101         }
0102 
        // is_flint
        // True on lanes holding a "floating-point integer" (a whole number).
        // self - self is NaN exactly when self is NaN or +/-infinity; on those
        // lanes we substitute NaN for the fractional part so that the final
        // == T(0.) comparison yields false there.
        template <class A, class T>
        XSIMD_INLINE batch_bool<T, A> is_flint(batch<T, A> const& self, requires_arch<generic>) noexcept
        {
            auto frac = select(isnan(self - self), constants::nan<batch<T, A>>(), self - trunc(self));
            return frac == T(0.);
        }
0110 
0111         // is_odd
0112         template <class A, class T>
0113         XSIMD_INLINE batch_bool<T, A> is_odd(batch<T, A> const& self, requires_arch<generic>) noexcept
0114         {
0115             return is_even(self - T(1.));
0116         }
0117 
0118         // isinf
0119         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0120         XSIMD_INLINE batch_bool<T, A> isinf(batch<T, A> const&, requires_arch<generic>) noexcept
0121         {
0122             return batch_bool<T, A>(false);
0123         }
0124         template <class A>
0125         XSIMD_INLINE batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<generic>) noexcept
0126         {
0127             return abs(self) == std::numeric_limits<float>::infinity();
0128         }
0129         template <class A>
0130         XSIMD_INLINE batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<generic>) noexcept
0131         {
0132             return abs(self) == std::numeric_limits<double>::infinity();
0133         }
0134 
0135         // isfinite
0136         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0137         XSIMD_INLINE batch_bool<T, A> isfinite(batch<T, A> const&, requires_arch<generic>) noexcept
0138         {
0139             return batch_bool<T, A>(true);
0140         }
0141         template <class A>
0142         XSIMD_INLINE batch_bool<float, A> isfinite(batch<float, A> const& self, requires_arch<generic>) noexcept
0143         {
0144             return (self - self) == 0.f;
0145         }
0146         template <class A>
0147         XSIMD_INLINE batch_bool<double, A> isfinite(batch<double, A> const& self, requires_arch<generic>) noexcept
0148         {
0149             return (self - self) == 0.;
0150         }
0151 
0152         // isnan
0153         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0154         XSIMD_INLINE batch_bool<T, A> isnan(batch<T, A> const&, requires_arch<generic>) noexcept
0155         {
0156             return batch_bool<T, A>(false);
0157         }
0158 
0159         // le
0160         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0161         XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0162         {
0163             return (self < other) || (self == other);
0164         }
0165 
0166         // neq
0167         template <class A, class T>
0168         XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0169         {
0170             return !(other == self);
0171         }
0172 
0173         // logical_and
0174         template <class A, class T>
0175         XSIMD_INLINE batch<T, A> logical_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0176         {
0177             return detail::apply([](T x, T y) noexcept
0178                                  { return x && y; },
0179                                  self, other);
0180         }
0181 
0182         // logical_or
0183         template <class A, class T>
0184         XSIMD_INLINE batch<T, A> logical_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0185         {
0186             return detail::apply([](T x, T y) noexcept
0187                                  { return x || y; },
0188                                  self, other);
0189         }
0190 
0191         // mask
0192         template <class A, class T>
0193         XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
0194         {
0195             alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
0196             self.store_aligned(buffer);
0197             // This is inefficient but should never be called. It's just a
0198             // temporary implementation until arm support is added.
0199             uint64_t res = 0;
0200             for (size_t i = 0; i < batch_bool<T, A>::size; ++i)
0201                 if (buffer[i])
0202                     res |= 1ul << i;
0203             return res;
0204         }
0205     }
0206 }
0207 
0208 #endif