File indexing completed on 2025-08-28 09:11:27
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_GENERIC_DETAILS_HPP
0013 #define XSIMD_GENERIC_DETAILS_HPP
0014
0015 #include <complex>
0016
0017 #include "../../math/xsimd_rem_pio2.hpp"
0018 #include "../../types/xsimd_generic_arch.hpp"
0019 #include "../../types/xsimd_utils.hpp"
0020 #include "../xsimd_constants.hpp"
0021
0022 namespace xsimd
0023 {
0024
0025 template <class T, class A>
0026 XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self) noexcept;
0027 template <class T, class A>
0028 XSIMD_INLINE batch<T, A> abs(batch<std::complex<T>, A> const& self) noexcept;
0029 template <class T, class A>
0030 XSIMD_INLINE bool any(batch_bool<T, A> const& self) noexcept;
0031 template <class T, class A>
0032 XSIMD_INLINE batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other) noexcept;
0033 template <class A, class T_out, class T_in>
0034 XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const&, batch<T_out, A> const& out) noexcept;
0035 template <class T, class A>
0036 XSIMD_INLINE batch<T, A> bitofsign(batch<T, A> const& self) noexcept;
0037 template <class T_out, class T_in, class A>
0038 XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& self) noexcept;
0039 template <class T, class A>
0040 XSIMD_INLINE batch<T, A> cos(batch<T, A> const& self) noexcept;
0041 template <class T, class A>
0042 XSIMD_INLINE batch<T, A> cosh(batch<T, A> const& self) noexcept;
0043 template <class T, class A>
0044 XSIMD_INLINE batch<T, A> exp(batch<T, A> const& self) noexcept;
0045 template <class T, class A>
0046 XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
0047 template <class T, class A>
0048 XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
0049 template <class T, class A>
0050 XSIMD_INLINE batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
0051 template <class T, class A, uint64_t... Coefs>
0052 XSIMD_INLINE batch<T, A> horner(const batch<T, A>& self) noexcept;
0053 template <class T, class A>
0054 XSIMD_INLINE batch<T, A> hypot(const batch<T, A>& self) noexcept;
0055 template <class T, class A>
0056 XSIMD_INLINE batch_bool<T, A> is_even(batch<T, A> const& self) noexcept;
0057 template <class T, class A>
0058 XSIMD_INLINE batch_bool<T, A> is_flint(batch<T, A> const& self) noexcept;
0059 template <class T, class A>
0060 XSIMD_INLINE batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
0061 template <class T, class A>
0062 XSIMD_INLINE typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& self) noexcept;
0063 template <class T, class A>
0064 XSIMD_INLINE typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& self) noexcept;
0065 template <class T, class A>
0066 XSIMD_INLINE typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
0067 template <class T, class A>
0068 XSIMD_INLINE batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
0069 template <class T, class A>
0070 XSIMD_INLINE batch<T, A> log(batch<T, A> const& self) noexcept;
0071 template <class T, class A>
0072 XSIMD_INLINE batch<T, A> nearbyint(batch<T, A> const& self) noexcept;
0073 template <class T, class A>
0074 XSIMD_INLINE batch<as_integer_t<T>, A> nearbyint_as_int(const batch<T, A>& x) noexcept;
0075 template <class T, class A>
0076 XSIMD_INLINE T reduce_add(batch<T, A> const&) noexcept;
0077 template <class T, class A>
0078 XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
0079 template <class T, class A>
0080 XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
0081 template <class T, class A>
0082 XSIMD_INLINE batch<T, A> sign(batch<T, A> const& self) noexcept;
0083 template <class T, class A>
0084 XSIMD_INLINE batch<T, A> signnz(batch<T, A> const& self) noexcept;
0085 template <class T, class A>
0086 XSIMD_INLINE batch<T, A> sin(batch<T, A> const& self) noexcept;
0087 template <class T, class A>
0088 XSIMD_INLINE batch<T, A> sinh(batch<T, A> const& self) noexcept;
0089 template <class T, class A>
0090 XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
0091 template <class T, class A>
0092 XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& self) noexcept;
0093 template <class T, class A>
0094 XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self) noexcept;
0095 template <class T, class A>
0096 XSIMD_INLINE batch<as_float_t<T>, A> to_float(batch<T, A> const& self) noexcept;
0097 template <class T, class A>
0098 XSIMD_INLINE batch<as_integer_t<T>, A> to_int(batch<T, A> const& self) noexcept;
0099 template <class T, class A>
0100 XSIMD_INLINE batch<T, A> trunc(batch<T, A> const& self) noexcept;
0101
0102 namespace kernel
0103 {
0104
0105 namespace detail
0106 {
0107 template <class F, class A, class T, class... Batches>
0108 XSIMD_INLINE batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) noexcept
0109 {
0110 constexpr std::size_t size = batch<T, A>::size;
0111 alignas(A::alignment()) T self_buffer[size];
0112 alignas(A::alignment()) T other_buffer[size];
0113 self.store_aligned(&self_buffer[0]);
0114 other.store_aligned(&other_buffer[0]);
0115 for (std::size_t i = 0; i < size; ++i)
0116 {
0117 self_buffer[i] = func(self_buffer[i], other_buffer[i]);
0118 }
0119 return batch<T, A>::load_aligned(self_buffer);
0120 }
0121
0122 template <class U, class F, class A, class T>
0123 XSIMD_INLINE batch<U, A> apply_transform(F&& func, batch<T, A> const& self) noexcept
0124 {
0125 static_assert(batch<T, A>::size == batch<U, A>::size,
0126 "Source and destination sizes must match");
0127 constexpr std::size_t src_size = batch<T, A>::size;
0128 constexpr std::size_t dest_size = batch<U, A>::size;
0129 alignas(A::alignment()) T self_buffer[src_size];
0130 alignas(A::alignment()) U other_buffer[dest_size];
0131 self.store_aligned(&self_buffer[0]);
0132 for (std::size_t i = 0; i < src_size; ++i)
0133 {
0134 other_buffer[i] = func(self_buffer[i]);
0135 }
0136 return batch<U, A>::load_aligned(other_buffer);
0137 }
0138 }
0139
0140
0141 namespace detail
0142 {
0143 template <class A>
0144 XSIMD_INLINE batch<uint8_t, A> fast_cast(batch<int8_t, A> const& self, batch<uint8_t, A> const&, requires_arch<generic>) noexcept
0145 {
0146 return bitwise_cast<uint8_t>(self);
0147 }
0148 template <class A>
0149 XSIMD_INLINE batch<uint16_t, A> fast_cast(batch<int16_t, A> const& self, batch<uint16_t, A> const&, requires_arch<generic>) noexcept
0150 {
0151 return bitwise_cast<uint16_t>(self);
0152 }
0153 template <class A>
0154 XSIMD_INLINE batch<uint32_t, A> fast_cast(batch<int32_t, A> const& self, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
0155 {
0156 return bitwise_cast<uint32_t>(self);
0157 }
0158 template <class A>
0159 XSIMD_INLINE batch<uint64_t, A> fast_cast(batch<int64_t, A> const& self, batch<uint64_t, A> const&, requires_arch<generic>) noexcept
0160 {
0161 return bitwise_cast<uint64_t>(self);
0162 }
0163 template <class A>
0164 XSIMD_INLINE batch<int8_t, A> fast_cast(batch<uint8_t, A> const& self, batch<int8_t, A> const&, requires_arch<generic>) noexcept
0165 {
0166 return bitwise_cast<int8_t>(self);
0167 }
0168 template <class A>
0169 XSIMD_INLINE batch<int16_t, A> fast_cast(batch<uint16_t, A> const& self, batch<int16_t, A> const&, requires_arch<generic>) noexcept
0170 {
0171 return bitwise_cast<int16_t>(self);
0172 }
0173 template <class A>
0174 XSIMD_INLINE batch<int32_t, A> fast_cast(batch<uint32_t, A> const& self, batch<int32_t, A> const&, requires_arch<generic>) noexcept
0175 {
0176 return bitwise_cast<int32_t>(self);
0177 }
0178 template <class A>
0179 XSIMD_INLINE batch<int64_t, A> fast_cast(batch<uint64_t, A> const& self, batch<int64_t, A> const&, requires_arch<generic>) noexcept
0180 {
0181 return bitwise_cast<int64_t>(self);
0182 }
0183
0184
0185
0186 template <class A, class _ = decltype(fast_cast(std::declval<batch<int32_t, A> const&>(), std::declval<batch<float, A> const&>(), A {}))>
0187 XSIMD_INLINE batch<float, A> fast_cast(batch<uint32_t, A> const& v, batch<float, A> const&, requires_arch<generic>) noexcept
0188 {
0189
0190 batch<uint32_t, A> msk_lo(0xFFFF);
0191 batch<float, A> cnst65536f(65536.0f);
0192
0193 auto v_lo = batch_cast<int32_t>(v & msk_lo);
0194 auto v_hi = batch_cast<int32_t>(v >> 16);
0195 auto v_lo_flt = batch_cast<float>(v_lo);
0196 auto v_hi_flt = batch_cast<float>(v_hi);
0197 v_hi_flt = cnst65536f * v_hi_flt;
0198 return v_hi_flt + v_lo_flt;
0199 }
0200
0201
0202
0203 template <class A, class _ = decltype(fast_cast(std::declval<batch<float, A> const&>(), std::declval<batch<int32_t, A> const&>(), A {}))>
0204 XSIMD_INLINE batch<uint32_t, A> fast_cast(batch<float, A> const& v, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
0205 {
0206 auto is_large = v >= batch<float, A>(1u << 31);
0207 auto small_v = bitwise_cast<float>(batch_cast<int32_t>(v));
0208 auto large_v = bitwise_cast<float>(
0209 batch_cast<int32_t>(v - batch<float, A>(1u << 31))
0210 ^ batch<int32_t, A>(1u << 31));
0211 return bitwise_cast<uint32_t>(select(is_large, large_v, small_v));
0212 }
0213 }
0214
0215 namespace detail
0216 {
0217
0218
0219
0220
// Empty tag types naming the two possible results of conversion_type:
// with_fast_conversion when a matching fast_cast overload can be called,
// with_slow_conversion otherwise.
struct with_fast_conversion {};
struct with_slow_conversion {};
0227
0228 template <class A, class From, class To, class = void>
0229 struct conversion_type_impl
0230 {
0231 using type = with_slow_conversion;
0232 };
0233
0234 using xsimd::detail::void_t;
0235
0236 template <class A, class From, class To>
0237 struct conversion_type_impl<A, From, To,
0238 void_t<decltype(fast_cast(std::declval<const batch<From, A>&>(),
0239 std::declval<const batch<To, A>&>(),
0240 std::declval<const A&>()))>>
0241 {
0242 using type = with_fast_conversion;
0243 };
0244
0245 template <class A, class From, class To>
0246 using conversion_type = typename conversion_type_impl<A, From, To>::type;
0247 }
0248
0249 namespace detail
0250 {
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260 template <class B, uint64_t c>
0261 XSIMD_INLINE B coef() noexcept
0262 {
0263 using value_type = typename B::value_type;
0264 return B(bit_cast<value_type>(as_unsigned_integer_t<value_type>(c)));
0265 }
0266 template <class B>
0267 XSIMD_INLINE B horner(const B&) noexcept
0268 {
0269 return B(typename B::value_type(0.));
0270 }
0271
0272 template <class B, uint64_t c0>
0273 XSIMD_INLINE B horner(const B&) noexcept
0274 {
0275 return coef<B, c0>();
0276 }
0277
0278 template <class B, uint64_t c0, uint64_t c1, uint64_t... args>
0279 XSIMD_INLINE B horner(const B& self) noexcept
0280 {
0281 return fma(self, horner<B, c1, args...>(self), coef<B, c0>());
0282 }
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293 template <class B>
0294 XSIMD_INLINE B horner1(const B&) noexcept
0295 {
0296 return B(1.);
0297 }
0298
0299 template <class B, uint64_t c0>
0300 XSIMD_INLINE B horner1(const B& x) noexcept
0301 {
0302 return x + detail::coef<B, c0>();
0303 }
0304
0305 template <class B, uint64_t c0, uint64_t c1, uint64_t... args>
0306 XSIMD_INLINE B horner1(const B& x) noexcept
0307 {
0308 return fma(x, horner1<B, c1, args...>(x), detail::coef<B, c0>());
0309 }
0310 }
0311
0312 }
0313
0314 }
0315
0316 #endif