File indexing completed on 2025-08-28 09:11:32
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_FMA3_SSE_HPP
0013 #define XSIMD_FMA3_SSE_HPP
0014
0015 #include "../types/xsimd_fma3_sse_register.hpp"
0016
0017 namespace xsimd
0018 {
0019
0020 namespace kernel
0021 {
0022 using namespace types;
0023
0024 template <class A>
0025 XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0026 {
0027 return _mm_fnmadd_ps(x, y, z);
0028 }
0029
0030 template <class A>
0031 XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0032 {
0033 return _mm_fnmadd_pd(x, y, z);
0034 }
0035
0036
0037 template <class A>
0038 XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0039 {
0040 return _mm_fnmsub_ps(x, y, z);
0041 }
0042
0043 template <class A>
0044 XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0045 {
0046 return _mm_fnmsub_pd(x, y, z);
0047 }
0048
0049
0050 template <class A>
0051 XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0052 {
0053 return _mm_fmadd_ps(x, y, z);
0054 }
0055
0056 template <class A>
0057 XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0058 {
0059 return _mm_fmadd_pd(x, y, z);
0060 }
0061
0062
0063 template <class A>
0064 XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0065 {
0066 return _mm_fmsub_ps(x, y, z);
0067 }
0068
0069 template <class A>
0070 XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0071 {
0072 return _mm_fmsub_pd(x, y, z);
0073 }
0074
0075 }
0076
0077 }
0078
0079 #endif