File indexing completed on 2025-08-28 09:11:32
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_FMA3_AVX_HPP
0013 #define XSIMD_FMA3_AVX_HPP
0014
0015 #include "../types/xsimd_fma3_avx_register.hpp"
0016
0017 namespace xsimd
0018 {
0019
0020 namespace kernel
0021 {
0022 using namespace types;
0023
0024
0025 template <class A>
0026 XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0027 {
0028 return _mm256_fnmadd_ps(x, y, z);
0029 }
0030
0031 template <class A>
0032 XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0033 {
0034 return _mm256_fnmadd_pd(x, y, z);
0035 }
0036
0037
0038 template <class A>
0039 XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0040 {
0041 return _mm256_fnmsub_ps(x, y, z);
0042 }
0043
0044 template <class A>
0045 XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0046 {
0047 return _mm256_fnmsub_pd(x, y, z);
0048 }
0049
0050
0051 template <class A>
0052 XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0053 {
0054 return _mm256_fmadd_ps(x, y, z);
0055 }
0056
0057 template <class A>
0058 XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0059 {
0060 return _mm256_fmadd_pd(x, y, z);
0061 }
0062
0063
0064 template <class A>
0065 XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0066 {
0067 return _mm256_fmsub_ps(x, y, z);
0068 }
0069
0070 template <class A>
0071 XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0072 {
0073 return _mm256_fmsub_pd(x, y, z);
0074 }
0075
0076 }
0077
0078 }
0079
0080 #endif