File indexing completed on 2025-08-28 09:11:32
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_FMA4_HPP
0013 #define XSIMD_FMA4_HPP
0014
0015 #include "../types/xsimd_fma4_register.hpp"
0016
0017 namespace xsimd
0018 {
0019
0020 namespace kernel
0021 {
0022 using namespace types;
0023
0024
0025 template <class A>
0026 XSIMD_INLINE batch<float, A> fnma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0027 {
0028 return _mm_nmacc_ps(x, y, z);
0029 }
0030
0031 template <class A>
0032 XSIMD_INLINE batch<double, A> fnma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0033 {
0034 return _mm_nmacc_pd(x, y, z);
0035 }
0036
0037
0038 template <class A>
0039 XSIMD_INLINE batch<float, A> fnms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0040 {
0041 return _mm_nmsub_ps(x, y, z);
0042 }
0043
0044 template <class A>
0045 XSIMD_INLINE batch<double, A> fnms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0046 {
0047 return _mm_nmsub_pd(x, y, z);
0048 }
0049
0050
0051 template <class A>
0052 XSIMD_INLINE batch<float, A> fma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0053 {
0054 return _mm_macc_ps(x, y, z);
0055 }
0056
0057 template <class A>
0058 XSIMD_INLINE batch<double, A> fma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0059 {
0060 return _mm_macc_pd(x, y, z);
0061 }
0062
0063
0064 template <class A>
0065 XSIMD_INLINE batch<float, A> fms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0066 {
0067 return _mm_msub_ps(x, y, z);
0068 }
0069
0070 template <class A>
0071 XSIMD_INLINE batch<double, A> fms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0072 {
0073 return _mm_msub_pd(x, y, z);
0074 }
0075 }
0076
0077 }
0078
0079 #endif