xsimd/arch/xsimd_fma3_avx.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_FMA3_AVX_HPP
0013 #define XSIMD_FMA3_AVX_HPP
0014
0015 #include "../types/xsimd_fma3_avx_register.hpp"
0016
0017 namespace xsimd
0018 {
0019
0020     namespace kernel
0021     {
0022         using namespace types;
0023
0024         // fnma
0025         template <class A>
0026         XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0027         {
0028             return _mm256_fnmadd_ps(x, y, z);
0029         }
0030
0031         template <class A>
0032         XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0033         {
0034             return _mm256_fnmadd_pd(x, y, z);
0035         }
0036
0037         // fnms
0038         template <class A>
0039         XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0040         {
0041             return _mm256_fnmsub_ps(x, y, z);
0042         }
0043
0044         template <class A>
0045         XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0046         {
0047             return _mm256_fnmsub_pd(x, y, z);
0048         }
0049
0050         // fma
0051         template <class A>
0052         XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0053         {
0054             return _mm256_fmadd_ps(x, y, z);
0055         }
0056
0057         template <class A>
0058         XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0059         {
0060             return _mm256_fmadd_pd(x, y, z);
0061         }
0062
0063         // fms
0064         template <class A>
0065         XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<avx>>) noexcept
0066         {
0067             return _mm256_fmsub_ps(x, y, z);
0068         }
0069
0070         template <class A>
0071         XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<avx>>) noexcept
0072         {
0073             return _mm256_fmsub_pd(x, y, z);
0074         }
0075
0076     }
0077
0078 }
0079
0080 #endif