xsimd/arch/xsimd_fma4.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_FMA4_HPP
0013 #define XSIMD_FMA4_HPP
0014
0015 #include "../types/xsimd_fma4_register.hpp"
0016
0017 namespace xsimd
0018 {
0019
0020     namespace kernel
0021     {
0022         using namespace types;
0023
0024         // fnma
0025         template <class A>
0026         XSIMD_INLINE batch<float, A> fnma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0027         {
0028             return _mm_nmacc_ps(x, y, z);
0029         }
0030
0031         template <class A>
0032         XSIMD_INLINE batch<double, A> fnma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0033         {
0034             return _mm_nmacc_pd(x, y, z);
0035         }
0036
0037         // fnms
0038         template <class A>
0039         XSIMD_INLINE batch<float, A> fnms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0040         {
0041             return _mm_nmsub_ps(x, y, z);
0042         }
0043
0044         template <class A>
0045         XSIMD_INLINE batch<double, A> fnms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0046         {
0047             return _mm_nmsub_pd(x, y, z);
0048         }
0049
0050         // fma
0051         template <class A>
0052         XSIMD_INLINE batch<float, A> fma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0053         {
0054             return _mm_macc_ps(x, y, z);
0055         }
0056
0057         template <class A>
0058         XSIMD_INLINE batch<double, A> fma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0059         {
0060             return _mm_macc_pd(x, y, z);
0061         }
0062
0063         // fms
0064         template <class A>
0065         XSIMD_INLINE batch<float, A> fms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
0066         {
0067             return _mm_msub_ps(x, y, z);
0068         }
0069
0070         template <class A>
0071         XSIMD_INLINE batch<double, A> fms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
0072         {
0073             return _mm_msub_pd(x, y, z);
0074         }
0075     }
0076
0077 }
0078
0079 #endif