Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 09:11:32

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011 
0012 #ifndef XSIMD_FMA3_SSE_HPP
0013 #define XSIMD_FMA3_SSE_HPP
0014 
0015 #include "../types/xsimd_fma3_sse_register.hpp"
0016 
0017 namespace xsimd
0018 {
0019 
0020     namespace kernel
0021     {
0022         using namespace types;
0023         // fnma
0024         template <class A>
0025         XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0026         {
0027             return _mm_fnmadd_ps(x, y, z);
0028         }
0029 
0030         template <class A>
0031         XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0032         {
0033             return _mm_fnmadd_pd(x, y, z);
0034         }
0035 
0036         // fnms
0037         template <class A>
0038         XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0039         {
0040             return _mm_fnmsub_ps(x, y, z);
0041         }
0042 
0043         template <class A>
0044         XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0045         {
0046             return _mm_fnmsub_pd(x, y, z);
0047         }
0048 
0049         // fma
0050         template <class A>
0051         XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0052         {
0053             return _mm_fmadd_ps(x, y, z);
0054         }
0055 
0056         template <class A>
0057         XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0058         {
0059             return _mm_fmadd_pd(x, y, z);
0060         }
0061 
0062         // fms
0063         template <class A>
0064         XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0065         {
0066             return _mm_fmsub_ps(x, y, z);
0067         }
0068 
0069         template <class A>
0070         XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma3<sse4_2>>) noexcept
0071         {
0072             return _mm_fmsub_pd(x, y, z);
0073         }
0074 
0075     }
0076 
0077 }
0078 
0079 #endif