File indexing completed on 2025-01-31 10:25:31
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_AVX_CONST_H_
0029 #define VC_AVX_CONST_H_
0030
0031 #include <cstddef>
0032 #include "types.h"
0033 #include "const_data.h"
0034 #include "macros.h"
0035
0036 namespace Vc_VERSIONED_NAMESPACE
0037 {
0038 namespace AVX
0039 {
0040 template<typename T> struct IndexesFromZeroData;
0041 template<> struct IndexesFromZeroData<int> {
0042 static Vc_ALWAYS_INLINE Vc_CONST const int *address() { return reinterpret_cast<const int *>(&_IndexesFromZero32[0]); }
0043 };
0044 template<> struct IndexesFromZeroData<unsigned int> {
0045 static Vc_ALWAYS_INLINE Vc_CONST const unsigned int *address() { return &_IndexesFromZero32[0]; }
0046 };
0047 template<> struct IndexesFromZeroData<short> {
0048 static Vc_ALWAYS_INLINE Vc_CONST const short *address() { return reinterpret_cast<const short *>(&_IndexesFromZero16[0]); }
0049 };
0050 template<> struct IndexesFromZeroData<unsigned short> {
0051 static Vc_ALWAYS_INLINE Vc_CONST const unsigned short *address() { return &_IndexesFromZero16[0]; }
0052 };
0053 template<> struct IndexesFromZeroData<signed char> {
0054 static Vc_ALWAYS_INLINE Vc_CONST const signed char *address() { return reinterpret_cast<const signed char *>(&_IndexesFromZero8[0]); }
0055 };
0056 template<> struct IndexesFromZeroData<char> {
0057 static Vc_ALWAYS_INLINE Vc_CONST const char *address() { return reinterpret_cast<const char *>(&_IndexesFromZero8[0]); }
0058 };
0059 template<> struct IndexesFromZeroData<unsigned char> {
0060 static Vc_ALWAYS_INLINE Vc_CONST const unsigned char *address() { return &_IndexesFromZero8[0]; }
0061 };
0062
0063 template<typename _T> struct Const
0064 {
0065 typedef Vector<_T> V;
0066 typedef typename V::EntryType T;
0067 typedef typename V::Mask M;
0068
0069 static Vc_ALWAYS_INLINE Vc_CONST V _pi_4() { return V(c_trig<T>::data[0]); }
0070 static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_hi() { return V(c_trig<T>::data[1]); }
0071 static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem1() { return V(c_trig<T>::data[2]); }
0072 static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem2() { return V(c_trig<T>::data[3]); }
0073 static Vc_ALWAYS_INLINE Vc_CONST V _1_16() { return V(c_trig<T>::data[4]); }
0074 static Vc_ALWAYS_INLINE Vc_CONST V _16() { return V(c_trig<T>::data[5]); }
0075
0076 static Vc_ALWAYS_INLINE Vc_CONST V atanP(int i) { return V(c_trig<T>::data[(12 + i)]); }
0077 static Vc_ALWAYS_INLINE Vc_CONST V atanQ(int i) { return V(c_trig<T>::data[(17 + i)]); }
0078 static Vc_ALWAYS_INLINE Vc_CONST V atanThrsHi() { return V(c_trig<T>::data[22]); }
0079 static Vc_ALWAYS_INLINE Vc_CONST V atanThrsLo() { return V(c_trig<T>::data[23]); }
0080 static Vc_ALWAYS_INLINE Vc_CONST V _pi_2_rem() { return V(c_trig<T>::data[24]); }
0081 static Vc_ALWAYS_INLINE Vc_CONST V lossThreshold() { return V(c_trig<T>::data[8]); }
0082 static Vc_ALWAYS_INLINE Vc_CONST V _4_pi() { return V(c_trig<T>::data[9]); }
0083 static Vc_ALWAYS_INLINE Vc_CONST V _pi_2() { return V(c_trig<T>::data[10]); }
0084 static Vc_ALWAYS_INLINE Vc_CONST V _pi() { return V(c_trig<T>::data[11]); }
0085 static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff0(int i) { return V(c_trig<T>::data[(28 + i)]); }
0086 static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff1(int i) { return V(c_trig<T>::data[(33 + i)]); }
0087 static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff2(int i) { return V(c_trig<T>::data[(37 + i)]); }
0088 static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff3(int i) { return V(c_trig<T>::data[(43 + i)]); }
0089 static Vc_ALWAYS_INLINE Vc_CONST V smallAsinInput() { return V(c_trig<T>::data[25]); }
0090 static Vc_ALWAYS_INLINE Vc_CONST V largeAsinInput() { return V(c_trig<T>::data[26]); }
0091
0092 static Vc_ALWAYS_INLINE Vc_CONST M exponentMask() { return M(V(c_log<T>::d(1)).data()); }
0093 static Vc_ALWAYS_INLINE Vc_CONST V _1_2() { return V(c_log<T>::d(18)); }
0094 static Vc_ALWAYS_INLINE Vc_CONST V _1_sqrt2() { return V(c_log<T>::d(15)); }
0095 static Vc_ALWAYS_INLINE Vc_CONST V P(int i) { return V(c_log<T>::d(2 + i)); }
0096 static Vc_ALWAYS_INLINE Vc_CONST V Q(int i) { return V(c_log<T>::d(8 + i)); }
0097 static Vc_ALWAYS_INLINE Vc_CONST V min() { return V(c_log<T>::d(14)); }
0098 static Vc_ALWAYS_INLINE Vc_CONST V ln2_small() { return V(c_log<T>::d(17)); }
0099 static Vc_ALWAYS_INLINE Vc_CONST V ln2_large() { return V(c_log<T>::d(16)); }
0100 static Vc_ALWAYS_INLINE Vc_CONST V neginf() { return V(c_log<T>::d(13)); }
0101 static Vc_ALWAYS_INLINE Vc_CONST V log10_e() { return V(c_log<T>::d(19)); }
0102 static Vc_ALWAYS_INLINE Vc_CONST V log2_e() { return V(c_log<T>::d(20)); }
0103
0104 static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask() Vc_ALWAYS_INLINE_R Vc_CONST_R;
0105 static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask(int bits) Vc_ALWAYS_INLINE_R Vc_CONST_R;
0106 };
0107
0108 template <> Vc_ALWAYS_INLINE Vc_CONST Vector<float> Const<float>::highMask()
0109 {
0110 return _mm256_broadcast_ss(
0111 reinterpret_cast<const float *>(&c_general::highMaskFloat));
0112 }
0113 template <> Vc_ALWAYS_INLINE Vc_CONST Vector<double> Const<double>::highMask()
0114 {
0115 return _mm256_broadcast_sd(
0116 reinterpret_cast<const double *>(&c_general::highMaskDouble));
0117 }
0118 template <> Vc_ALWAYS_INLINE Vc_CONST Vector<float> Const<float>::highMask(int bits)
0119 {
0120 #ifdef Vc_IMPL_AVX2
0121 #if defined Vc_ICC || defined Vc_MSVC
0122 __m256i allone = _mm256_set1_epi64x(~0);
0123 #else
0124 auto allone = ~__m256i();
0125 #endif
0126 return _mm256_castsi256_ps(_mm256_slli_epi32(allone, bits));
0127 #else
0128 __m128 tmp = _mm_castsi128_ps(_mm_slli_epi32(_mm_setallone_si128(), bits));
0129 return concat(tmp, tmp);
0130 #endif
0131 }
0132 template <> Vc_ALWAYS_INLINE Vc_CONST Vector<double> Const<double>::highMask(int bits)
0133 {
0134 #ifdef Vc_IMPL_AVX2
0135 #if defined Vc_ICC || defined Vc_MSVC
0136 __m256i allone = _mm256_set1_epi64x(~0);
0137 #else
0138 auto allone = ~__m256i();
0139 #endif
0140 return _mm256_castsi256_pd(_mm256_slli_epi64(allone, bits));
0141 #else
0142 __m128d tmp = _mm_castsi128_pd(_mm_slli_epi64(_mm_setallone_si128(), bits));
0143 return concat(tmp, tmp);
0144 #endif
0145 }
0146 }
0147
0148 namespace AVX2
0149 {
0150 using AVX::IndexesFromZeroData;
0151 using AVX::Const;
0152 }
0153 }
0154
0155 #endif