#ifndef VECCORE_BACKEND_IMPLEMENTATION_H
#define VECCORE_BACKEND_IMPLEMENTATION_H

#include "Interface.h"
#include "../Limits.h"

#include <algorithm>
#include <type_traits>

namespace vecCore {

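// VectorSize reports the number of scalar lanes in a (possibly SIMD) type T.
// Usage sketch, assuming the scalar backend where plain arithmetic types
// count as a single lane:
//
//   double x = 1.0;
//   static_assert(vecCore::VectorSize<double>() == 1, "one lane per scalar");
//   size_t lanes = vecCore::VectorSize(x); // type deduced from the argument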
template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
constexpr size_t VectorSize()
{
  using V = typename std::decay<T>::type;
  return TypeTraits<V>::Size;
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
constexpr size_t VectorSize(const T &)
{
  return VectorSize<T>();
}

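// Iterators: Begin/End expose the lanes of a vector as a contiguous range of
// Scalar<T>, so generic code can loop over them directly. Sketch, assuming a
// hypothetical backend type Double_v whose lanes are laid out contiguously:
//
//   Double_v v;
//   for (auto *p = vecCore::Begin(v); p != vecCore::End(v); ++p)
//     *p = 0.0; // zero every lane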
template <typename T>
struct IteratorImplementation {
  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> *Begin(T &v)
  {
    Scalar<T> *addr = (Scalar<T> *)(&v);
    return addr;
  }

  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> *End(T &v)
  {
    Scalar<T> *addr = (Scalar<T> *)(&v);
    return addr + VectorSize<T>(); // one past the last lane
  }

  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> const *Begin(const T &v)
  {
    Scalar<T> const *addr = (Scalar<T> const *)(&v);
    return addr;
  }

  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> const *End(const T &v)
  {
    Scalar<T> const *addr = (Scalar<T> const *)(&v);
    return addr + VectorSize<T>(); // one past the last lane
  }
};

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> *Begin(T &v)
{
  return IteratorImplementation<T>::Begin(v);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> *End(T &v)
{
  return IteratorImplementation<T>::End(v);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> const *Begin(T const &v)
{
  return IteratorImplementation<T>::Begin(v);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> const *End(T const &v)
{
  return IteratorImplementation<T>::End(v);
}

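// Get/Set provide lane-wise element access for types without operator[].
// Sketch, assuming the scalar backend where lane 0 is the only lane:
//
//   double x = 0.0;
//   vecCore::Set(x, 0, 3.0);       // write lane 0
//   double y = vecCore::Get(x, 0); // read lane 0, y == 3.0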
template <typename T>
struct IndexingImplementation {
  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> Get(const T &v, size_t i) { return *(Begin(v) + i); }

  VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static void Set(T &v, size_t i, Scalar<T> const val)
  {
    *(Begin(v) + i) = val;
  }
};

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> Get(const T &v, size_t i)
{
  return IndexingImplementation<T>::Get(v, i);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void Set(T &v, size_t i, Scalar<T> const val)
{
  IndexingImplementation<T>::Set(v, i, val);
}

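// Load/Store copy a whole vector from/to memory, one lane at a time in this
// generic fallback. Sketch, assuming a hypothetical backend type Double_v
// whose scalar type is double:
//
//   double buf[vecCore::VectorSize<Double_v>()] = {0.0};
//   Double_v v = vecCore::Load<Double_v>(buf); // buf -> lanes of v
//   vecCore::Store(v, buf);                    // lanes of v -> buf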
template <typename T>
struct LoadStoreImplementation {
  template <typename S = Scalar<T>>
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Load(T &v, S const *ptr)
  {
    for (size_t i = 0; i < VectorSize<T>(); ++i)
      Set(v, i, ptr[i]);
  }

  template <typename S = Scalar<T>>
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Store(T const &v, S *ptr)
  {
    for (size_t i = 0; i < VectorSize<T>(); ++i)
      ptr[i] = static_cast<S>(Get(v, i));
  }
};

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void Load(T &v, Scalar<T> const *ptr)
{
  LoadStoreImplementation<T>::template Load<Scalar<T>>(v, ptr);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
T Load(Scalar<T> const *ptr)
{
  T v;
  LoadStoreImplementation<T>::template Load<Scalar<T>>(v, ptr);
  return v;
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void Store(T const &v, Scalar<T> *ptr)
{
  LoadStoreImplementation<T>::template Store<Scalar<T>>(v, ptr);
}

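// Gather/Scatter are the indexed counterparts of Load/Store: lane i is read
// from, or written to, ptr[Get(idx, i)]. Sketch, again assuming a
// hypothetical backend type Double_v:
//
//   double table[64] = { /* ... */ };
//   Index<Double_v> idx;                                // one table index per lane
//   Double_v v = vecCore::Gather<Double_v>(table, idx); // lane i <- table[Get(idx, i)]
//   vecCore::Scatter(v, table, idx);                    // table[Get(idx, i)] <- lane i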
template <typename T>
struct GatherScatterImplementation {
  template <typename S = Scalar<T>>
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Gather(T &v, S const *ptr, Index<T> const &idx)
  {
    for (size_t i = 0; i < VectorSize<T>(); ++i)
      Set(v, i, ptr[Get(idx, i)]);
  }

  template <typename S = Scalar<T>>
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Scatter(T const &v, S *ptr, Index<T> const &idx)
  {
    for (size_t i = 0; i < VectorSize<T>(); ++i)
      ptr[Get(idx, i)] = Get(v, i);
  }
};

template <typename T, typename S>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
T Gather(S const *ptr, Index<T> const &idx)
{
  T v;
  GatherScatterImplementation<T>::template Gather<S>(v, ptr, idx);
  return v;
}

template <typename T, typename S>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void Scatter(T const &v, S *ptr, Index<T> const &idx)
{
  GatherScatterImplementation<T>::template Scatter<S>(v, ptr, idx);
}

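// MaskFull/MaskEmpty reduce a lane mask to a single bool: MaskFull is true
// when every lane is set, MaskEmpty when none is. Typical use in a vectorized
// loop, assuming a hypothetical mask value of type Mask<Double_v>:
//
//   Mask<Double_v> done = /* per-lane condition */;
//   if (vecCore::MaskFull(done)) break;     // every lane finished
//   if (vecCore::MaskEmpty(done)) continue; // no lane finished yet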
template <typename M>
VECCORE_ATT_HOST_DEVICE
bool MaskFull(const M &mask)
{
  for (size_t i = 0; i < VectorSize<M>(); i++)
    if (!Get(mask, i)) return false;
  return true;
}

template <typename M>
VECCORE_ATT_HOST_DEVICE
bool MaskEmpty(const M &mask)
{
  for (size_t i = 0; i < VectorSize<M>(); i++)
    if (Get(mask, i)) return false;
  return true;
}

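// MaskedAssign and Blend implement per-lane selection: the generic version
// loops over the lanes, while the specialization for scalar types below
// degenerates to a plain conditional. Sketch, assuming the scalar backend
// where Mask<double> is bool:
//
//   double dst = 1.0;
//   vecCore::MaskedAssign(dst, true, 2.0);      // dst becomes 2.0
//   double b = vecCore::Blend(false, 3.0, 5.0); // picks the second source, 5.0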
template <typename T, bool>
struct GenericMaskingImplementation {
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Assign(T &dst, Mask<T> const &mask, T const &src)
  {
    for (size_t i = 0; i < VectorSize<T>(); i++)
      if (Get(mask, i)) Set(dst, i, Get(src, i));
  }

  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2)
  {
    for (size_t i = 0; i < VectorSize<T>(); i++)
      Set(dst, i, Get(mask, i) ? Get(src1, i) : Get(src2, i));
  }
};

template <typename T>
struct GenericMaskingImplementation<T, true> {
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Assign(T &dst, Mask<T> const &mask, T const &src)
  {
    if (mask) dst = src;
  }

  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2) { dst = mask ? src1 : src2; }
};

template <typename T>
struct MaskingImplementation {
  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Assign(T &dst, Mask<T> const &mask, T const &src)
  {
    GenericMaskingImplementation<T, std::is_scalar<T>::value>::Assign(dst, mask, src);
  }

  VECCORE_FORCE_INLINE
  VECCORE_ATT_HOST_DEVICE
  static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2)
  {
    GenericMaskingImplementation<T, std::is_scalar<T>::value>::Blend(dst, mask, src1, src2);
  }
};

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void MaskedAssign(T &dst, const Mask<T> &mask, const T &src)
{
  MaskingImplementation<T>::Assign(dst, mask, src);
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
T Blend(const Mask<T> &mask, const T &src1, const T &src2)
{
  T v;
  MaskingImplementation<T>::Blend(v, mask, src1, src2);
  return v;
}

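// Early returns from vectorized loops are worthwhile on the host, but on the
// CUDA device they only add divergence, so they are disabled there at compile
// time. EarlyReturnMaxLength additionally requires the vector to have at most
// n lanes. A typical guard might look like:
//
//   if (vecCore::EarlyReturnAllowed() && vecCore::MaskFull(done)) return;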
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE constexpr bool EarlyReturnAllowed()
{
#ifdef VECCORE_CUDA_DEVICE_COMPILATION
  return false;
#else
  return true;
#endif
}

template <typename T>
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE
constexpr bool EarlyReturnMaxLength(T &, size_t n)
{
  return EarlyReturnAllowed() && VectorSize<T>() <= n;
}

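// Horizontal reductions over the lanes of a vector. Sketch, assuming a
// hypothetical backend type Double_v with Scalar<Double_v> == double:
//
//   double sum = vecCore::ReduceAdd(v); // sum of all lanes
//   double lo  = vecCore::ReduceMin(v); // smallest lane
//   double hi  = vecCore::ReduceMax(v); // largest lane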
template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> ReduceAdd(const T &v)
{
  Scalar<T> result(0);
  for (size_t i = 0; i < VectorSize<T>(); ++i)
    result += Get(v, i);
  return result;
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> ReduceMin(const T &v)
{
  Scalar<T> result(NumericLimits<Scalar<T>>::Max());
  for (size_t i = 0; i < VectorSize<T>(); ++i)
    if (Get(v, i) < result)
      result = Get(v, i);
  return result;
}

template <typename T>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Scalar<T> ReduceMax(const T &v)
{
  Scalar<T> result(NumericLimits<Scalar<T>>::Lowest());
  for (size_t i = 0; i < VectorSize<T>(); ++i)
    if (Get(v, i) > result)
      result = Get(v, i);
  return result;
}

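// Convert performs a lane-by-lane conversion between two vector types of the
// same length; the static_assert rejects mismatched lane counts at compile
// time. Sketch, assuming hypothetical backend types Float_v and Double_v of
// equal size:
//
//   Double_v d = vecCore::Convert<Double_v>(f); // f is a Float_v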
template <typename Vout, typename Vin>
VECCORE_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
Vout Convert(const Vin &v)
{
  static_assert(VectorSize<Vin>() == VectorSize<Vout>(),
                "Cannot convert SIMD vectors of different sizes");
  Vout out;
  for (size_t i = 0; i < VectorSize<Vin>(); ++i)
    Set(out, i, Get(v, i));
  return out;
}

} // namespace vecCore

#endif