File indexing completed on 2025-01-19 09:51:46
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021 #ifndef EIGEN_PACKET_MATH_SYCL_H
0022 #define EIGEN_PACKET_MATH_SYCL_H
0023 #include <type_traits>
0024 namespace Eigen {
0025
0026 namespace internal {
0027 #ifdef SYCL_DEVICE_ONLY
0028
0029 #define SYCL_PLOADT_RO(address_space_target) \
0030 template <typename packet_type, int Alignment> \
0031 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
0032 typename cl::sycl::multi_ptr< \
0033 const typename unpacket_traits<packet_type>::type, \
0034 cl::sycl::access::address_space::address_space_target>::pointer_t \
0035 from) { \
0036 typedef typename unpacket_traits<packet_type>::type scalar; \
0037 typedef cl::sycl::multi_ptr< \
0038 scalar, cl::sycl::access::address_space::address_space_target> \
0039 multi_ptr; \
0040 auto res = packet_type( \
0041 static_cast<typename unpacket_traits<packet_type>::type>(0)); \
0042 res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
0043 return res; \
0044 }
0045
0046 SYCL_PLOADT_RO(global_space)
0047 SYCL_PLOADT_RO(local_space)
0048 #undef SYCL_PLOADT_RO
0049 #endif
0050
0051 template <typename packet_type, int Alignment, typename T>
0052 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
0053 ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
0054 cl::sycl::access::mode::read_write, T>& from) {
0055 return ploadt_ro<packet_type, Alignment>(from.get_pointer());
0056 }
0057
0058 #ifdef SYCL_DEVICE_ONLY
0059 #define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
0060 template <typename packet_type> \
0061 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
0062 typename cl::sycl::multi_ptr< \
0063 const typename unpacket_traits<packet_type>::type, \
0064 cl::sycl::access::address_space::address_space_target>::pointer_t \
0065 from) { \
0066 return ploadt_ro<packet_type, Alignment>(from); \
0067 }
0068
0069
0070 SYCL_PLOAD(global_space, Unaligned, u)
0071 SYCL_PLOAD(global_space, Aligned, )
0072
0073 SYCL_PLOAD(local_space, Unaligned, u)
0074 SYCL_PLOAD(local_space, Aligned, )
0075
0076 #undef SYCL_PLOAD
0077 #endif
0078
0079 #define SYCL_PLOAD(Alignment, AlignedType) \
0080 template <typename packet_type> \
0081 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
0082 const Eigen::TensorSycl::internal::RangeAccess< \
0083 cl::sycl::access::mode::read_write, \
0084 typename unpacket_traits<packet_type>::type> \
0085 from) { \
0086 return ploadt_ro<packet_type, Alignment>(from); \
0087 }
0088 SYCL_PLOAD(Unaligned, u)
0089 SYCL_PLOAD(Aligned, )
0090 #undef SYCL_PLOAD
0091
0092 #ifdef SYCL_DEVICE_ONLY
0093
0094
0095 #define SYCL_PLOADT(address_space_target) \
0096 template <typename packet_type, int Alignment> \
0097 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
0098 typename cl::sycl::multi_ptr< \
0099 const typename unpacket_traits<packet_type>::type, \
0100 cl::sycl::access::address_space::address_space_target>::pointer_t \
0101 from) { \
0102 if (Alignment >= unpacket_traits<packet_type>::alignment) \
0103 return pload<packet_type>(from); \
0104 else \
0105 return ploadu<packet_type>(from); \
0106 }
0107
0108
0109 SYCL_PLOADT(global_space)
0110
0111 SYCL_PLOADT(local_space)
0112 #undef SYCL_PLOADT
0113 #endif
0114
0115 template <typename packet_type, int Alignment>
0116 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
0117 ploadt(const Eigen::TensorSycl::internal::RangeAccess<
0118 cl::sycl::access::mode::read_write,
0119 typename unpacket_traits<packet_type>::type>& from) {
0120 return ploadt<packet_type, Alignment>(from.get_pointer());
0121 }
0122 #ifdef SYCL_DEVICE_ONLY
0123
0124
0125 #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
0126 template <> \
0127 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
0128 ploadt_ro<packet_type, Alignment>( \
0129 const typename unpacket_traits<packet_type>::type* from) { \
0130 typedef typename unpacket_traits<packet_type>::type scalar; \
0131 auto res = packet_type(static_cast<scalar>(0)); \
0132 res.template load<cl::sycl::access::address_space::private_space>( \
0133 0, const_cast<scalar*>(from)); \
0134 return res; \
0135 }
0136
0137 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
0138 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
0139 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
0140 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
0141
0142 #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
0143 template <> \
0144 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
0145 const typename unpacket_traits<packet_type>::type* from) { \
0146 typedef typename unpacket_traits<packet_type>::type scalar; \
0147 auto res = packet_type(static_cast<scalar>(0)); \
0148 res.template load<cl::sycl::access::address_space::private_space>( \
0149 0, const_cast<scalar*>(from)); \
0150 return res; \
0151 }
0152 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
0153 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
0154 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
0155 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
0156
0157 #undef SYCL_PLOAD_SPECIAL
0158
0159 #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
0160 template <> \
0161 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
0162 typename cl::sycl::multi_ptr< \
0163 scalar, \
0164 cl::sycl::access::address_space::address_space_target>::pointer_t \
0165 to, \
0166 const packet_type& from) { \
0167 typedef cl::sycl::multi_ptr< \
0168 scalar, cl::sycl::access::address_space::address_space_target> \
0169 multi_ptr; \
0170 from.store(0, multi_ptr(to)); \
0171 }
0172
0173
0174 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
0175 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
0176 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
0177 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
0178 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
0179 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
0180 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
0181 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
0182
0183 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
0184 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
0185 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
0186 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
0187 #undef SYCL_PSTORE
0188
0189 #define SYCL_PSTORE_T(address_space_target) \
0190 template <typename scalar, typename packet_type, int Alignment> \
0191 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
0192 typename cl::sycl::multi_ptr< \
0193 scalar, \
0194 cl::sycl::access::address_space::address_space_target>::pointer_t \
0195 to, \
0196 const packet_type& from) { \
0197 if (Alignment) \
0198 pstore(to, from); \
0199 else \
0200 pstoreu(to, from); \
0201 }
0202
0203 SYCL_PSTORE_T(global_space)
0204
0205 SYCL_PSTORE_T(local_space)
0206
0207 #undef SYCL_PSTORE_T
0208
0209 #define SYCL_PSET1(packet_type) \
0210 template <> \
0211 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
0212 const typename unpacket_traits<packet_type>::type& from) { \
0213 return packet_type(from); \
0214 }
0215
0216
0217 SYCL_PSET1(cl::sycl::cl_float4)
0218 SYCL_PSET1(cl::sycl::cl_double2)
0219
0220 #undef SYCL_PSET1
0221
0222 template <typename packet_type>
0223 struct get_base_packet {
0224 template <typename sycl_multi_pointer>
0225 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
0226 get_ploaddup(sycl_multi_pointer) {}
0227
0228 template <typename sycl_multi_pointer>
0229 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
0230 get_pgather(sycl_multi_pointer, Index) {}
0231 };
0232
0233 template <>
0234 struct get_base_packet<cl::sycl::cl_float4> {
0235 template <typename sycl_multi_pointer>
0236 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
0237 sycl_multi_pointer from) {
0238 return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
0239 }
0240 template <typename sycl_multi_pointer>
0241 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
0242 sycl_multi_pointer from, Index stride) {
0243 return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
0244 from[2 * stride], from[3 * stride]);
0245 }
0246
0247 template <typename sycl_multi_pointer>
0248 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
0249 sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
0250 auto tmp = stride;
0251 to[0] = from.x();
0252 to[tmp] = from.y();
0253 to[tmp += stride] = from.z();
0254 to[tmp += stride] = from.w();
0255 }
0256 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
0257 const float& a) {
0258 return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
0259 static_cast<float>(a + 2),
0260 static_cast<float>(a + 3));
0261 }
0262 };
0263
0264 template <>
0265 struct get_base_packet<cl::sycl::cl_double2> {
0266 template <typename sycl_multi_pointer>
0267 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
0268 get_ploaddup(const sycl_multi_pointer from) {
0269 return cl::sycl::cl_double2(from[0], from[0]);
0270 }
0271
0272 template <typename sycl_multi_pointer, typename Index>
0273 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
0274 const sycl_multi_pointer from, Index stride) {
0275 return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
0276 }
0277
0278 template <typename sycl_multi_pointer>
0279 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
0280 sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
0281 to[0] = from.x();
0282 to[stride] = from.y();
0283 }
0284
0285 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
0286 const double& a) {
0287 return cl::sycl::cl_double2(static_cast<double>(a),
0288 static_cast<double>(a + 1));
0289 }
0290 };
0291
0292 #define SYCL_PLOAD_DUP(address_space_target) \
0293 template <typename packet_type> \
0294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
0295 typename cl::sycl::multi_ptr< \
0296 const typename unpacket_traits<packet_type>::type, \
0297 cl::sycl::access::address_space::address_space_target>::pointer_t \
0298 from) { \
0299 return get_base_packet<packet_type>::get_ploaddup(from); \
0300 }
0301
0302
0303 SYCL_PLOAD_DUP(global_space)
0304
0305 SYCL_PLOAD_DUP(local_space)
0306 #undef SYCL_PLOAD_DUP
0307
0308 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
0309 template <> \
0310 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
0311 const typename unpacket_traits<packet_type>::type* from) { \
0312 return get_base_packet<packet_type>::get_ploaddup(from); \
0313 }
0314
0315 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
0316 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
0317
0318 #undef SYCL_PLOAD_DUP_SPECILIZE
0319
0320 #define SYCL_PLSET(packet_type) \
0321 template <> \
0322 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
0323 const typename unpacket_traits<packet_type>::type& a) { \
0324 return get_base_packet<packet_type>::set_plset(a); \
0325 }
0326
0327 SYCL_PLSET(cl::sycl::cl_float4)
0328 SYCL_PLSET(cl::sycl::cl_double2)
0329
0330 #undef SYCL_PLSET
0331
0332 #define SYCL_PGATHER(address_space_target) \
0333 template <typename Scalar, typename packet_type> \
0334 EIGEN_DEVICE_FUNC inline packet_type pgather( \
0335 typename cl::sycl::multi_ptr< \
0336 const typename unpacket_traits<packet_type>::type, \
0337 cl::sycl::access::address_space::address_space_target>::pointer_t \
0338 from, \
0339 Index stride) { \
0340 return get_base_packet<packet_type>::get_pgather(from, stride); \
0341 }
0342
0343
0344 SYCL_PGATHER(global_space)
0345
0346 SYCL_PGATHER(local_space)
0347
0348 #undef SYCL_PGATHER
0349
0350 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
0351 template <> \
0352 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
0353 pgather<scalar, packet_type>( \
0354 const typename unpacket_traits<packet_type>::type* from, Index stride) { \
0355 return get_base_packet<packet_type>::get_pgather(from, stride); \
0356 }
0357
0358 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
0359 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
0360
0361 #undef SYCL_PGATHER_SPECILIZE
0362
0363 #define SYCL_PSCATTER(address_space_target) \
0364 template <typename Scalar, typename packet_type> \
0365 EIGEN_DEVICE_FUNC inline void pscatter( \
0366 typename cl::sycl::multi_ptr< \
0367 typename unpacket_traits<packet_type>::type, \
0368 cl::sycl::access::address_space::address_space_target>::pointer_t \
0369 to, \
0370 const packet_type& from, Index stride) { \
0371 get_base_packet<packet_type>::set_pscatter(to, from, stride); \
0372 }
0373
0374
0375 SYCL_PSCATTER(global_space)
0376
0377 SYCL_PSCATTER(local_space)
0378
0379 #undef SYCL_PSCATTER
0380
0381 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
0382 template <> \
0383 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
0384 typename unpacket_traits<packet_type>::type * to, \
0385 const packet_type& from, Index stride) { \
0386 get_base_packet<packet_type>::set_pscatter(to, from, stride); \
0387 }
0388
0389 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
0390 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
0391
0392 #undef SYCL_PSCATTER_SPECILIZE
0393
0394 #define SYCL_PMAD(packet_type) \
0395 template <> \
0396 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
0397 const packet_type& a, const packet_type& b, const packet_type& c) { \
0398 return cl::sycl::mad(a, b, c); \
0399 }
0400
0401 SYCL_PMAD(cl::sycl::cl_float4)
0402 SYCL_PMAD(cl::sycl::cl_double2)
0403 #undef SYCL_PMAD
0404
0405 template <>
0406 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
0407 const cl::sycl::cl_float4& a) {
0408 return a.x();
0409 }
0410 template <>
0411 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
0412 const cl::sycl::cl_double2& a) {
0413 return a.x();
0414 }
0415
0416 template <>
0417 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
0418 const cl::sycl::cl_float4& a) {
0419 return a.x() + a.y() + a.z() + a.w();
0420 }
0421
0422 template <>
0423 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
0424 const cl::sycl::cl_double2& a) {
0425 return a.x() + a.y();
0426 }
0427
0428 template <>
0429 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
0430 const cl::sycl::cl_float4& a) {
0431 return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
0432 cl::sycl::fmax(a.z(), a.w()));
0433 }
0434 template <>
0435 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
0436 const cl::sycl::cl_double2& a) {
0437 return cl::sycl::fmax(a.x(), a.y());
0438 }
0439
0440 template <>
0441 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
0442 const cl::sycl::cl_float4& a) {
0443 return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
0444 cl::sycl::fmin(a.z(), a.w()));
0445 }
0446 template <>
0447 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
0448 const cl::sycl::cl_double2& a) {
0449 return cl::sycl::fmin(a.x(), a.y());
0450 }
0451
0452 template <>
0453 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
0454 const cl::sycl::cl_float4& a) {
0455 return a.x() * a.y() * a.z() * a.w();
0456 }
0457 template <>
0458 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
0459 const cl::sycl::cl_double2& a) {
0460 return a.x() * a.y();
0461 }
0462
0463 template <>
0464 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
0465 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
0466 return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
0467 cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
0468 }
0469 template <>
0470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
0471 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
0472 return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
0473 }
0474
0475 template <typename Packet>
0476 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
0477 const Packet &b) {
0478 return ((a <= b)
0479 .template convert<typename unpacket_traits<Packet>::type,
0480 cl::sycl::rounding_mode::automatic>());
0481 }
0482
0483 template <typename Packet>
0484 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
0485 const Packet &b) {
0486 return ((a < b)
0487 .template convert<typename unpacket_traits<Packet>::type,
0488 cl::sycl::rounding_mode::automatic>());
0489 }
0490
0491 template <typename Packet>
0492 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
0493 const Packet &b) {
0494 return ((a == b)
0495 .template convert<typename unpacket_traits<Packet>::type,
0496 cl::sycl::rounding_mode::automatic>());
0497 }
0498
0499 #define SYCL_PCMP(OP, TYPE) \
0500 template <> \
0501 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
0502 const TYPE &b) { \
0503 return sycl_pcmp_##OP<TYPE>(a, b); \
0504 }
0505
0506 SYCL_PCMP(le, cl::sycl::cl_float4)
0507 SYCL_PCMP(lt, cl::sycl::cl_float4)
0508 SYCL_PCMP(eq, cl::sycl::cl_float4)
0509 SYCL_PCMP(le, cl::sycl::cl_double2)
0510 SYCL_PCMP(lt, cl::sycl::cl_double2)
0511 SYCL_PCMP(eq, cl::sycl::cl_double2)
0512 #undef SYCL_PCMP
0513
0514 template <typename T> struct convert_to_integer;
0515
0516 template <> struct convert_to_integer<float> {
0517 using type = std::int32_t;
0518 using packet_type = cl::sycl::cl_int4;
0519 };
0520 template <> struct convert_to_integer<double> {
0521 using type = std::int64_t;
0522 using packet_type = cl::sycl::cl_long2;
0523 };
0524
0525 template <typename PacketIn>
0526 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
0527 typename unpacket_traits<PacketIn>::type>::packet_type
0528 vector_as_int(const PacketIn &p) {
0529 return (
0530 p.template convert<typename convert_to_integer<
0531 typename unpacket_traits<PacketIn>::type>::type,
0532 cl::sycl::rounding_mode::automatic>());
0533 }
0534
0535 template <typename packetOut, typename PacketIn>
0536 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
0537 convert_vector(const PacketIn &p) {
0538 return (p.template convert<typename unpacket_traits<packetOut>::type,
0539 cl::sycl::rounding_mode::automatic>());
0540 }
0541
0542 #define SYCL_PAND(TYPE) \
0543 template <> \
0544 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \
0545 const TYPE &b) { \
0546 return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
0547 }
0548 SYCL_PAND(cl::sycl::cl_float4)
0549 SYCL_PAND(cl::sycl::cl_double2)
0550 #undef SYCL_PAND
0551
0552 #define SYCL_POR(TYPE) \
0553 template <> \
0554 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
0555 const TYPE &b) { \
0556 return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
0557 }
0558
0559 SYCL_POR(cl::sycl::cl_float4)
0560 SYCL_POR(cl::sycl::cl_double2)
0561 #undef SYCL_POR
0562
0563 #define SYCL_PXOR(TYPE) \
0564 template <> \
0565 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \
0566 const TYPE &b) { \
0567 return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
0568 }
0569
0570 SYCL_PXOR(cl::sycl::cl_float4)
0571 SYCL_PXOR(cl::sycl::cl_double2)
0572 #undef SYCL_PXOR
0573
0574 #define SYCL_PANDNOT(TYPE) \
0575 template <> \
0576 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \
0577 const TYPE &b) { \
0578 return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \
0579 }
0580 SYCL_PANDNOT(cl::sycl::cl_float4)
0581 SYCL_PANDNOT(cl::sycl::cl_double2)
0582 #undef SYCL_PANDNOT
0583
0584 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
0585 PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
0586 float tmp = kernel.packet[0].y();
0587 kernel.packet[0].y() = kernel.packet[1].x();
0588 kernel.packet[1].x() = tmp;
0589
0590 tmp = kernel.packet[0].z();
0591 kernel.packet[0].z() = kernel.packet[2].x();
0592 kernel.packet[2].x() = tmp;
0593
0594 tmp = kernel.packet[0].w();
0595 kernel.packet[0].w() = kernel.packet[3].x();
0596 kernel.packet[3].x() = tmp;
0597
0598 tmp = kernel.packet[1].z();
0599 kernel.packet[1].z() = kernel.packet[2].y();
0600 kernel.packet[2].y() = tmp;
0601
0602 tmp = kernel.packet[1].w();
0603 kernel.packet[1].w() = kernel.packet[3].y();
0604 kernel.packet[3].y() = tmp;
0605
0606 tmp = kernel.packet[2].w();
0607 kernel.packet[2].w() = kernel.packet[3].z();
0608 kernel.packet[3].z() = tmp;
0609 }
0610
0611 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
0612 PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
0613 double tmp = kernel.packet[0].y();
0614 kernel.packet[0].y() = kernel.packet[1].x();
0615 kernel.packet[1].x() = tmp;
0616 }
0617
0618 template <>
0619 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
0620 const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
0621 const cl::sycl::cl_float4& thenPacket,
0622 const cl::sycl::cl_float4& elsePacket) {
0623 cl::sycl::cl_int4 condition(
0624 ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
0625 ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
0626 return cl::sycl::select(thenPacket, elsePacket, condition);
0627 }
0628
0629 template <>
0630 inline cl::sycl::cl_double2 pblend(
0631 const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
0632 const cl::sycl::cl_double2& thenPacket,
0633 const cl::sycl::cl_double2& elsePacket) {
0634 cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
0635 ifPacket.select[1] ? 0 : -1);
0636 return cl::sycl::select(thenPacket, elsePacket, condition);
0637 }
0638 #endif
0639
0640 #define SYCL_PSTORE(alignment) \
0641 template <typename packet_type> \
0642 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
0643 const Eigen::TensorSycl::internal::RangeAccess< \
0644 cl::sycl::access::mode::read_write, \
0645 typename unpacket_traits<packet_type>::type>& to, \
0646 const packet_type& from) { \
0647 pstore##alignment(to.get_pointer(), from); \
0648 }
0649
0650
0651 SYCL_PSTORE()
0652 SYCL_PSTORE(u)
0653
0654 #undef SYCL_PSTORE
0655
0656 template <typename scalar, typename packet_type, int Alignment>
0657 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
0658 Eigen::TensorSycl::internal::RangeAccess<
0659 cl::sycl::access::mode::read_write,
0660 typename unpacket_traits<packet_type>::type>
0661 to,
0662 const packet_type& from) {
0663 pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
0664 }
0665
0666 }
0667
0668 }
0669
0670 #endif