Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:51:46

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Mehdi Goli    Codeplay Software Ltd.
0005 // Ralph Potter  Codeplay Software Ltd.
0006 // Luke Iwanski  Codeplay Software Ltd.
0007 // Contact: <eigen@codeplay.com>
0008 //
0009 // This Source Code Form is subject to the terms of the Mozilla
0010 // Public License v. 2.0. If a copy of the MPL was not distributed
0011 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0012 
0013 /*****************************************************************
0014  * PacketMath.h
0015  *
0016  * \brief:
0017  *  PacketMath
0018  *
0019  *****************************************************************/
0020 
0021 #ifndef EIGEN_PACKET_MATH_SYCL_H
0022 #define EIGEN_PACKET_MATH_SYCL_H
0023 #include <type_traits>
0024 namespace Eigen {
0025 
0026 namespace internal {
0027 #ifdef SYCL_DEVICE_ONLY
0028 
0029 #define SYCL_PLOADT_RO(address_space_target)                                 \
0030   template <typename packet_type, int Alignment>                             \
0031   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(               \
0032       typename cl::sycl::multi_ptr<                                          \
0033           const typename unpacket_traits<packet_type>::type,                 \
0034           cl::sycl::access::address_space::address_space_target>::pointer_t  \
0035           from) {                                                            \
0036     typedef typename unpacket_traits<packet_type>::type scalar;              \
0037     typedef cl::sycl::multi_ptr<                                             \
0038         scalar, cl::sycl::access::address_space::address_space_target>       \
0039         multi_ptr;                                                           \
0040     auto res = packet_type(                                                  \
0041         static_cast<typename unpacket_traits<packet_type>::type>(0));        \
0042     res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
0043     return res;                                                              \
0044   }
0045 
0046 SYCL_PLOADT_RO(global_space)
0047 SYCL_PLOADT_RO(local_space)
0048 #undef SYCL_PLOADT_RO
0049 #endif
0050 
0051 template <typename packet_type, int Alignment, typename T>
0052 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
0053 ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
0054           cl::sycl::access::mode::read_write, T>& from) {
0055   return ploadt_ro<packet_type, Alignment>(from.get_pointer());
0056 }
0057 
0058 #ifdef SYCL_DEVICE_ONLY
0059 #define SYCL_PLOAD(address_space_target, Alignment, AlignedType)            \
0060   template <typename packet_type>                                           \
0061   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType(     \
0062       typename cl::sycl::multi_ptr<                                         \
0063           const typename unpacket_traits<packet_type>::type,                \
0064           cl::sycl::access::address_space::address_space_target>::pointer_t \
0065           from) {                                                           \
0066     return ploadt_ro<packet_type, Alignment>(from);                         \
0067   }
0068 
0069 // global space
0070 SYCL_PLOAD(global_space, Unaligned, u)
0071 SYCL_PLOAD(global_space, Aligned, )
0072 // local space
0073 SYCL_PLOAD(local_space, Unaligned, u)
0074 SYCL_PLOAD(local_space, Aligned, )
0075 
0076 #undef SYCL_PLOAD
0077 #endif
0078 
0079 #define SYCL_PLOAD(Alignment, AlignedType)                              \
0080   template <typename packet_type>                                       \
0081   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
0082       const Eigen::TensorSycl::internal::RangeAccess<                   \
0083           cl::sycl::access::mode::read_write,                           \
0084           typename unpacket_traits<packet_type>::type>                  \
0085           from) {                                                       \
0086     return ploadt_ro<packet_type, Alignment>(from);                     \
0087   }
0088 SYCL_PLOAD(Unaligned, u)
0089 SYCL_PLOAD(Aligned, )
0090 #undef SYCL_PLOAD
0091 
0092 #ifdef SYCL_DEVICE_ONLY
0093 /** \internal \returns a packet version of \a *from.
0094  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
0095 #define SYCL_PLOADT(address_space_target)                                   \
0096   template <typename packet_type, int Alignment>                            \
0097   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(                 \
0098       typename cl::sycl::multi_ptr<                                         \
0099           const typename unpacket_traits<packet_type>::type,                \
0100           cl::sycl::access::address_space::address_space_target>::pointer_t \
0101           from) {                                                           \
0102     if (Alignment >= unpacket_traits<packet_type>::alignment)               \
0103       return pload<packet_type>(from);                                      \
0104     else                                                                    \
0105       return ploadu<packet_type>(from);                                     \
0106   }
0107 
0108 // global space
0109 SYCL_PLOADT(global_space)
0110 // local space
0111 SYCL_PLOADT(local_space)
0112 #undef SYCL_PLOADT
0113 #endif
0114 
0115 template <typename packet_type, int Alignment>
0116 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
0117 ploadt(const Eigen::TensorSycl::internal::RangeAccess<
0118        cl::sycl::access::mode::read_write,
0119        typename unpacket_traits<packet_type>::type>& from) {
0120   return ploadt<packet_type, Alignment>(from.get_pointer());
0121 }
0122 #ifdef SYCL_DEVICE_ONLY
0123 
0124 // private_space
0125 #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)                 \
0126   template <>                                                          \
0127   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type                    \
0128   ploadt_ro<packet_type, Alignment>(                                   \
0129       const typename unpacket_traits<packet_type>::type* from) {       \
0130     typedef typename unpacket_traits<packet_type>::type scalar;        \
0131     auto res = packet_type(static_cast<scalar>(0));                    \
0132     res.template load<cl::sycl::access::address_space::private_space>( \
0133         0, const_cast<scalar*>(from));                                 \
0134     return res;                                                        \
0135   }
0136 
0137 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
0138 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
0139 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
0140 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
0141 
0142 #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)                    \
0143   template <>                                                              \
0144   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
0145       const typename unpacket_traits<packet_type>::type* from) {           \
0146     typedef typename unpacket_traits<packet_type>::type scalar;            \
0147     auto res = packet_type(static_cast<scalar>(0));                        \
0148     res.template load<cl::sycl::access::address_space::private_space>(     \
0149         0, const_cast<scalar*>(from));                                     \
0150     return res;                                                            \
0151   }
0152 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
0153 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
0154 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
0155 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
0156 
0157 #undef SYCL_PLOAD_SPECIAL
0158 
0159 #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)   \
0160   template <>                                                               \
0161   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(             \
0162       typename cl::sycl::multi_ptr<                                         \
0163           scalar,                                                           \
0164           cl::sycl::access::address_space::address_space_target>::pointer_t \
0165           to,                                                               \
0166       const packet_type& from) {                                            \
0167     typedef cl::sycl::multi_ptr<                                            \
0168         scalar, cl::sycl::access::address_space::address_space_target>      \
0169         multi_ptr;                                                          \
0170     from.store(0, multi_ptr(to));                                           \
0171   }
0172 
0173 // global space
0174 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
0175 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
0176 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
0177 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
0178 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
0179 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
0180 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
0181 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
0182 
0183 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
0184 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
0185 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
0186 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
0187 #undef SYCL_PSTORE
0188 
0189 #define SYCL_PSTORE_T(address_space_target)                                 \
0190   template <typename scalar, typename packet_type, int Alignment>           \
0191   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(                       \
0192       typename cl::sycl::multi_ptr<                                         \
0193           scalar,                                                           \
0194           cl::sycl::access::address_space::address_space_target>::pointer_t \
0195           to,                                                               \
0196       const packet_type& from) {                                            \
0197     if (Alignment)                                                          \
0198       pstore(to, from);                                                     \
0199     else                                                                    \
0200       pstoreu(to, from);                                                    \
0201   }
0202 
0203 SYCL_PSTORE_T(global_space)
0204 
0205 SYCL_PSTORE_T(local_space)
0206 
0207 #undef SYCL_PSTORE_T
0208 
0209 #define SYCL_PSET1(packet_type)                                         \
0210   template <>                                                           \
0211   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
0212       const typename unpacket_traits<packet_type>::type& from) {        \
0213     return packet_type(from);                                           \
0214   }
0215 
0216 // global space
0217 SYCL_PSET1(cl::sycl::cl_float4)
0218 SYCL_PSET1(cl::sycl::cl_double2)
0219 
0220 #undef SYCL_PSET1
0221 
0222 template <typename packet_type>
0223 struct get_base_packet {
0224   template <typename sycl_multi_pointer>
0225   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
0226   get_ploaddup(sycl_multi_pointer) {}
0227 
0228   template <typename sycl_multi_pointer>
0229   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
0230   get_pgather(sycl_multi_pointer, Index) {}
0231 };
0232 
0233 template <>
0234 struct get_base_packet<cl::sycl::cl_float4> {
0235   template <typename sycl_multi_pointer>
0236   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
0237       sycl_multi_pointer from) {
0238     return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
0239   }
0240   template <typename sycl_multi_pointer>
0241   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
0242       sycl_multi_pointer from, Index stride) {
0243     return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
0244                                from[2 * stride], from[3 * stride]);
0245   }
0246 
0247   template <typename sycl_multi_pointer>
0248   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
0249       sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
0250     auto tmp = stride;
0251     to[0] = from.x();
0252     to[tmp] = from.y();
0253     to[tmp += stride] = from.z();
0254     to[tmp += stride] = from.w();
0255   }
0256   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
0257       const float& a) {
0258     return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
0259                                static_cast<float>(a + 2),
0260                                static_cast<float>(a + 3));
0261   }
0262 };
0263 
0264 template <>
0265 struct get_base_packet<cl::sycl::cl_double2> {
0266   template <typename sycl_multi_pointer>
0267   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
0268   get_ploaddup(const sycl_multi_pointer from) {
0269     return cl::sycl::cl_double2(from[0], from[0]);
0270   }
0271 
0272   template <typename sycl_multi_pointer, typename Index>
0273   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
0274       const sycl_multi_pointer from, Index stride) {
0275     return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
0276   }
0277 
0278   template <typename sycl_multi_pointer>
0279   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
0280       sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
0281     to[0] = from.x();
0282     to[stride] = from.y();
0283   }
0284 
0285   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
0286       const double& a) {
0287     return cl::sycl::cl_double2(static_cast<double>(a),
0288                                 static_cast<double>(a + 1));
0289   }
0290 };
0291 
0292 #define SYCL_PLOAD_DUP(address_space_target)                                \
0293   template <typename packet_type>                                           \
0294   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup(               \
0295       typename cl::sycl::multi_ptr<                                         \
0296           const typename unpacket_traits<packet_type>::type,                \
0297           cl::sycl::access::address_space::address_space_target>::pointer_t \
0298           from) {                                                           \
0299     return get_base_packet<packet_type>::get_ploaddup(from);                \
0300   }
0301 
0302 // global space
0303 SYCL_PLOAD_DUP(global_space)
0304 // local_space
0305 SYCL_PLOAD_DUP(local_space)
0306 #undef SYCL_PLOAD_DUP
0307 
0308 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type)                              \
0309   template <>                                                              \
0310   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
0311       const typename unpacket_traits<packet_type>::type* from) {           \
0312     return get_base_packet<packet_type>::get_ploaddup(from);               \
0313   }
0314 
0315 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
0316 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
0317 
0318 #undef SYCL_PLOAD_DUP_SPECILIZE
0319 
0320 #define SYCL_PLSET(packet_type)                                         \
0321   template <>                                                           \
0322   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
0323       const typename unpacket_traits<packet_type>::type& a) {           \
0324     return get_base_packet<packet_type>::set_plset(a);                  \
0325   }
0326 
0327 SYCL_PLSET(cl::sycl::cl_float4)
0328 SYCL_PLSET(cl::sycl::cl_double2)
0329 
0330 #undef SYCL_PLSET
0331 
0332 #define SYCL_PGATHER(address_space_target)                                  \
0333   template <typename Scalar, typename packet_type>                          \
0334   EIGEN_DEVICE_FUNC inline packet_type pgather(                             \
0335       typename cl::sycl::multi_ptr<                                         \
0336           const typename unpacket_traits<packet_type>::type,                \
0337           cl::sycl::access::address_space::address_space_target>::pointer_t \
0338           from,                                                             \
0339       Index stride) {                                                       \
0340     return get_base_packet<packet_type>::get_pgather(from, stride);         \
0341   }
0342 
0343 // global space
0344 SYCL_PGATHER(global_space)
0345 // local space
0346 SYCL_PGATHER(local_space)
0347 
0348 #undef SYCL_PGATHER
0349 
0350 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type)                            \
0351   template <>                                                                  \
0352   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type                            \
0353   pgather<scalar, packet_type>(                                                \
0354       const typename unpacket_traits<packet_type>::type* from, Index stride) { \
0355     return get_base_packet<packet_type>::get_pgather(from, stride);            \
0356   }
0357 
0358 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
0359 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
0360 
0361 #undef SYCL_PGATHER_SPECILIZE
0362 
0363 #define SYCL_PSCATTER(address_space_target)                                 \
0364   template <typename Scalar, typename packet_type>                          \
0365   EIGEN_DEVICE_FUNC inline void pscatter(                                   \
0366       typename cl::sycl::multi_ptr<                                         \
0367           typename unpacket_traits<packet_type>::type,                      \
0368           cl::sycl::access::address_space::address_space_target>::pointer_t \
0369           to,                                                               \
0370       const packet_type& from, Index stride) {                              \
0371     get_base_packet<packet_type>::set_pscatter(to, from, stride);           \
0372   }
0373 
0374 // global space
0375 SYCL_PSCATTER(global_space)
0376 // local space
0377 SYCL_PSCATTER(local_space)
0378 
0379 #undef SYCL_PSCATTER
0380 
0381 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)                        \
0382   template <>                                                               \
0383   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
0384       typename unpacket_traits<packet_type>::type * to,                     \
0385       const packet_type& from, Index stride) {                              \
0386     get_base_packet<packet_type>::set_pscatter(to, from, stride);           \
0387   }
0388 
0389 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
0390 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
0391 
0392 #undef SYCL_PSCATTER_SPECILIZE
0393 
0394 #define SYCL_PMAD(packet_type)                                            \
0395   template <>                                                             \
0396   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(                \
0397       const packet_type& a, const packet_type& b, const packet_type& c) { \
0398     return cl::sycl::mad(a, b, c);                                        \
0399   }
0400 
0401 SYCL_PMAD(cl::sycl::cl_float4)
0402 SYCL_PMAD(cl::sycl::cl_double2)
0403 #undef SYCL_PMAD
0404 
0405 template <>
0406 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
0407     const cl::sycl::cl_float4& a) {
0408   return a.x();
0409 }
0410 template <>
0411 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
0412     const cl::sycl::cl_double2& a) {
0413   return a.x();
0414 }
0415 
0416 template <>
0417 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
0418     const cl::sycl::cl_float4& a) {
0419   return a.x() + a.y() + a.z() + a.w();
0420 }
0421 
0422 template <>
0423 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
0424     const cl::sycl::cl_double2& a) {
0425   return a.x() + a.y();
0426 }
0427 
0428 template <>
0429 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
0430     const cl::sycl::cl_float4& a) {
0431   return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
0432                         cl::sycl::fmax(a.z(), a.w()));
0433 }
0434 template <>
0435 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
0436     const cl::sycl::cl_double2& a) {
0437   return cl::sycl::fmax(a.x(), a.y());
0438 }
0439 
0440 template <>
0441 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
0442     const cl::sycl::cl_float4& a) {
0443   return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
0444                         cl::sycl::fmin(a.z(), a.w()));
0445 }
0446 template <>
0447 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
0448     const cl::sycl::cl_double2& a) {
0449   return cl::sycl::fmin(a.x(), a.y());
0450 }
0451 
0452 template <>
0453 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
0454     const cl::sycl::cl_float4& a) {
0455   return a.x() * a.y() * a.z() * a.w();
0456 }
0457 template <>
0458 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
0459     const cl::sycl::cl_double2& a) {
0460   return a.x() * a.y();
0461 }
0462 
0463 template <>
0464 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
0465 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
0466   return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
0467                              cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
0468 }
0469 template <>
0470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
0471 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
0472   return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
0473 }
0474 
0475 template <typename Packet>
0476 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
0477                                                           const Packet &b) {
0478   return ((a <= b)
0479               .template convert<typename unpacket_traits<Packet>::type,
0480                                 cl::sycl::rounding_mode::automatic>());
0481 }
0482 
0483 template <typename Packet>
0484 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
0485                                                           const Packet &b) {
0486   return ((a < b)
0487               .template convert<typename unpacket_traits<Packet>::type,
0488                                 cl::sycl::rounding_mode::automatic>());
0489 }
0490 
0491 template <typename Packet>
0492 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
0493                                                           const Packet &b) {
0494   return ((a == b)
0495               .template convert<typename unpacket_traits<Packet>::type,
0496                                 cl::sycl::rounding_mode::automatic>());
0497 }
0498 
0499 #define SYCL_PCMP(OP, TYPE)                                                    \
0500   template <>                                                                  \
0501   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a,    \
0502                                                              const TYPE &b) {  \
0503     return sycl_pcmp_##OP<TYPE>(a, b);                                         \
0504   }
0505 
0506 SYCL_PCMP(le, cl::sycl::cl_float4)
0507 SYCL_PCMP(lt, cl::sycl::cl_float4)
0508 SYCL_PCMP(eq, cl::sycl::cl_float4)
0509 SYCL_PCMP(le, cl::sycl::cl_double2)
0510 SYCL_PCMP(lt, cl::sycl::cl_double2)
0511 SYCL_PCMP(eq, cl::sycl::cl_double2)
0512 #undef SYCL_PCMP
0513 
0514 template <typename T> struct convert_to_integer;
0515 
0516 template <> struct convert_to_integer<float> {
0517   using type = std::int32_t;
0518   using packet_type = cl::sycl::cl_int4;
0519 };
0520 template <> struct convert_to_integer<double> {
0521   using type = std::int64_t;
0522   using packet_type = cl::sycl::cl_long2;
0523 };
0524 
0525 template <typename PacketIn>
0526 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
0527     typename unpacket_traits<PacketIn>::type>::packet_type
0528 vector_as_int(const PacketIn &p) {
0529   return (
0530       p.template convert<typename convert_to_integer<
0531                              typename unpacket_traits<PacketIn>::type>::type,
0532                          cl::sycl::rounding_mode::automatic>());
0533 }
0534 
0535 template <typename packetOut, typename PacketIn>
0536 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
0537 convert_vector(const PacketIn &p) {
0538   return (p.template convert<typename unpacket_traits<packetOut>::type,
0539                              cl::sycl::rounding_mode::automatic>());
0540 }
0541 
0542 #define SYCL_PAND(TYPE)                                                        \
0543   template <>                                                                  \
0544   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a,         \
0545                                                         const TYPE &b) {       \
0546     return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b));          \
0547   }
0548 SYCL_PAND(cl::sycl::cl_float4)
0549 SYCL_PAND(cl::sycl::cl_double2)
0550 #undef SYCL_PAND
0551 
0552 #define SYCL_POR(TYPE)                                                         \
0553   template <>                                                                  \
0554   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a,          \
0555                                                        const TYPE &b) {        \
0556     return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b));          \
0557   }
0558 
0559 SYCL_POR(cl::sycl::cl_float4)
0560 SYCL_POR(cl::sycl::cl_double2)
0561 #undef SYCL_POR
0562 
0563 #define SYCL_PXOR(TYPE)                                                        \
0564   template <>                                                                  \
0565   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a,         \
0566                                                         const TYPE &b) {       \
0567     return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b));          \
0568   }
0569 
0570 SYCL_PXOR(cl::sycl::cl_float4)
0571 SYCL_PXOR(cl::sycl::cl_double2)
0572 #undef SYCL_PXOR
0573 
0574 #define SYCL_PANDNOT(TYPE)                                                     \
0575   template <>                                                                  \
0576   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a,      \
0577                                                            const TYPE &b) {    \
0578     return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b)));       \
0579   }
0580 SYCL_PANDNOT(cl::sycl::cl_float4)
0581 SYCL_PANDNOT(cl::sycl::cl_double2)
0582 #undef SYCL_PANDNOT
0583 
0584 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
0585     PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
0586   float tmp = kernel.packet[0].y();
0587   kernel.packet[0].y() = kernel.packet[1].x();
0588   kernel.packet[1].x() = tmp;
0589 
0590   tmp = kernel.packet[0].z();
0591   kernel.packet[0].z() = kernel.packet[2].x();
0592   kernel.packet[2].x() = tmp;
0593 
0594   tmp = kernel.packet[0].w();
0595   kernel.packet[0].w() = kernel.packet[3].x();
0596   kernel.packet[3].x() = tmp;
0597 
0598   tmp = kernel.packet[1].z();
0599   kernel.packet[1].z() = kernel.packet[2].y();
0600   kernel.packet[2].y() = tmp;
0601 
0602   tmp = kernel.packet[1].w();
0603   kernel.packet[1].w() = kernel.packet[3].y();
0604   kernel.packet[3].y() = tmp;
0605 
0606   tmp = kernel.packet[2].w();
0607   kernel.packet[2].w() = kernel.packet[3].z();
0608   kernel.packet[3].z() = tmp;
0609 }
0610 
0611 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
0612     PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
0613   double tmp = kernel.packet[0].y();
0614   kernel.packet[0].y() = kernel.packet[1].x();
0615   kernel.packet[1].x() = tmp;
0616 }
0617 
0618 template <>
0619 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
0620     const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
0621     const cl::sycl::cl_float4& thenPacket,
0622     const cl::sycl::cl_float4& elsePacket) {
0623   cl::sycl::cl_int4 condition(
0624       ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
0625       ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
0626   return cl::sycl::select(thenPacket, elsePacket, condition);
0627 }
0628 
0629 template <>
0630 inline cl::sycl::cl_double2 pblend(
0631     const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
0632     const cl::sycl::cl_double2& thenPacket,
0633     const cl::sycl::cl_double2& elsePacket) {
0634   cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
0635                                ifPacket.select[1] ? 0 : -1);
0636   return cl::sycl::select(thenPacket, elsePacket, condition);
0637 }
0638 #endif  // SYCL_DEVICE_ONLY
0639 
0640 #define SYCL_PSTORE(alignment)                                  \
0641   template <typename packet_type>                               \
0642   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
0643       const Eigen::TensorSycl::internal::RangeAccess<           \
0644           cl::sycl::access::mode::read_write,                   \
0645           typename unpacket_traits<packet_type>::type>& to,     \
0646       const packet_type& from) {                                \
0647     pstore##alignment(to.get_pointer(), from);                  \
0648   }
0649 
0650 // global space
0651 SYCL_PSTORE()
0652 SYCL_PSTORE(u)
0653 
0654 #undef SYCL_PSTORE
0655 
0656 template <typename scalar, typename packet_type, int Alignment>
0657 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
0658     Eigen::TensorSycl::internal::RangeAccess<
0659         cl::sycl::access::mode::read_write,
0660         typename unpacket_traits<packet_type>::type>
0661         to,
0662     const packet_type& from) {
0663   pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
0664 }
0665 
0666 }  // end namespace internal
0667 
0668 }  // end namespace Eigen
0669 
0670 #endif  // EIGEN_PACKET_MATH_SYCL_H