File indexing completed on 2025-01-19 09:51:42
0001 namespace Eigen {
0002 namespace internal {
0003
0004 #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
0005 // gebp_traits specialization for float x float on NEON, compiled only when EIGEN_ARCH_ARM && EIGEN_COMP_CLANG holds.
0006 // It derives from the Architecture::Generic traits and declares its own acc() and madd() members, so that the
0007 // multiply-accumulate is emitted as one explicit vmla.f32 instruction rather than left to the compiler.
0008 // NOTE(review): presumably a workaround for clang code generation in the GEBP kernel on this target - confirm upstream.
0009 template<>
0010 struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
0011 : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
0012 {
0013 EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
0014 {
0015 // r += c * alpha, lane by lane. r is a read-write operand ("+w"); c and alpha are read-only inputs ("w").
0016 // The %q operand modifier prints each operand as a 128-bit q register; `volatile` keeps the statement from being optimized away.
0017 asm volatile(
0018 "vmla.f32 %q[r], %q[c], %q[alpha]"
0019 : [r] "+w" (r)
0020 : [c] "w" (c),
0021 [alpha] "w" (alpha)
0022 : );
0023 }
0024 // Packet rhs: c += a * b through acc(). Neither tmp nor the lane-id argument is used.
0025 template <typename LaneIdType>
0026 EIGEN_STRONG_INLINE void madd(const Packet4f& a, const Packet4f& b,
0027 Packet4f& c, Packet4f& tmp,
0028 const LaneIdType&) const {
0029 acc(a, b, c);
0030 }
0031 // Quad-packet rhs: selects one packet with b.get(lane) and forwards it to madd (presumably the Packet4f overload above).
0032 template <typename LaneIdType>
0033 EIGEN_STRONG_INLINE void madd(const Packet4f& a, const QuadPacket<Packet4f>& b,
0034 Packet4f& c, Packet4f& tmp,
0035 const LaneIdType& lane) const {
0036 madd(a, b.get(lane), c, tmp, lane);
0037 }
0038 };
0039
0040 #endif
0041
0042 #if EIGEN_ARCH_ARM64
0043 // gebp_traits specialization for float x float on NEON, compiled when EIGEN_ARCH_ARM64 holds: the rhs is kept either as a scalar or as a float32x4_t, and madd() multiplies by that scalar or by one selected lane.
0044 template<>
0045 struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
0046 : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
0047 {
0048 typedef float RhsPacket; // one rhs coefficient, held as a plain scalar
0049 typedef float32x4_t RhsPacketx4; // four rhs coefficients in a single vector
0050 // Scalar load: copies the coefficient at *b into dest.
0051 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
0052 {
0053 dest = *b;
0054 }
0055 // Vector load: reads b[0..3] into dest with vld1q_f32.
0056 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const
0057 {
0058 dest = vld1q_f32(b);
0059 }
0060 // Scalar update: re-reads *b, same effect as the scalar loadRhs.
0061 EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const
0062 {
0063 dest = *b;
0064 }
0065 // No-op for the x4 packet: dest is left unchanged.
0066 EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
0067 {}
0068 // Delegates to the scalar loadRhs.
0069 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
0070 {
0071 loadRhs(b,dest);
0072 }
0073 // Scalar rhs: c += a * b, with b applied to every lane (vfmaq_n_f32). The fourth argument is unused.
0074 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& , const FixedInt<0>&) const
0075 {
0076 c = vfmaq_n_f32(c, a, b);
0077 }
0078
0079 // Vector rhs: one non-template overload per lane index (FixedInt<0> .. FixedInt<3>); each passes its index to
0080 // madd_helper as a template argument. NOTE(review): the overloads look deliberately spelled out rather than
0081 // written as one template over the lane index - confirm the reason before collapsing them.
0082 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<0>&) const
0083 { madd_helper<0>(a, b, c); }
0084 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<1>&) const
0085 { madd_helper<1>(a, b, c); }
0086 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<2>&) const
0087 { madd_helper<2>(a, b, c); }
0088 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<3>&) const
0089 { madd_helper<3>(a, b, c); }
0090
0091 private:
0092 template<int LaneID>
0093 EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
0094 {
0095 #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
0096 // With EIGEN_COMP_GNUC_STRICT and a GCC version below 9.0, emit the by-element fmla directly instead of calling the intrinsic.
0097 // LaneID is a template parameter, so each test below is a compile-time constant. NOTE(review): presumably a compiler workaround - confirm upstream.
0098 if(LaneID==0) asm("fmla %0.4s, %1.4s, %2.s[0]\n" : "+w" (c) : "w" (a), "w" (b) : );
0099 else if(LaneID==1) asm("fmla %0.4s, %1.4s, %2.s[1]\n" : "+w" (c) : "w" (a), "w" (b) : );
0100 else if(LaneID==2) asm("fmla %0.4s, %1.4s, %2.s[2]\n" : "+w" (c) : "w" (a), "w" (b) : );
0101 else if(LaneID==3) asm("fmla %0.4s, %1.4s, %2.s[3]\n" : "+w" (c) : "w" (a), "w" (b) : );
0102 #else
0103 c = vfmaq_laneq_f32(c, a, b, LaneID); // c += a * b[LaneID]
0104 #endif
0105 }
0106 };
0107 // gebp_traits specialization for double x double on NEON, compiled when EIGEN_ARCH_ARM64 holds. The four-coefficient
0108 // rhs packet is a pair of float64x2_t vectors, so lane ids 0..3 are mapped to (B_0 lane 0, B_0 lane 1, B_1 lane 0, B_1 lane 1).
0109 template<>
0110 struct gebp_traits <double,double,false,false,Architecture::NEON>
0111 : gebp_traits<double,double,false,false,Architecture::Generic>
0112 {
0113 typedef double RhsPacket; // one rhs coefficient, held as a plain scalar
0114 // Four rhs coefficients: loadRhs below fills B_0 from b[0..1] and B_1 from b[2..3].
0115 struct RhsPacketx4 {
0116 float64x2_t B_0, B_1;
0117 };
0118 // Scalar load: copies the coefficient at *b into dest.
0119 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
0120 {
0121 dest = *b;
0122 }
0123 // Vector load: two vld1q_f64 loads, from b and from b+2.
0124 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const
0125 {
0126 dest.B_0 = vld1q_f64(b);
0127 dest.B_1 = vld1q_f64(b+2);
0128 }
0129 // Scalar update: delegates to the scalar loadRhs.
0130 EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const
0131 {
0132 loadRhs(b,dest);
0133 }
0134 // No-op for the x4 packet: dest is left unchanged.
0135 EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
0136 {}
0137 // Delegates to the scalar loadRhs.
0138 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
0139 {
0140 loadRhs(b,dest);
0141 }
0142 // Scalar rhs: c += a * b, with b applied to every lane (vfmaq_n_f64). The fourth argument is unused.
0143 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& , const FixedInt<0>&) const
0144 {
0145 c = vfmaq_n_f64(c, a, b);
0146 }
0147
0148 // Vector rhs: one non-template overload per lane index (FixedInt<0> .. FixedInt<3>); each passes its index to
0149 // madd_helper as a template argument. NOTE(review): the overloads look deliberately spelled out rather than
0150 // written as one template over the lane index - confirm the reason before collapsing them.
0151 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<0>&) const
0152 { madd_helper<0>(a, b, c); }
0153 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<1>&) const
0154 { madd_helper<1>(a, b, c); }
0155 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<2>&) const
0156 { madd_helper<2>(a, b, c); }
0157 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& , const FixedInt<3>&) const
0158 { madd_helper<3>(a, b, c); }
0159
0160 private:
0161 template <int LaneID>
0162 EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
0163 {
0164 #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
0165 // With EIGEN_COMP_GNUC_STRICT and a GCC version below 9.0, emit the by-element fmla directly instead of calling the intrinsic.
0166 // LaneID 0/1 read lanes 0/1 of B_0 and LaneID 2/3 read lanes 0/1 of B_1. NOTE(review): presumably a compiler workaround - confirm upstream.
0167 if(LaneID==0) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : );
0168 else if(LaneID==1) asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_0) : );
0169 else if(LaneID==2) asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_1) : );
0170 else if(LaneID==3) asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_1) : );
0171 #else
0172 if(LaneID==0) c = vfmaq_laneq_f64(c, a, b.B_0, 0); // c += a * b[0]
0173 else if(LaneID==1) c = vfmaq_laneq_f64(c, a, b.B_0, 1); // c += a * b[1]
0174 else if(LaneID==2) c = vfmaq_laneq_f64(c, a, b.B_1, 0); // c += a * b[2]
0175 else if(LaneID==3) c = vfmaq_laneq_f64(c, a, b.B_1, 1); // c += a * b[3]
0176 #endif
0177 }
0178 };
0179
0180 #endif
0181
0182 }
0183 }