// arm_simd.h - ARM NEON/ACLE helper wrappers for CRC32, PMULL, and SHA-3 operations
0001
0002
0003
0004
0005
0006 #ifndef CRYPTOPP_ARM_SIMD_H
0007 #define CRYPTOPP_ARM_SIMD_H
0008
0009 #include "config.h"
0010
0011 #if (CRYPTOPP_ARM_NEON_HEADER)
0012 # include <stdint.h>
0013 # include <arm_neon.h>
0014 #endif
0015
0016 #if (CRYPTOPP_ARM_ACLE_HEADER)
0017 # include <stdint.h>
0018 # include <arm_acle.h>
0019 #endif
0020
0021 #if (CRYPTOPP_ARM_CRC32_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
0022
0023
0024
0025
0026
0027
0028
0029
/// \brief CRC32 checksum of one byte
/// \param crc the running CRC32 value
/// \param val the byte to fold into the checksum
/// \return the updated CRC32 value
/// \details Uses the ACLE __crc32b intrinsic under MSVC; otherwise emits the
///  Armv8 CRC32B instruction via GCC-style inline assembly.
inline uint32_t CRC32B (uint32_t crc, uint8_t val)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32b(crc, val);
#else
    // %w0/%w1 select the 32-bit views of the registers; crc is read-write ("+r").
    __asm__ ("crc32b %w0, %w0, %w1 \n\t"
            :"+r" (crc) : "r" (val) );
    return crc;
#endif
}
0040
0041
0042
0043
0044
0045
/// \brief CRC32 checksum of one 32-bit word
/// \param crc the running CRC32 value
/// \param val the 32-bit word to fold into the checksum
/// \return the updated CRC32 value
/// \details Uses the ACLE __crc32w intrinsic under MSVC; otherwise emits the
///  Armv8 CRC32W instruction via GCC-style inline assembly.
inline uint32_t CRC32W (uint32_t crc, uint32_t val)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32w(crc, val);
#else
    __asm__ ("crc32w %w0, %w0, %w1 \n\t"
            :"+r" (crc) : "r" (val) );
    return crc;
#endif
}
0056
0057
0058
0059
0060
0061
/// \brief CRC32 checksum of four consecutive 32-bit words
/// \param crc the running CRC32 value
/// \param vals an array of four 32-bit words, folded in order [0]..[3]
/// \return the updated CRC32 value
/// \details Equivalent to four chained CRC32W calls; the single asm block lets
///  the compiler schedule the four dependent CRC32W instructions together.
inline uint32_t CRC32Wx4 (uint32_t crc, const uint32_t vals[4])
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32w(__crc32w(__crc32w(__crc32w(
        crc, vals[0]), vals[1]), vals[2]), vals[3]);
#else
    __asm__ ("crc32w %w0, %w0, %w1 \n\t"
            "crc32w %w0, %w0, %w2 \n\t"
            "crc32w %w0, %w0, %w3 \n\t"
            "crc32w %w0, %w0, %w4 \n\t"
            :"+r" (crc) : "r" (vals[0]), "r" (vals[1]),
            "r" (vals[2]), "r" (vals[3]));
    return crc;
#endif
}
0077
0078
0079
0080
0081
0082
0083
0084
0085
/// \brief CRC32-C (Castagnoli) checksum of one byte
/// \param crc the running CRC32-C value
/// \param val the byte to fold into the checksum
/// \return the updated CRC32-C value
/// \details Uses the ACLE __crc32cb intrinsic under MSVC; otherwise emits the
///  Armv8 CRC32CB instruction via GCC-style inline assembly.
inline uint32_t CRC32CB (uint32_t crc, uint8_t val)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32cb(crc, val);
#else
    __asm__ ("crc32cb %w0, %w0, %w1 \n\t"
            :"+r" (crc) : "r" (val) );
    return crc;
#endif
}
0096
0097
0098
0099
0100
0101
/// \brief CRC32-C (Castagnoli) checksum of one 32-bit word
/// \param crc the running CRC32-C value
/// \param val the 32-bit word to fold into the checksum
/// \return the updated CRC32-C value
/// \details Uses the ACLE __crc32cw intrinsic under MSVC; otherwise emits the
///  Armv8 CRC32CW instruction via GCC-style inline assembly.
inline uint32_t CRC32CW (uint32_t crc, uint32_t val)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32cw(crc, val);
#else
    __asm__ ("crc32cw %w0, %w0, %w1 \n\t"
            :"+r" (crc) : "r" (val) );
    return crc;
#endif
}
0112
0113
0114
0115
0116
0117
/// \brief CRC32-C (Castagnoli) checksum of four consecutive 32-bit words
/// \param crc the running CRC32-C value
/// \param vals an array of four 32-bit words, folded in order [0]..[3]
/// \return the updated CRC32-C value
/// \details Equivalent to four chained CRC32CW calls; the single asm block lets
///  the compiler schedule the four dependent CRC32CW instructions together.
inline uint32_t CRC32CWx4 (uint32_t crc, const uint32_t vals[4])
{
#if defined(CRYPTOPP_MSC_VERSION)
    return __crc32cw(__crc32cw(__crc32cw(__crc32cw(
        crc, vals[0]), vals[1]), vals[2]), vals[3]);
#else
    __asm__ ("crc32cw %w0, %w0, %w1 \n\t"
            "crc32cw %w0, %w0, %w2 \n\t"
            "crc32cw %w0, %w0, %w3 \n\t"
            "crc32cw %w0, %w0, %w4 \n\t"
            :"+r" (crc) : "r" (vals[0]), "r" (vals[1]),
            "r" (vals[2]), "r" (vals[3]));
    return crc;
#endif
}
0133
0134 #endif
0135
0136 #if (CRYPTOPP_ARM_PMULL_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
/// \brief Polynomial multiplication, low halves
/// \param a the first value
/// \param b the second value
/// \return the 128-bit carryless product of the low 64-bit lanes of a and b
/// \details Multiplies lane 0 of \p a by lane 0 of \p b (PMULL with .1d
///  source arrangement reads the low doubleword of each register).
inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    const __n64 x = { vgetq_lane_u64(a, 0) };
    const __n64 y = { vgetq_lane_u64(b, 0) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    uint64x2_t r;
    __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
            :"=w" (r) : "w" (a), "w" (b) );
    return r;
#else
    // Generic fallback for other compilers via the vmull_p64 intrinsic.
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
        vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
#endif
}
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
/// \brief Polynomial multiplication, low half of a by high half of b
/// \param a the first value
/// \param b the second value
/// \return the 128-bit carryless product of lane 0 of a and lane 1 of b
/// \details The GCC path feeds vget_high_u64(b) to PMULL so that the
///  instruction's low-doubleword read picks up b's upper lane.
inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    const __n64 x = { vgetq_lane_u64(a, 0) };
    const __n64 y = { vgetq_lane_u64(b, 1) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    uint64x2_t r;
    __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
            :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
    return r;
#else
    // Generic fallback for other compilers via the vmull_p64 intrinsic.
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
        vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
#endif
}
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
/// \brief Polynomial multiplication, high half of a by low half of b
/// \param a the first value
/// \param b the second value
/// \return the 128-bit carryless product of lane 1 of a and lane 0 of b
/// \details The GCC path feeds vget_high_u64(a) to PMULL so that the
///  instruction's low-doubleword read picks up a's upper lane.
inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    const __n64 x = { vgetq_lane_u64(a, 1) };
    const __n64 y = { vgetq_lane_u64(b, 0) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    uint64x2_t r;
    __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
            :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
    return r;
#else
    // Generic fallback for other compilers via the vmull_p64 intrinsic.
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
        vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
#endif
}
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
/// \brief Polynomial multiplication, high halves
/// \param a the first value
/// \param b the second value
/// \return the 128-bit carryless product of the high 64-bit lanes of a and b
/// \details The GCC path uses PMULL2, which reads the upper doubleword of
///  each .2d source register directly.
inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    const __n64 x = { vgetq_lane_u64(a, 1) };
    const __n64 y = { vgetq_lane_u64(b, 1) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    uint64x2_t r;
    __asm__ ("pmull2 %0.1q, %1.2d, %2.2d \n\t"
            :"=w" (r) : "w" (a), "w" (b) );
    return r;
#else
    // Generic fallback for other compilers via the vmull_p64 intrinsic.
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
        vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
#endif
}
0259
0260
0261
0262
0263
0264
0265
0266
/// \brief Polynomial multiplication of the low lanes
/// \param a the first value
/// \param b the second value
/// \return the 128-bit carryless product of the low 64-bit lanes of a and b
/// \details Convenience alias with the same behavior as PMULL_00: multiplies
///  lane 0 of \p a by lane 0 of \p b.
inline uint64x2_t PMULL(const uint64x2_t a, const uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    const __n64 x = { vgetq_lane_u64(a, 0) };
    const __n64 y = { vgetq_lane_u64(b, 0) };
    return vmull_p64(x, y);
#elif defined(__GNUC__)
    uint64x2_t r;
    __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
            :"=w" (r) : "w" (a), "w" (b) );
    return r;
#else
    // Generic fallback for other compilers via the vmull_p64 intrinsic.
    return (uint64x2_t)(vmull_p64(
        vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
        vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
#endif
}
0284
0285
0286
0287
0288
0289
0290
0291
0292 inline uint64x2_t PMULL_HIGH(const uint64x2_t a, const uint64x2_t b)
0293 {
0294 #if defined(CRYPTOPP_MSC_VERSION)
0295 const __n64 x = { vgetq_lane_u64(a, 1) };
0296 const __n64 y = { vgetq_lane_u64(b, 1) };
0297 return vmull_p64(x, y);
0298 #elif defined(__GNUC__)
0299 uint64x2_t r;
0300 __asm__ ("pmull2 %0.1q, %1.2d, %2.2d \n\t"
0301 :"=w" (r) : "w" (a), "w" (b) );
0302 return r;
0303 #else
0304 return (uint64x2_t)(vmull_p64(
0305 vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
0306 vgetq_lane_u64(vreinterpretq_u64_u8(b),1))));
0307 #endif
0308 }
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
/// \brief Vector extraction
/// \tparam C the byte count to extract from the concatenation of a and b
/// \param a the first value
/// \param b the second value
/// \return a vector built from the bytes of a:b starting at byte offset C
/// \details Wraps the EXT instruction (vextq_u8 under MSVC). The "I" asm
///  constraint requires C to be a compile-time immediate; presumably C must
///  be in [0,15] as for vextq_u8 — confirm against callers.
template <unsigned int C>
inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
{

#if defined(CRYPTOPP_MSC_VERSION)
    return vreinterpretq_u64_u8(vextq_u8(
        vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C));
#else
    uint64x2_t r;
    __asm__ ("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
            :"=w" (r) : "w" (a), "w" (b), "I" (C) );
    return r;
#endif
}
0333
0334
0335 #endif
0336
0337 #if CRYPTOPP_ARM_SHA3_AVAILABLE || defined(CRYPTOPP_DOXYGEN_PROCESSING)
0338
0339
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
/// \brief Three-way XOR
/// \param a the first value
/// \param b the second value
/// \param c the third value
/// \return a XOR b XOR c
/// \details Wraps the Armv8.2 SHA-3 EOR3 instruction (veor3q_u64 under MSVC).
inline uint64x2_t VEOR3(uint64x2_t a, uint64x2_t b, uint64x2_t c)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return veor3q_u64(a, b, c);
#else
    uint64x2_t r;
    __asm__ ("eor3 %0.16b, %1.16b, %2.16b, %3.16b \n\t"
            :"=w" (r) : "w" (a), "w" (b), "w" (c));
    return r;
#endif
}
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
/// \brief XOR and rotate
/// \param a the first value
/// \param b the second value
/// \param c the rotate amount
/// \return (a XOR b) rotated by c in each 64-bit lane
/// \details Wraps the Armv8.2 SHA-3 XAR instruction (vxarq_u64 under MSVC).
///  NOTE(review): the "I" asm constraint requires c to collapse to a
///  compile-time immediate after inlining; a truly runtime c would fail to
///  assemble on the GCC path — prefer the template overload VXAR<C>.
inline uint64x2_t VXAR(uint64x2_t a, uint64x2_t b, const int c)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return vxarq_u64(a, b, c);
#else
    uint64x2_t r;
    __asm__ ("xar %0.2d, %1.2d, %2.2d, %3 \n\t"
            :"=w" (r) : "w" (a), "w" (b), "I" (c));
    return r;
#endif
}
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
/// \brief XOR and rotate by a compile-time amount
/// \tparam C the rotate amount
/// \param a the first value
/// \param b the second value
/// \return (a XOR b) rotated by C in each 64-bit lane
/// \details Wraps the Armv8.2 SHA-3 XAR instruction (vxarq_u64 under MSVC);
///  C is guaranteed to be an immediate, satisfying the "I" asm constraint.
template <unsigned int C>
inline uint64x2_t VXAR(uint64x2_t a, uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return vxarq_u64(a, b, C);
#else
    uint64x2_t r;
    __asm__ ("xar %0.2d, %1.2d, %2.2d, %3 \n\t"
            :"=w" (r) : "w" (a), "w" (b), "I" (C));
    return r;
#endif
}
0404
0405
0406
0407
0408
0409
0410
0411
0412
/// \brief Rotate and XOR
/// \param a the first value
/// \param b the second value
/// \return a XOR (b rotated left by 1) in each 64-bit lane
/// \details Wraps the Armv8.2 SHA-3 RAX1 instruction (vrax1q_u64 under MSVC).
inline uint64x2_t VRAX1(uint64x2_t a, uint64x2_t b)
{
#if defined(CRYPTOPP_MSC_VERSION)
    return vrax1q_u64(a, b);
#else
    uint64x2_t r;
    __asm__ ("rax1 %0.2d, %1.2d, %2.2d \n\t"
            :"=w" (r) : "w" (a), "w" (b));
    return r;
#endif
}
0424
0425 #endif
0426
0427 #endif