Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:27:15

0001 // Copyright 2022 The Abseil Authors
0002 //
0003 // Licensed under the Apache License, Version 2.0 (the "License");
0004 // you may not use this file except in compliance with the License.
0005 // You may obtain a copy of the License at
0006 //
0007 //     https://www.apache.org/licenses/LICENSE-2.0
0008 //
0009 // Unless required by applicable law or agreed to in writing, software
0010 // distributed under the License is distributed on an "AS IS" BASIS,
0011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0012 // See the License for the specific language governing permissions and
0013 // limitations under the License.
0014 
0015 #ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
0016 #define ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
0017 
0018 #include "absl/base/config.h"
0019 
0020 #ifdef __aarch64__
0021 #include <arm_neon.h>
0022 
// 128-bit integer vector type used by the _mm_* emulation functions in this
// header; it is represented as a NEON int64x2_t.
0023 typedef int64x2_t __m128i; /* 128-bit vector containing integers */
// Reinterprets a NEON int32x4_t as an __m128i (bit pattern unchanged).
0024 #define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x)
// Reinterprets an __m128i as a NEON int64x2_t. This is the identity because
// __m128i is a typedef of int64x2_t.
0025 #define vreinterpretq_s64_m128i(x) (x)
0026 
// Store fence: every store issued before this call becomes globally visible
// before any store issued after it.  Implemented with __sync_synchronize().
// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx
__attribute__((always_inline)) static inline void _mm_sfence(void) {
  __sync_synchronize();
}
0033 
0034 // Load 128 bits of integer data from unaligned memory into dst. The Intel
0035 // intrinsic may perform better than _mm_loadu_si128 when the data crosses a
0036 // cache line boundary; in this header it is simply an alias for _mm_loadu_si128.
0037 //
0038 //   dst[127:0] := MEM[mem_addr+127:mem_addr]
0039 //
0040 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128
0041 #define _mm_lddqu_si128 _mm_loadu_si128
0042 
0043 // Loads 128-bit value. :
0044 // https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx
0045 static inline __attribute__((always_inline)) __m128i _mm_loadu_si128(
0046     const __m128i *p) {
0047   return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *)p));
0048 }
0049 
0050 // Stores the data in a to the address p without polluting the caches.  If the
0051 // cache line containing address p is already in the cache, the cache will be
0052 // updated.
0053 // https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
0054 static inline __attribute__((always_inline)) void _mm_stream_si128(__m128i *p,
0055                                                                    __m128i a) {
0056 #if ABSL_HAVE_BUILTIN(__builtin_nontemporal_store)
0057   __builtin_nontemporal_store(a, p);
0058 #else
0059   vst1q_s64((int64_t *)p, vreinterpretq_s64_m128i(a));
0060 #endif
0061 }
0062 
0063 // Sets the 16 signed 8-bit integer values.
0064 // https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx
0065 static inline __attribute__((always_inline)) __m128i _mm_set_epi8(
0066     signed char b15, signed char b14, signed char b13, signed char b12,
0067     signed char b11, signed char b10, signed char b9, signed char b8,
0068     signed char b7, signed char b6, signed char b5, signed char b4,
0069     signed char b3, signed char b2, signed char b1, signed char b0) {
0070   int8_t __attribute__((aligned(16)))
0071   data[16] = {(int8_t)b0,  (int8_t)b1,  (int8_t)b2,  (int8_t)b3,
0072               (int8_t)b4,  (int8_t)b5,  (int8_t)b6,  (int8_t)b7,
0073               (int8_t)b8,  (int8_t)b9,  (int8_t)b10, (int8_t)b11,
0074               (int8_t)b12, (int8_t)b13, (int8_t)b14, (int8_t)b15};
0075   return (__m128i)vld1q_s8(data);
0076 }
0077 #endif  // __aarch64__
0078 
0079 #endif  // ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_