Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:00:11

0001 /**
0002  * Copyright (c) 2017-present, Facebook, Inc.
0003  * All rights reserved.
0004  *
0005  * This source code is licensed under the BSD-style license found in the
0006  * LICENSE file in the root directory of this source tree.
0007  */
0008 
0009 #pragma once
0010 
#include <algorithm>
#include <cstddef>
#include <cstdint>

#include "gloo/types.h"
0012 
0013 namespace gloo {
0014 
/**
 * Element-wise addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * The untyped buffers are reinterpreted as arrays of T. Each element is read
 * before the matching output element is written, so c_ may be the same buffer
 * as a_ or b_.
 *
 * @tparam T  element type; must provide operator+
 * @param c_  destination buffer holding at least n elements of T
 * @param a_  first operand buffer holding at least n elements of T
 * @param b_  second operand buffer holding at least n elements of T
 * @param n   number of elements to process; 0 is a no-op
 */
template <typename T>
void sum(void* c_, const void* a_, const void* b_, size_t n) {
  T* c = static_cast<T*>(c_);
  const T* a = static_cast<const T*>(a_);
  const T* b = static_cast<const T*>(b_);
  // The counter has the same type as n. An `int` counter (what `auto i = 0`
  // deduces) is compared signed-vs-unsigned and overflows past INT_MAX.
  for (size_t i = 0; i < n; i++) {
    c[i] = a[i] + b[i];
  }
}
0024 
// In-place overload: forwards to the four-argument sum<T>, passing `a` both as
// the destination and as the first operand, with `b` as the second operand.
template <typename T>
void sum(T* a, const T* b, size_t n) {
  T* const dst = a;  // the result overwrites the first operand
  sum<T>(dst, a, b, n);
}
0029 
/**
 * Element-wise multiplication: c[i] = a[i] * b[i] for every i in [0, n).
 *
 * The untyped buffers are reinterpreted as arrays of T. Each element is read
 * before the matching output element is written, so c_ may be the same buffer
 * as a_ or b_.
 *
 * @tparam T  element type; must provide operator*
 * @param c_  destination buffer holding at least n elements of T
 * @param a_  first operand buffer holding at least n elements of T
 * @param b_  second operand buffer holding at least n elements of T
 * @param n   number of elements to process; 0 is a no-op
 */
template <typename T>
void product(void* c_, const void* a_, const void* b_, size_t n) {
  T* c = static_cast<T*>(c_);
  const T* a = static_cast<const T*>(a_);
  const T* b = static_cast<const T*>(b_);
  // The counter has the same type as n. An `int` counter (what `auto i = 0`
  // deduces) is compared signed-vs-unsigned and overflows past INT_MAX.
  for (size_t i = 0; i < n; i++) {
    c[i] = a[i] * b[i];
  }
}
0039 
// In-place overload: forwards to the four-argument product<T>, passing `a`
// both as the destination and as the first operand, with `b` as the second.
template <typename T>
void product(T* a, const T* b, size_t n) {
  T* const dst = a;  // the result overwrites the first operand
  product<T>(dst, a, b, n);
}
0044 
/**
 * Element-wise maximum: c[i] = std::max(a[i], b[i]) for every i in [0, n).
 *
 * The untyped buffers are reinterpreted as arrays of T. Each element is read
 * before the matching output element is written, so c_ may be the same buffer
 * as a_ or b_.
 *
 * @tparam T  element type; must be usable with std::max
 * @param c_  destination buffer holding at least n elements of T
 * @param a_  first operand buffer holding at least n elements of T
 * @param b_  second operand buffer holding at least n elements of T
 * @param n   number of elements to process; 0 is a no-op
 */
template <typename T>
void max(void* c_, const void* a_, const void* b_, size_t n) {
  T* c = static_cast<T*>(c_);
  const T* a = static_cast<const T*>(a_);
  const T* b = static_cast<const T*>(b_);
  // The counter has the same type as n. An `int` counter (what `auto i = 0`
  // deduces) is compared signed-vs-unsigned and overflows past INT_MAX.
  for (size_t i = 0; i < n; i++) {
    c[i] = std::max(a[i], b[i]);
  }
}
0054 
// In-place overload: forwards to the four-argument max<T>, passing `a` both as
// the destination and as the first operand, with `b` as the second operand.
template <typename T>
void max(T* a, const T* b, size_t n) {
  T* const dst = a;  // the result overwrites the first operand
  max<T>(dst, a, b, n);
}
0059 
/**
 * Element-wise minimum: c[i] = std::min(a[i], b[i]) for every i in [0, n).
 *
 * The untyped buffers are reinterpreted as arrays of T. Each element is read
 * before the matching output element is written, so c_ may be the same buffer
 * as a_ or b_.
 *
 * @tparam T  element type; must be usable with std::min
 * @param c_  destination buffer holding at least n elements of T
 * @param a_  first operand buffer holding at least n elements of T
 * @param b_  second operand buffer holding at least n elements of T
 * @param n   number of elements to process; 0 is a no-op
 */
template <typename T>
void min(void* c_, const void* a_, const void* b_, size_t n) {
  T* c = static_cast<T*>(c_);
  const T* a = static_cast<const T*>(a_);
  const T* b = static_cast<const T*>(b_);
  // The counter has the same type as n. An `int` counter (what `auto i = 0`
  // deduces) is compared signed-vs-unsigned and overflows past INT_MAX.
  for (size_t i = 0; i < n; i++) {
    c[i] = std::min(a[i], b[i]);
  }
}
0069 
// In-place overload: forwards to the four-argument min<T>, passing `a` both as
// the destination and as the first operand, with `b` as the second operand.
template <typename T>
void min(T* a, const T* b, size_t n) {
  T* const dst = a;  // the result overwrites the first operand
  min<T>(dst, a, b, n);
}
0074 
/**
 * Returns `value` unchanged when it is already a multiple of `multiple`;
 * otherwise returns `value + multiple - (value % multiple)`.
 *
 * `multiple` is the divisor of a `%` operation, so it must not be zero.
 * NOTE(review): with a negative `value` the `%` result is negative and the
 * returned number is not the next multiple upward -- callers presumably pass
 * non-negative values only; confirm.
 */
template <typename T>
T roundUp(T value, T multiple) {
  const T excess = value % multiple;
  return (excess == 0) ? value : value + multiple - excess;
}
0083 
/**
 * Returns the smallest `dim` such that 2^dim >= value, i.e. ceil(log2(value)).
 *
 * Inputs 0 and 1 both yield 0. The largest result is 32, returned for any
 * value above 2^31.
 *
 * @param value  number to take the ceiling base-2 logarithm of
 * @return       exponent of the smallest power of two not less than `value`
 */
inline uint32_t log2ceil(uint32_t value) {
#if defined(__GNUC__)
  // __builtin_clz is undefined for 0, so keep value - 1 strictly positive.
  if (value <= 1) {
    return 0;
  }
  return 32 - static_cast<uint32_t>(__builtin_clz(value - 1));
#else
  uint32_t dim = 0;
  // The probe is 64 bits wide: a 32-bit probe wraps to 0 after 2^31, which
  // makes the loop run forever for any value above 2^31.
  for (uint64_t size = 1; size < value; size <<= 1) {
    ++dim;
  }
  return dim;
#endif // defined(__GNUC__)
}
0095 
#if GLOO_USE_AVX

// float16 versions of the four-argument element-wise kernels. Only declared
// here: each explicit specialization replaces the generic template body for
// float16, so its definition has to be supplied by another translation unit.
// NOTE(review): presumably an AVX-specific source file -- confirm against the
// build.
template <>
void sum<float16>(void* c, const void* a, const void* b, size_t n);

template <>
void product<float16>(void* c, const void* a, const void* b, size_t n);

template <>
void max<float16>(void* c, const void* a, const void* b, size_t n);

template <>
void min<float16>(void* c, const void* a, const void* b, size_t n);

// Matching explicit instantiation declarations, one per specialization above.
extern template void sum<float16>(
    void* c, const void* a, const void* b, size_t n);
extern template void product<float16>(
    void* c, const void* a, const void* b, size_t n);
extern template void max<float16>(
    void* c, const void* a, const void* b, size_t n);
extern template void min<float16>(
    void* c, const void* a, const void* b, size_t n);

#endif // GLOO_USE_AVX
0119 
0120 } // namespace gloo