Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:25:46

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
0029 #define VC_COMMON_GATHERIMPLEMENTATION_H_
0030 
0031 #include "macros.h"
0032 
0033 namespace Vc_VERSIONED_NAMESPACE
0034 {
0035 namespace Common
0036 {
0037 
/**
 * Identifies the strategy used to carry out a masked gather/scatter.
 *
 * Every enumerator has a matching std::integral_constant tag type (declared right
 * after the enum). The executeGather overloads in this header take one of those
 * tags as their first parameter, so the strategy is selected by overload resolution.
 */
enum class GatherScatterImplementation : int {
    /// Visit every vector entry and load the ones whose mask entry is set.
    SimpleLoop = 0,
    /// Adjust the index vector with the mask (setZeroInverted), gather all entries,
    /// then assign the result under the mask.
    SetIndexZero = 1,
    /// Walk the set bits of the mask's integer representation with bit scans.
    BitScanLoop = 2,
    /// Switch on the population count of the mask and process the set bits in an
    /// unrolled fall-through sequence.
    PopcntSwitch = 3
};

/// Tag type selecting GatherScatterImplementation::SimpleLoop.
using SimpleLoopT =
    std::integral_constant<GatherScatterImplementation,
                           GatherScatterImplementation::SimpleLoop>;
/// Tag type selecting GatherScatterImplementation::SetIndexZero.
using SetIndexZeroT =
    std::integral_constant<GatherScatterImplementation,
                           GatherScatterImplementation::SetIndexZero>;
/// Tag type selecting GatherScatterImplementation::BitScanLoop.
using BitScanLoopT =
    std::integral_constant<GatherScatterImplementation,
                           GatherScatterImplementation::BitScanLoop>;
/// Tag type selecting GatherScatterImplementation::PopcntSwitch.
using PopcntSwitchT =
    std::integral_constant<GatherScatterImplementation,
                           GatherScatterImplementation::PopcntSwitch>;
0049 
0050 template <typename V, typename MT, typename IT>
0051 Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT,
0052                                     V &v,
0053                                     const MT *mem,
0054                                     IT &&indexes_,
0055                                     typename V::MaskArgument mask)
0056 {
0057     auto indexes = std::forward<IT>(indexes_);
0058     indexes.setZeroInverted(static_cast<decltype(!indexes)>(mask));
0059     const V tmp(mem, indexes);
0060     where(mask) | v = tmp;
0061 }
0062 
0063 template <typename V, typename MT, typename IT>
0064 Vc_ALWAYS_INLINE void executeGather(SimpleLoopT, V &v, const MT *mem, const IT &indexes,
0065                                     const typename V::MaskArgument mask)
0066 {
0067     if (Vc_IS_UNLIKELY(mask.isEmpty())) {
0068         return;
0069     }
0070 #if defined Vc_GCC && Vc_GCC >= 0x40900
0071     // GCC 4.8 doesn't support dependent type and constexpr vector_size argument
0072     constexpr std::size_t Sizeof = sizeof(V);
0073     using Builtin [[gnu::vector_size(Sizeof)]] = typename V::value_type;
0074     Builtin tmp = reinterpret_cast<Builtin>(v.data());
0075     Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
0076         if (mask[i]) {
0077             tmp[i] = mem[indexes[i]];
0078         }
0079     });
0080     v.data() = reinterpret_cast<typename V::VectorType>(tmp);
0081 #else
0082     Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
0083         if (mask[i])
0084             v[i] = mem[indexes[i]];
0085     });
0086 #endif
0087 }
0088 
0089 template <typename V, typename MT, typename IT>
0090 Vc_ALWAYS_INLINE void executeGather(BitScanLoopT,
0091                                     V &v,
0092                                     const MT *mem,
0093                                     const IT &indexes,
0094                                     typename V::MaskArgument mask)
0095 {
0096 #ifdef Vc_GNU_ASM
0097     size_t bits = mask.toInt();
0098     while (Vc_IS_LIKELY(bits > 0)) {
0099         size_t i, j;
0100         asm("bsf %[bits],%[i]\n\t"
0101             "bsr %[bits],%[j]\n\t"
0102             "btr %[i],%[bits]\n\t"
0103             "btr %[j],%[bits]\n\t"
0104             : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
0105         v[i] = mem[indexes[i]];
0106         v[j] = mem[indexes[j]];
0107     }
0108 #else
0109     // Alternative from Vc::SSE (0.7)
0110     int bits = mask.toInt();
0111     while (bits) {
0112         const int i = _bit_scan_forward(bits);
0113     bits &= bits - 1;
0114     v[i] = mem[indexes[i]];
0115     }
0116 #endif  // Vc_GNU_ASM
0117 }
0118 
0119 template <typename V, typename MT, typename IT>
0120 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
0121                                     V &v,
0122                                     const MT *mem,
0123                                     const IT &indexes,
0124                                     typename V::MaskArgument mask,
0125                                     enable_if<V::Size == 16> = nullarg)
0126 {
0127     unsigned int bits = mask.toInt();
0128     unsigned int low, high = 0;
0129     switch (Vc::Detail::popcnt16(bits)) {
0130     case 16:
0131         v.gather(mem, indexes);
0132         break;
0133     case 15:
0134         low = _bit_scan_forward(bits);
0135         bits ^= 1 << low;
0136         v[low] = mem[indexes[low]];
0137         // fallthrough
0138     case 14:
0139         high = _bit_scan_reverse(bits);
0140         v[high] = mem[indexes[high]];
0141         high = (1 << high);
0142         // fallthrough
0143     case 13:
0144         low = _bit_scan_forward(bits);
0145         bits ^= high | (1 << low);
0146         v[low] = mem[indexes[low]];
0147         // fallthrough
0148     case 12:
0149         high = _bit_scan_reverse(bits);
0150         v[high] = mem[indexes[high]];
0151         high = (1 << high);
0152         // fallthrough
0153     case 11:
0154         low = _bit_scan_forward(bits);
0155         bits ^= high | (1 << low);
0156         v[low] = mem[indexes[low]];
0157         // fallthrough
0158     case 10:
0159         high = _bit_scan_reverse(bits);
0160         v[high] = mem[indexes[high]];
0161         high = (1 << high);
0162         // fallthrough
0163     case 9:
0164         low = _bit_scan_forward(bits);
0165         bits ^= high | (1 << low);
0166         v[low] = mem[indexes[low]];
0167         // fallthrough
0168     case 8:
0169         high = _bit_scan_reverse(bits);
0170         v[high] = mem[indexes[high]];
0171         high = (1 << high);
0172         // fallthrough
0173     case 7:
0174         low = _bit_scan_forward(bits);
0175         bits ^= high | (1 << low);
0176         v[low] = mem[indexes[low]];
0177         // fallthrough
0178     case 6:
0179         high = _bit_scan_reverse(bits);
0180         v[high] = mem[indexes[high]];
0181         high = (1 << high);
0182         // fallthrough
0183     case 5:
0184         low = _bit_scan_forward(bits);
0185         bits ^= high | (1 << low);
0186         v[low] = mem[indexes[low]];
0187         // fallthrough
0188     case 4:
0189         high = _bit_scan_reverse(bits);
0190         v[high] = mem[indexes[high]];
0191         high = (1 << high);
0192         // fallthrough
0193     case 3:
0194         low = _bit_scan_forward(bits);
0195         bits ^= high | (1 << low);
0196         v[low] = mem[indexes[low]];
0197         // fallthrough
0198     case 2:
0199         high = _bit_scan_reverse(bits);
0200         v[high] = mem[indexes[high]];
0201         // fallthrough
0202     case 1:
0203         low = _bit_scan_forward(bits);
0204         v[low] = mem[indexes[low]];
0205         // fallthrough
0206     case 0:
0207         break;
0208     }
0209 }
0210 template <typename V, typename MT, typename IT>
0211 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
0212                                     V &v,
0213                                     const MT *mem,
0214                                     const IT &indexes,
0215                                     typename V::MaskArgument mask,
0216                                     enable_if<V::Size == 8> = nullarg)
0217 {
0218     unsigned int bits = mask.toInt();
0219     unsigned int low, high = 0;
0220     switch (Vc::Detail::popcnt8(bits)) {
0221     case 8:
0222         v.gather(mem, indexes);
0223         break;
0224     case 7:
0225         low = _bit_scan_forward(bits);
0226         bits ^= 1 << low;
0227         v[low] = mem[indexes[low]];
0228         // fallthrough
0229     case 6:
0230         high = _bit_scan_reverse(bits);
0231         v[high] = mem[indexes[high]];
0232         high = (1 << high);
0233         // fallthrough
0234     case 5:
0235         low = _bit_scan_forward(bits);
0236         bits ^= high | (1 << low);
0237         v[low] = mem[indexes[low]];
0238         // fallthrough
0239     case 4:
0240         high = _bit_scan_reverse(bits);
0241         v[high] = mem[indexes[high]];
0242         high = (1 << high);
0243         // fallthrough
0244     case 3:
0245         low = _bit_scan_forward(bits);
0246         bits ^= high | (1 << low);
0247         v[low] = mem[indexes[low]];
0248         // fallthrough
0249     case 2:
0250         high = _bit_scan_reverse(bits);
0251         v[high] = mem[indexes[high]];
0252         // fallthrough
0253     case 1:
0254         low = _bit_scan_forward(bits);
0255         v[low] = mem[indexes[low]];
0256         // fallthrough
0257     case 0:
0258         break;
0259     }
0260 }
0261 template <typename V, typename MT, typename IT>
0262 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
0263                                     V &v,
0264                                     const MT *mem,
0265                                     const IT &indexes,
0266                                     typename V::MaskArgument mask,
0267                                     enable_if<V::Size == 4> = nullarg)
0268 {
0269     unsigned int bits = mask.toInt();
0270     unsigned int low, high = 0;
0271     switch (Vc::Detail::popcnt4(bits)) {
0272     case 4:
0273         v.gather(mem, indexes);
0274         break;
0275     case 3:
0276         low = _bit_scan_forward(bits);
0277         bits ^= 1 << low;
0278         v[low] = mem[indexes[low]];
0279         // fallthrough
0280     case 2:
0281         high = _bit_scan_reverse(bits);
0282         v[high] = mem[indexes[high]];
0283         // fallthrough
0284     case 1:
0285         low = _bit_scan_forward(bits);
0286         v[low] = mem[indexes[low]];
0287         // fallthrough
0288     case 0:
0289         break;
0290     }
0291 }
0292 template <typename V, typename MT, typename IT>
0293 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
0294                                     V &v,
0295                                     const MT *mem,
0296                                     const IT &indexes,
0297                                     typename V::MaskArgument mask,
0298                                     enable_if<V::Size == 2> = nullarg)
0299 {
0300     unsigned int bits = mask.toInt();
0301     unsigned int low;
0302     switch (Vc::Detail::popcnt4(bits)) {
0303     case 2:
0304         v.gather(mem, indexes);
0305         break;
0306     case 1:
0307         low = _bit_scan_forward(bits);
0308         v[low] = mem[indexes[low]];
0309         // fallthrough
0310     case 0:
0311         break;
0312     }
0313 }
0314 
0315 }  // namespace Common
0316 }  // namespace Vc
0317 
0318 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_