Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-25 10:27:24

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_COMMON_SET_H_
0029 #define VC_COMMON_SET_H_
0030 
0031 #include "macros.h"
0032 namespace Vc_VERSIONED_NAMESPACE
0033 {
0034 namespace
0035 {
0036     static Vc_INTRINSIC Vc_CONST __m128i set(unsigned short x0, unsigned short x1, unsigned short x2, unsigned short x3,
0037             unsigned short x4, unsigned short x5, unsigned short x6, unsigned short x7)
0038     {
             // Packs eight 16-bit values into a single __m128i.
             //
             // Adjacent arguments are first merged into 32-bit words with the
             // even-indexed argument in the low half (x0 | x1 << 16, x2 | x3 << 16, ...).
             // The four words are then placed in ascending order, so x0 ends up in
             // the lowest 16 bits of the result and x7 in the highest 16 bits.
             //
             // Every enabled branch below produces this same layout; the branches
             // differ only in how the words are moved into the vector register.
             // Vc_INTRINSIC and Vc_CONST are defined outside this view
             // (presumably in macros.h).
0039 #if defined(Vc_GNU_ASM)
0040 #if 0 // defined(__x86_64__)
             // Disabled variant kept for reference: packs four arguments into each
             // 64-bit word and uses two 64-bit moves (vmovq for the low half,
             // vpinsrq $1 for the high half) instead of four 32-bit ones.
0041         // it appears that the 32bit variant is always faster
0042         __m128i r;
0043         unsigned long long tmp0 = x3; tmp0 = (tmp0 << 16) | x2;
0044         unsigned long long tmp1 = x1; tmp1 = (tmp1 << 16) | x0;
0045         asm("vmovq %1,%0" : "=x"(r) : "r"((tmp0 << 32) | tmp1));
0046         unsigned long long tmp2 = x7; tmp2 = (tmp2 << 16) | x6;
0047         unsigned long long tmp3 = x5; tmp3 = (tmp3 << 16) | x4;
0048         asm("vpinsrq $1,%1,%0,%0" : "+x"(r) : "r"((tmp2 << 32) | tmp3));
0049         return r;
0050 #elif defined(Vc_USE_VEX_CODING)
             // VEX-encoded (v-prefixed) instruction sequence. The low and high
             // 64-bit halves are assembled in two separate registers (r0, r1) and
             // joined at the end.
0051         __m128i r0, r1;
0052         unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
0053         unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
0054         unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
0055         unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
             // Constraints: "=x" = write-only SSE register, "+x" = read-write SSE
             // register, "r" = general-purpose register input.
             // r0 <- tmp0 in 32-bit element 0, then tmp1 into element 1.
0056         asm("vmovd %1,%0" : "=x"(r0) : "r"(tmp0));
0057         asm("vpinsrd $1,%1,%0,%0" : "+x"(r0) : "r"(tmp1));
             // r1 <- tmp2 in 32-bit element 0, then tmp3 into element 1.
0058         asm("vmovd %1,%0" : "=x"(r1) : "r"(tmp2));
0059         asm("vpinsrd $1,%1,%0,%0" : "+x"(r1) : "r"(tmp3));
             // r0 <- { low 64 bits of r0, low 64 bits of r1 }.
0060         asm("vpunpcklqdq %1,%0,%0" : "+x"(r0) : "x"(r1));
0061         return r0;
0062 #else
             // Same sequence as the VEX branch, using the legacy (non-VEX)
             // two-operand encodings.
             // NOTE(review): pinsrd is an SSE4.1 instruction and nothing visible in
             // this file checks for SSE4.1 before selecting this branch -- confirm
             // that the including code only reaches it on SSE4.1-capable targets.
0063         __m128i r0, r1;
0064         unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
0065         unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
0066         unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
0067         unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
             // r0 <- { tmp0, tmp1 } in its low 64 bits.
0068         asm("movd %1,%0" : "=x"(r0) : "r"(tmp0));
0069         asm("pinsrd $1,%1,%0" : "+x"(r0) : "r"(tmp1));
             // r1 <- { tmp2, tmp3 } in its low 64 bits.
0070         asm("movd %1,%0" : "=x"(r1) : "r"(tmp2));
0071         asm("pinsrd $1,%1,%0" : "+x"(r1) : "r"(tmp3));
             // r0 <- { low 64 bits of r0, low 64 bits of r1 }.
0072         asm("punpcklqdq %1,%0" : "+x"(r0) : "x"(r1));
0073         return r0;
0074 #endif
0075 #else
             // Vc_GNU_ASM is not defined: same packing via the intrinsic.
             // _mm_setr_epi32 takes its arguments lowest element first, so tmp0
             // becomes 32-bit element 0.
0076         unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
0077         unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
0078         unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
0079         unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
0080         return _mm_setr_epi32(tmp0, tmp1, tmp2, tmp3);
0081 #endif
0082     }
0083     static Vc_INTRINSIC Vc_CONST __m128i set(short x0, short x1, short x2, short x3, short x4, short x5, short x6, short x7)
0084     {
             // Signed convenience overload: converts each argument to unsigned short
             // and forwards to the unsigned short overload of set(), keeping the
             // argument order (x0 first, x7 last) unchanged.
0085         typedef unsigned short Lane;
0086         const Lane u[8] = {Lane(x0), Lane(x1), Lane(x2), Lane(x3), Lane(x4), Lane(x5), Lane(x6), Lane(x7)};
             // The array elements have type unsigned short, so this call selects
             // the unsigned overload and does not recurse into this one.
0087         return set(u[0], u[1], u[2], u[3], u[4], u[5], u[6], u[7]);
0088     }
0089 }  // anonymous namespace
0090 }  // namespace Vc
0091 
0092 #endif // VC_COMMON_SET_H_