Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:25:46

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_COMMON_INTERLEAVEDMEMORY_H_
0029 #define VC_COMMON_INTERLEAVEDMEMORY_H_
0030 
0031 #include "macros.h"
0032 
0033 namespace Vc_VERSIONED_NAMESPACE
0034 {
0035 namespace Common
0036 {
0037 /**
0038  * \internal
0039  */
0040 template<typename V, typename I, bool Readonly> struct InterleavedMemoryAccessBase
0041 {
0042     // Partial specialization doesn't work for functions without partial specialization of the whole
0043     // class. Therefore we capture the contents of InterleavedMemoryAccessBase in a macro to easily
0044     // copy it into its specializations.
0045     typedef typename std::conditional<
0046         Readonly, typename std::add_const<typename V::EntryType>::type,
0047         typename V::EntryType>::type T;
0048     typedef typename V::AsArg VArg;
0049     typedef T Ta Vc_MAY_ALIAS;
0050     const I m_indexes;
0051     Ta *const m_data;
0052 
0053     Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data)
0054         : m_indexes(indexes), m_data(data)
0055     {
0056     }
0057 
0058     // implementations of the following are in {scalar,sse,avx}/detail.h
0059     template <typename... Vs> Vc_INTRINSIC void deinterleave(Vs &&... vs) const
0060     {
0061         Impl::deinterleave(m_data, m_indexes, std::forward<Vs>(vs)...);
0062     }
0063 
0064 protected:
0065     using Impl = Vc::Detail::InterleaveImpl<V, V::Size, sizeof(V)>;
0066 
0067     template <typename T, std::size_t... Indexes>
0068     Vc_INTRINSIC void callInterleave(T &&a, index_sequence<Indexes...>)
0069     {
0070         Impl::interleave(m_data, m_indexes, a[Indexes]...);
0071     }
0072 };
0073 
0074 /**
0075  * \internal
0076  */
0077 // delay execution of the deinterleaving gather until operator=
0078 template <size_t StructSize, typename V, typename I = typename V::IndexType,
0079           bool Readonly>
0080 struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase<V, I, Readonly>
0081 {
0082     typedef InterleavedMemoryAccessBase<V, I, Readonly> Base;
0083     typedef typename Base::Ta Ta;
0084 
0085     Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(Ta *data, typename I::AsArg indexes)
0086         : Base(StructSize == 1u
0087                    ? indexes
0088                    : StructSize == 2u
0089                          ? indexes << 1
0090                          : StructSize == 4u
0091                                ? indexes << 2
0092                                : StructSize == 8u
0093                                      ? indexes << 3
0094                                      : StructSize == 16u ? indexes << 4
0095                                                          : indexes * I(int(StructSize)),
0096                data)
0097     {
0098     }
0099 
0100     template <typename T, std::size_t... Indexes>
0101     Vc_ALWAYS_INLINE T deinterleave_unpack(index_sequence<Indexes...>) const
0102     {
0103         T r;
0104         Base::Impl::deinterleave(this->m_data, this->m_indexes, std::get<Indexes>(r)...);
0105         return r;
0106     }
0107 
0108     template <typename T,
0109               typename = enable_if<(std::is_default_constructible<T>::value &&
0110                                     std::is_same<V, Traits::decay<decltype(std::get<0>(
0111                                                         std::declval<T &>()))>>::value)>>
0112     Vc_ALWAYS_INLINE operator T() const
0113     {
0114         return deinterleave_unpack<T>(make_index_sequence<std::tuple_size<T>::value>());
0115     }
0116 };
0117 
0118 ///\internal Runtime check (NDEBUG) for asserting unique indexes.
0119 template<typename I> struct CheckIndexesUnique
0120 {
0121 #ifdef NDEBUG
0122     static Vc_INTRINSIC void test(const I &) {}
0123 #else
0124     static void test(const I &indexes)
0125     {
0126         const I test = indexes.sorted();
0127         Vc_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty())
0128     }
0129 #endif
0130 };
0131 ///\internal For SuccessiveEntries there can never be a problem.
0132 template<size_t S> struct CheckIndexesUnique<SuccessiveEntries<S> >
0133 {
0134     static Vc_INTRINSIC void test(const SuccessiveEntries<S> &) {}
0135 };
0136 
0137 /**
0138  * \internal
0139  */
0140 template <size_t StructSize, typename V, typename I = typename V::IndexType>
0141 struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess<StructSize, V, I, false>
0142 {
0143     typedef InterleavedMemoryAccessBase<V, I, false> Base;
0144     typedef typename Base::Ta Ta;
0145 
0146     Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
0147         : InterleavedMemoryReadAccess<StructSize, V, I, false>(data, indexes)
0148     {
0149         CheckIndexesUnique<I>::test(indexes);
0150     }
0151 
0152     template <int N> Vc_ALWAYS_INLINE void operator=(VectorReferenceArray<N, V> &&rhs)
0153     {
0154         static_assert(N <= StructSize,
0155                       "You_are_trying_to_scatter_more_data_into_the_struct_than_it_has");
0156         this->callInterleave(std::move(rhs), make_index_sequence<N>());
0157     }
0158     template <int N> Vc_ALWAYS_INLINE void operator=(VectorReferenceArray<N, const V> &&rhs)
0159     {
0160         static_assert(N <= StructSize,
0161                       "You_are_trying_to_scatter_more_data_into_the_struct_than_it_has");
0162         this->callInterleave(std::move(rhs), make_index_sequence<N>());
0163     }
0164 };
0165 
0166 /**
0167  * Wraps a pointer to memory with convenience functions to access it via vectors.
0168  *
0169  * \param S The type of the struct.
0170  * \param V The type of the vector to be returned when read. This should reflect the type of the
0171  * members inside the struct.
0172  *
0173  * \see operator[]
0174  * \ingroup Containers
0175  * \headerfile interleavedmemory.h <Vc/Memory>
0176  */
0177 template<typename S, typename V> class InterleavedMemoryWrapper
0178 {
0179     typedef typename std::conditional<std::is_const<S>::value,
0180                                       const typename V::EntryType,
0181                                       typename V::EntryType>::type T;
0182     typedef typename V::IndexType I;
0183     typedef typename V::AsArg VArg;
0184     typedef const I &IndexType;
0185     static constexpr std::size_t StructSize = sizeof(S) / sizeof(T);
0186     using ReadAccess = InterleavedMemoryReadAccess<StructSize, V>;
0187     using Access =
0188         typename std::conditional<std::is_const<T>::value, ReadAccess,
0189                                   InterleavedMemoryAccess<StructSize, V>>::type;
0190     using ReadSuccessiveEntries =
0191         InterleavedMemoryReadAccess<StructSize, V, SuccessiveEntries<StructSize>>;
0192     using AccessSuccessiveEntries = typename std::conditional<
0193         std::is_const<T>::value, ReadSuccessiveEntries,
0194         InterleavedMemoryAccess<StructSize, V, SuccessiveEntries<StructSize>>>::type;
0195     typedef T Ta Vc_MAY_ALIAS;
0196     Ta *const m_data;
0197 
0198     static_assert(StructSize * sizeof(T) == sizeof(S),
0199                   "InterleavedMemoryAccess_does_not_support_packed_structs");
0200 
0201 public:
0202     /**
0203      * Constructs the wrapper object.
0204      *
0205      * \param s A pointer to a C-array.
0206      */
0207     Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s)
0208         : m_data(reinterpret_cast<Ta *>(s))
0209     {
0210     }
0211 
0212     /**
0213      * Interleaved scatter/gather access.
0214      *
0215      * Assuming you have a struct of floats and a vector of \p indexes into the array, this function
0216      * can be used to access the struct entries as vectors using the minimal number of store or load
0217      * instructions.
0218      *
0219      * \param indexes Vector of indexes that determine the gather locations.
0220      *
0221      * \return A special (magic) object that executes the loads and deinterleave on assignment to a
0222      * vector tuple.
0223      *
0224      * Example:
0225      * \code
0226      * struct Foo {
0227      *   float x, y, z;
0228      * };
0229      *
0230      * void fillWithBar(Foo *_data, uint_v indexes)
0231      * {
0232      *   Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
0233      *   const float_v x = bar(1);
0234      *   const float_v y = bar(2);
0235      *   const float_v z = bar(3);
0236      *   data[indexes] = (x, y, z);
0237      *   // it's also possible to just store a subset at the front of the struct:
0238      *   data[indexes] = (x, y);
0239      *   // if you want to store a single entry, use scatter:
0240      *   z.scatter(_data, &Foo::x, indexes);
0241      * }
0242      *
0243      * float_v normalizeStuff(Foo *_data, uint_v indexes)
0244      * {
0245      *   Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
0246      *   float_v x, y, z;
0247      *   (x, y, z) = data[indexes];
0248      *   // it is also possible to just load a subset from the front of the struct:
0249      *   // (x, y) = data[indexes];
0250      *   return Vc::sqrt(x * x + y * y + z * z);
0251      * }
0252      * \endcode
0253      *
0254      * You may think of the gather operation (or scatter as the inverse) like this:
0255 \verbatim
0256              Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8}
0257             indexes: [5, 0, 1, 7]
0258 Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7})
0259 \endverbatim
0260      *
0261      * \warning If \p indexes contains non-unique entries on scatter, the result is undefined. If
0262      * \c NDEBUG is not defined the implementation will assert that the \p indexes entries are unique.
0263      */
0264     template <typename IT>
0265     Vc_ALWAYS_INLINE enable_if<!std::is_convertible<IT, size_t>::value &&
0266                                    std::is_convertible<IT, IndexType>::value &&
0267                                    !std::is_const<S>::value,
0268                                Access>
0269     operator[](IT indexes)
0270     {
0271         return Access(m_data, indexes);
0272     }
0273 
0274     /// const overload (gathers only) of the above function
0275     Vc_ALWAYS_INLINE ReadAccess operator[](IndexType indexes) const
0276     {
0277         return ReadAccess(m_data, indexes);
0278     }
0279 
0280     /// alias of the above function
0281     Vc_ALWAYS_INLINE ReadAccess gather(IndexType indexes) const { return operator[](indexes); }
0282 
0283     /**
0284      * Interleaved access.
0285      *
0286      * This function is an optimization of the function above, for cases where the index vector
0287      * contains consecutive values. It will load \p V::Size consecutive entries from memory and
0288      * deinterleave them into Vc vectors.
0289      *
0290      * \param first The first of \p V::Size indizes to be accessed.
0291      *
0292      * \return A special (magic) object that executes the loads and deinterleave on assignment to a
0293      * vector tuple.
0294      *
0295      * Example:
0296      * \code
0297      * struct Foo {
0298      *   float x, y, z;
0299      * };
0300      *
0301      * void foo(Foo *_data)
0302      * {
0303      *   Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
0304      *   for (size_t i = 0; i < 32U; i += float_v::Size) {
0305      *     float_v x, y, z;
0306      *     (x, y, z) = data[i];
0307      *     // now:
0308      *     // x = { _data[i].x, _data[i + 1].x, _data[i + 2].x, ... }
0309      *     // y = { _data[i].y, _data[i + 1].y, _data[i + 2].y, ... }
0310      *     // z = { _data[i].z, _data[i + 1].z, _data[i + 2].z, ... }
0311      *     ...
0312      *   }
0313      * }
0314      * \endcode
0315      */
0316     Vc_ALWAYS_INLINE ReadSuccessiveEntries operator[](size_t first) const
0317     {
0318         return ReadSuccessiveEntries(m_data, first);
0319     }
0320 
0321     Vc_ALWAYS_INLINE AccessSuccessiveEntries operator[](size_t first)
0322     {
0323         return AccessSuccessiveEntries(m_data, first);
0324     }
0325 
0326     //Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1);
0327 };
0328 }  // namespace Common
0329 
0330 using Common::InterleavedMemoryWrapper;
0331 
0332 /**
0333  * Creates an adapter around a given array of structure (AoS) that enables optimized loads
0334  * + deinterleaving operations / interleaving operations + stores for vector access (using
0335  * \p V).
0336  *
0337  * \tparam V The `Vc::Vector<T>` type to use per element of the structure.
0338  * \param s A pointer to an array of structures containing data members of type `T`.
0339  *
0340  * \see Vc::Common::InterleavedMemoryWrapper
0341  *
0342  * \todo Support destructuring via structured bindings.
0343  */
0344 template <typename V, typename S>
0345 inline Common::InterleavedMemoryWrapper<S, V> make_interleave_wrapper(S *s)
0346 {
0347     return Common::InterleavedMemoryWrapper<S, V>(s);
0348 }
0349 }  // namespace Vc
0350 
0351 #endif // VC_COMMON_INTERLEAVEDMEMORY_H_