Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:30:03

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
0012 #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
0013 
0014 #include <algorithm>
0015 
0016 #include <boost/compute/types.hpp>
0017 #include <boost/compute/buffer.hpp>
0018 #include <boost/compute/kernel.hpp>
0019 #include <boost/compute/context.hpp>
0020 #include <boost/compute/program.hpp>
0021 #include <boost/compute/command_queue.hpp>
0022 #include <boost/compute/algorithm/transform.hpp>
0023 #include <boost/compute/detail/iterator_range_size.hpp>
0024 #include <boost/compute/utility/program_cache.hpp>
0025 #include <boost/compute/container/vector.hpp>
0026 #include <boost/compute/iterator/discard_iterator.hpp>
0027 
0028 namespace boost {
0029 namespace compute {
0030 
0031 /// \class threefry_engine
0032 /// \brief Threefry pseudorandom number generator.
0033 template<class T = uint_>
0034 class threefry_engine
0035 {
0036 public:
0037     typedef T result_type;
0038     static const ulong_ default_seed = 0UL;
0039 
0040     /// Creates a new threefry_engine and seeds it with \p value.
0041     explicit threefry_engine(command_queue &queue,
0042                              ulong_ value = default_seed)
0043         : m_key(value),
0044           m_counter(0),
0045           m_context(queue.get_context())
0046     {
0047         // Load program
0048         load_program();
0049     }
0050 
0051     /// Creates a new threefry_engine object as a copy of \p other.
0052     threefry_engine(const threefry_engine<T> &other)
0053         : m_key(other.m_key),
0054           m_counter(other.m_counter),
0055           m_context(other.m_context),
0056           m_program(other.m_program)
0057     {
0058     }
0059 
0060     /// Copies \p other to \c *this.
0061     threefry_engine<T>& operator=(const threefry_engine<T> &other)
0062     {
0063         if(this != &other){
0064             m_key = other.m_key;
0065             m_counter = other.m_counter;
0066             m_context = other.m_context;
0067             m_program = other.m_program;
0068         }
0069 
0070         return *this;
0071     }
0072 
0073     /// Destroys the threefry_engine object.
0074     ~threefry_engine()
0075     {
0076     }
0077 
0078     /// Seeds the random number generator with \p value.
0079     ///
0080     /// \param value seed value for the random-number generator
0081     /// \param queue command queue to perform the operation
0082     ///
0083     /// If no seed value is provided, \c default_seed is used.
0084     void seed(ulong_ value, command_queue &queue)
0085     {
0086         (void) queue;
0087         m_key = value;
0088         // Reset counter
0089         m_counter = 0;
0090     }
0091 
0092     /// \overload
0093     void seed(command_queue &queue)
0094     {
0095         seed(default_seed, queue);
0096     }
0097 
0098     /// Generates random numbers and stores them to the range [\p first, \p last).
0099     template<class OutputIterator>
0100     void generate(OutputIterator first, OutputIterator last, command_queue &queue)
0101     {
0102         const size_t size = detail::iterator_range_size(first, last);
0103 
0104         kernel fill_kernel(m_program, "fill");
0105         fill_kernel.set_arg(0, first.get_buffer());
0106         fill_kernel.set_arg(1, static_cast<const uint_>(size));
0107         fill_kernel.set_arg(2, m_key);
0108         fill_kernel.set_arg(3, m_counter);
0109 
0110         queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0);
0111 
0112         discard(size, queue);
0113     }
0114 
0115     /// \internal_
0116     void generate(discard_iterator first, discard_iterator last, command_queue &queue)
0117     {
0118         (void) queue;
0119         ulong_ offset = std::distance(first, last);
0120         m_counter += offset;
0121     }
0122 
0123     /// Generates random numbers, transforms them with \p op, and then stores
0124     /// them to the range [\p first, \p last).
0125     template<class OutputIterator, class Function>
0126     void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
0127     {
0128         vector<T> tmp(std::distance(first, last), queue.get_context());
0129         generate(tmp.begin(), tmp.end(), queue);
0130         ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue);
0131     }
0132 
0133     /// Generates \p z random numbers and discards them.
0134     void discard(size_t z, command_queue &queue)
0135     {
0136         generate(discard_iterator(0), discard_iterator(z), queue);
0137     }
0138 
0139 private:
0140     void load_program()
0141     {
0142         boost::shared_ptr<program_cache> cache =
0143             program_cache::get_global_cache(m_context);
0144         std::string cache_key =
0145             std::string("__boost_threefry_engine_32x2");
0146 
0147         // Copyright 2010-2012, D. E. Shaw Research.
0148         // All rights reserved.
0149 
0150         // Redistribution and use in source and binary forms, with or without
0151         // modification, are permitted provided that the following conditions are
0152         // met:
0153 
0154         // * Redistributions of source code must retain the above copyright
0155         //   notice, this list of conditions, and the following disclaimer.
0156 
0157         // * Redistributions in binary form must reproduce the above copyright
0158         //   notice, this list of conditions, and the following disclaimer in the
0159         //   documentation and/or other materials provided with the distribution.
0160 
0161         // * Neither the name of D. E. Shaw Research nor the names of its
0162         //   contributors may be used to endorse or promote products derived from
0163         //   this software without specific prior written permission.
0164 
0165         // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0166         // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0167         // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0168         // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0169         // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0170         // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0171         // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0172         // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0173         // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0174         // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0175         // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0176         const char source[] =
0177             "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
0178             "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
0179 
0180             "enum r123_enum_threefry32x2 {\n"
0181             "    R_32x2_0_0=13,\n"
0182             "    R_32x2_1_0=15,\n"
0183             "    R_32x2_2_0=26,\n"
0184             "    R_32x2_3_0= 6,\n"
0185             "    R_32x2_4_0=17,\n"
0186             "    R_32x2_5_0=29,\n"
0187             "    R_32x2_6_0=16,\n"
0188             "    R_32x2_7_0=24\n"
0189             "};\n"
0190 
0191             "static uint RotL_32(uint x, uint N)\n"
0192             "{\n"
0193             "    return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
0194             "}\n"
0195 
0196             "struct r123array2x32 {\n"
0197             "    uint v[2];\n"
0198             "};\n"
0199             "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
0200             "typedef struct r123array2x32 threefry2x32_key_t;\n"
0201 
0202             "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
0203             "{\n"
0204             "    threefry2x32_ctr_t X;\n"
0205             "    uint ks[3];\n"
0206             "    uint  i; \n"
0207             "    ks[2] =  SKEIN_KS_PARITY_32;\n"
0208             "    for (i=0;i < 2; i++) {\n"
0209             "        ks[i] = k.v[i];\n"
0210             "        X.v[i]  = in.v[i];\n"
0211             "        ks[2] ^= k.v[i];\n"
0212             "    }\n"
0213             "    X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0214             "    if(Nrounds>0){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0215             "    if(Nrounds>1){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0216             "    if(Nrounds>2){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0217             "    if(Nrounds>3){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0218             "    if(Nrounds>3){\n"
0219             "        X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0220             "        X.v[1] += 1;\n"
0221             "    }\n"
0222             "    if(Nrounds>4){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0223             "    if(Nrounds>5){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0224             "    if(Nrounds>6){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0225             "    if(Nrounds>7){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0226             "    if(Nrounds>7){\n"
0227             "        X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0228             "        X.v[1] += 2;\n"
0229             "    }\n"
0230             "    if(Nrounds>8){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0231             "    if(Nrounds>9){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0232             "    if(Nrounds>10){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0233             "    if(Nrounds>11){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0234             "    if(Nrounds>11){\n"
0235             "        X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0236             "        X.v[1] += 3;\n"
0237             "    }\n"
0238             "    if(Nrounds>12){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0239             "    if(Nrounds>13){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0240             "    if(Nrounds>14){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0241             "    if(Nrounds>15){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0242             "    if(Nrounds>15){\n"
0243             "        X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0244             "        X.v[1] += 4;\n"
0245             "    }\n"
0246             "    if(Nrounds>16){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0247             "    if(Nrounds>17){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0248             "    if(Nrounds>18){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0249             "    if(Nrounds>19){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0250             "    if(Nrounds>19){\n"
0251             "        X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0252             "        X.v[1] += 5;\n"
0253             "    }\n"
0254             "    if(Nrounds>20){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0255             "    if(Nrounds>21){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0256             "    if(Nrounds>22){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0257             "    if(Nrounds>23){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0258             "    if(Nrounds>23){\n"
0259             "        X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0260             "        X.v[1] += 6;\n"
0261             "    }\n"
0262             "    if(Nrounds>24){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0263             "    if(Nrounds>25){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0264             "    if(Nrounds>26){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0265             "    if(Nrounds>27){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0266             "    if(Nrounds>27){\n"
0267             "        X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0268             "        X.v[1] += 7;\n"
0269             "    }\n"
0270             "    if(Nrounds>28){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0271             "    if(Nrounds>29){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0272             "    if(Nrounds>30){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0273             "    if(Nrounds>31){  X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0274             "    if(Nrounds>31){\n"
0275             "        X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0276             "        X.v[1] += 8;\n"
0277             "    }\n"
0278             "    return X;\n"
0279             "}\n"
0280             "__kernel void fill(__global uint * output,\n"
0281             "                   const uint output_size,\n"
0282             "                   const uint2 key,\n"
0283             "                   const uint2 counter)\n"
0284             "{\n"
0285             "    uint gid = get_global_id(0);\n"
0286             "    threefry2x32_ctr_t c;\n"
0287             "    c.v[0] = counter.x + gid;\n"
0288             "    c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n"
0289             "\n"
0290             "    threefry2x32_key_t k = { {key.x, key.y} };\n"
0291             "\n"
0292             "    threefry2x32_ctr_t result;\n"
0293             "    result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n"
0294             "\n"
0295             "    if(gid < output_size/2)\n"
0296             "    {\n"
0297             "       output[2 * gid] = result.v[0];\n"
0298             "       output[2 * gid + 1] = result.v[1];\n"
0299             "    }\n"
0300             "    else if(gid < (output_size+1)/2)\n"
0301             "       output[2 * gid] = result.v[0];\n"
0302             "}\n";
0303 
0304         m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
0305     }
0306 
0307     // Engine state
0308     ulong_ m_key; // 2 x 32bit
0309     ulong_ m_counter;
0310     // OpenCL
0311     context m_context;
0312     program m_program;
0313 };
0314 
0315 } // end compute namespace
0316 } // end boost namespace
0317 
0318 #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP