File indexing completed on 2025-01-18 09:30:03
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
0012 #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
0013
0014 #include <algorithm>
0015
0016 #include <boost/compute/types.hpp>
0017 #include <boost/compute/buffer.hpp>
0018 #include <boost/compute/kernel.hpp>
0019 #include <boost/compute/context.hpp>
0020 #include <boost/compute/program.hpp>
0021 #include <boost/compute/command_queue.hpp>
0022 #include <boost/compute/algorithm/transform.hpp>
0023 #include <boost/compute/detail/iterator_range_size.hpp>
0024 #include <boost/compute/utility/program_cache.hpp>
0025 #include <boost/compute/container/vector.hpp>
0026 #include <boost/compute/iterator/discard_iterator.hpp>
0027
0028 namespace boost {
0029 namespace compute {
0030
0031
0032
0033 template<class T = uint_>
0034 class threefry_engine
0035 {
0036 public:
0037 typedef T result_type;
0038 static const ulong_ default_seed = 0UL;
0039
0040
0041 explicit threefry_engine(command_queue &queue,
0042 ulong_ value = default_seed)
0043 : m_key(value),
0044 m_counter(0),
0045 m_context(queue.get_context())
0046 {
0047
0048 load_program();
0049 }
0050
0051
0052 threefry_engine(const threefry_engine<T> &other)
0053 : m_key(other.m_key),
0054 m_counter(other.m_counter),
0055 m_context(other.m_context),
0056 m_program(other.m_program)
0057 {
0058 }
0059
0060
0061 threefry_engine<T>& operator=(const threefry_engine<T> &other)
0062 {
0063 if(this != &other){
0064 m_key = other.m_key;
0065 m_counter = other.m_counter;
0066 m_context = other.m_context;
0067 m_program = other.m_program;
0068 }
0069
0070 return *this;
0071 }
0072
0073
0074 ~threefry_engine()
0075 {
0076 }
0077
0078
0079
0080
0081
0082
0083
0084 void seed(ulong_ value, command_queue &queue)
0085 {
0086 (void) queue;
0087 m_key = value;
0088
0089 m_counter = 0;
0090 }
0091
0092
0093 void seed(command_queue &queue)
0094 {
0095 seed(default_seed, queue);
0096 }
0097
0098
0099 template<class OutputIterator>
0100 void generate(OutputIterator first, OutputIterator last, command_queue &queue)
0101 {
0102 const size_t size = detail::iterator_range_size(first, last);
0103
0104 kernel fill_kernel(m_program, "fill");
0105 fill_kernel.set_arg(0, first.get_buffer());
0106 fill_kernel.set_arg(1, static_cast<const uint_>(size));
0107 fill_kernel.set_arg(2, m_key);
0108 fill_kernel.set_arg(3, m_counter);
0109
0110 queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0);
0111
0112 discard(size, queue);
0113 }
0114
0115
0116 void generate(discard_iterator first, discard_iterator last, command_queue &queue)
0117 {
0118 (void) queue;
0119 ulong_ offset = std::distance(first, last);
0120 m_counter += offset;
0121 }
0122
0123
0124
0125 template<class OutputIterator, class Function>
0126 void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
0127 {
0128 vector<T> tmp(std::distance(first, last), queue.get_context());
0129 generate(tmp.begin(), tmp.end(), queue);
0130 ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue);
0131 }
0132
0133
0134 void discard(size_t z, command_queue &queue)
0135 {
0136 generate(discard_iterator(0), discard_iterator(z), queue);
0137 }
0138
0139 private:
0140 void load_program()
0141 {
0142 boost::shared_ptr<program_cache> cache =
0143 program_cache::get_global_cache(m_context);
0144 std::string cache_key =
0145 std::string("__boost_threefry_engine_32x2");
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176 const char source[] =
0177 "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
0178 "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
0179
0180 "enum r123_enum_threefry32x2 {\n"
0181 " R_32x2_0_0=13,\n"
0182 " R_32x2_1_0=15,\n"
0183 " R_32x2_2_0=26,\n"
0184 " R_32x2_3_0= 6,\n"
0185 " R_32x2_4_0=17,\n"
0186 " R_32x2_5_0=29,\n"
0187 " R_32x2_6_0=16,\n"
0188 " R_32x2_7_0=24\n"
0189 "};\n"
0190
0191 "static uint RotL_32(uint x, uint N)\n"
0192 "{\n"
0193 " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
0194 "}\n"
0195
0196 "struct r123array2x32 {\n"
0197 " uint v[2];\n"
0198 "};\n"
0199 "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
0200 "typedef struct r123array2x32 threefry2x32_key_t;\n"
0201
0202 "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
0203 "{\n"
0204 " threefry2x32_ctr_t X;\n"
0205 " uint ks[3];\n"
0206 " uint i; \n"
0207 " ks[2] = SKEIN_KS_PARITY_32;\n"
0208 " for (i=0;i < 2; i++) {\n"
0209 " ks[i] = k.v[i];\n"
0210 " X.v[i] = in.v[i];\n"
0211 " ks[2] ^= k.v[i];\n"
0212 " }\n"
0213 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0214 " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0215 " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0216 " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0217 " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0218 " if(Nrounds>3){\n"
0219 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0220 " X.v[1] += 1;\n"
0221 " }\n"
0222 " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0223 " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0224 " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0225 " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0226 " if(Nrounds>7){\n"
0227 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0228 " X.v[1] += 2;\n"
0229 " }\n"
0230 " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0231 " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0232 " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0233 " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0234 " if(Nrounds>11){\n"
0235 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0236 " X.v[1] += 3;\n"
0237 " }\n"
0238 " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0239 " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0240 " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0241 " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0242 " if(Nrounds>15){\n"
0243 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0244 " X.v[1] += 4;\n"
0245 " }\n"
0246 " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0247 " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0248 " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0249 " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0250 " if(Nrounds>19){\n"
0251 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0252 " X.v[1] += 5;\n"
0253 " }\n"
0254 " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0255 " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0256 " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0257 " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0258 " if(Nrounds>23){\n"
0259 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
0260 " X.v[1] += 6;\n"
0261 " }\n"
0262 " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
0263 " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
0264 " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
0265 " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
0266 " if(Nrounds>27){\n"
0267 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
0268 " X.v[1] += 7;\n"
0269 " }\n"
0270 " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
0271 " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
0272 " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
0273 " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
0274 " if(Nrounds>31){\n"
0275 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
0276 " X.v[1] += 8;\n"
0277 " }\n"
0278 " return X;\n"
0279 "}\n"
0280 "__kernel void fill(__global uint * output,\n"
0281 " const uint output_size,\n"
0282 " const uint2 key,\n"
0283 " const uint2 counter)\n"
0284 "{\n"
0285 " uint gid = get_global_id(0);\n"
0286 " threefry2x32_ctr_t c;\n"
0287 " c.v[0] = counter.x + gid;\n"
0288 " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n"
0289 "\n"
0290 " threefry2x32_key_t k = { {key.x, key.y} };\n"
0291 "\n"
0292 " threefry2x32_ctr_t result;\n"
0293 " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n"
0294 "\n"
0295 " if(gid < output_size/2)\n"
0296 " {\n"
0297 " output[2 * gid] = result.v[0];\n"
0298 " output[2 * gid + 1] = result.v[1];\n"
0299 " }\n"
0300 " else if(gid < (output_size+1)/2)\n"
0301 " output[2 * gid] = result.v[0];\n"
0302 "}\n";
0303
0304 m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
0305 }
0306
0307
0308 ulong_ m_key;
0309 ulong_ m_counter;
0310
0311 context m_context;
0312 program m_program;
0313 };
0314
0315 }
0316 }
0317
0318 #endif