File indexing completed on 2024-11-15 09:58:34
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef __TBB_parallel_scan_H
0018 #define __TBB_parallel_scan_H
0019
0020 #define __TBB_parallel_scan_H_include_area
0021 #include "internal/_warning_suppress_enable_notice.h"
0022
0023 #include "task.h"
0024 #include "aligned_space.h"
0025 #include <new>
0026 #include "partitioner.h"
0027
0028 namespace tbb {
0029
0030
0031
0032 struct pre_scan_tag {
0033 static bool is_final_scan() {return false;}
0034 operator bool() {return is_final_scan();}
0035 };
0036
0037
0038
0039 struct final_scan_tag {
0040 static bool is_final_scan() {return true;}
0041 operator bool() {return is_final_scan();}
0042 };
0043
0044
0045 namespace internal {
0046
0047
0048
0049 template<typename Range, typename Body>
0050 class final_sum: public task {
0051 public:
0052 Body my_body;
0053 private:
0054 aligned_space<Range> my_range;
0055
0056 Body* my_stuff_last;
0057 public:
0058 final_sum( Body& body_ ) :
0059 my_body(body_,split())
0060 {
0061 poison_pointer(my_stuff_last);
0062 }
0063 ~final_sum() {
0064 my_range.begin()->~Range();
0065 }
0066 void finish_construction( const Range& range_, Body* stuff_last_ ) {
0067 new( my_range.begin() ) Range(range_);
0068 my_stuff_last = stuff_last_;
0069 }
0070 private:
0071 task* execute() __TBB_override {
0072 my_body( *my_range.begin(), final_scan_tag() );
0073 if( my_stuff_last )
0074 my_stuff_last->assign(my_body);
0075 return NULL;
0076 }
0077 };
0078
0079
0080
0081 template<typename Range, typename Body>
0082 class sum_node: public task {
0083 typedef final_sum<Range,Body> final_sum_type;
0084 public:
0085 final_sum_type *my_incoming;
0086 final_sum_type *my_body;
0087 Body *my_stuff_last;
0088 private:
0089 final_sum_type *my_left_sum;
0090 sum_node *my_left;
0091 sum_node *my_right;
0092 bool my_left_is_final;
0093 Range my_range;
0094 sum_node( const Range range_, bool left_is_final_ ) :
0095 my_stuff_last(NULL),
0096 my_left_sum(NULL),
0097 my_left(NULL),
0098 my_right(NULL),
0099 my_left_is_final(left_is_final_),
0100 my_range(range_)
0101 {
0102
0103 poison_pointer(my_body);
0104 poison_pointer(my_incoming);
0105 }
0106 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
0107 if( !n ) {
0108 f.recycle_as_child_of( *this );
0109 f.finish_construction( range_, stuff_last_ );
0110 return &f;
0111 } else {
0112 n->my_body = &f;
0113 n->my_incoming = incoming_;
0114 n->my_stuff_last = stuff_last_;
0115 return n;
0116 }
0117 }
0118 task* execute() __TBB_override {
0119 if( my_body ) {
0120 if( my_incoming )
0121 my_left_sum->my_body.reverse_join( my_incoming->my_body );
0122 recycle_as_continuation();
0123 sum_node& c = *this;
0124 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
0125 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
0126 set_ref_count( (a!=NULL)+(b!=NULL) );
0127 my_body = NULL;
0128 if( a ) spawn(*b);
0129 else a = b;
0130 return a;
0131 } else {
0132 return NULL;
0133 }
0134 }
0135 template<typename Range_,typename Body_,typename Partitioner_>
0136 friend class start_scan;
0137
0138 template<typename Range_,typename Body_>
0139 friend class finish_scan;
0140 };
0141
0142
0143
0144 template<typename Range, typename Body>
0145 class finish_scan: public task {
0146 typedef sum_node<Range,Body> sum_node_type;
0147 typedef final_sum<Range,Body> final_sum_type;
0148 final_sum_type** const my_sum;
0149 sum_node_type*& my_return_slot;
0150 public:
0151 final_sum_type* my_right_zombie;
0152 sum_node_type& my_result;
0153
0154 task* execute() __TBB_override {
0155 __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
0156 if( my_result.my_left )
0157 my_result.my_left_is_final = false;
0158 if( my_right_zombie && my_sum )
0159 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
0160 __TBB_ASSERT( !my_return_slot, NULL );
0161 if( my_right_zombie || my_result.my_right ) {
0162 my_return_slot = &my_result;
0163 } else {
0164 destroy( my_result );
0165 }
0166 if( my_right_zombie && !my_sum && !my_result.my_right ) {
0167 destroy(*my_right_zombie);
0168 my_right_zombie = NULL;
0169 }
0170 return NULL;
0171 }
0172
0173 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
0174 my_sum(sum_),
0175 my_return_slot(return_slot_),
0176 my_right_zombie(NULL),
0177 my_result(result_)
0178 {
0179 __TBB_ASSERT( !my_return_slot, NULL );
0180 }
0181 };
0182
0183
0184
0185 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
0186 class start_scan: public task {
0187 typedef sum_node<Range,Body> sum_node_type;
0188 typedef final_sum<Range,Body> final_sum_type;
0189 final_sum_type* my_body;
0190
0191 final_sum_type** my_sum;
0192 sum_node_type** my_return_slot;
0193
0194 sum_node_type* my_parent_sum;
0195 bool my_is_final;
0196 bool my_is_right_child;
0197 Range my_range;
0198 typename Partitioner::partition_type my_partition;
0199 task* execute() __TBB_override ;
0200 public:
0201 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
0202 my_body(parent_.my_body),
0203 my_sum(parent_.my_sum),
0204 my_return_slot(&return_slot_),
0205 my_parent_sum(parent_sum_),
0206 my_is_final(parent_.my_is_final),
0207 my_is_right_child(false),
0208 my_range(parent_.my_range,split()),
0209 my_partition(parent_.my_partition,split())
0210 {
0211 __TBB_ASSERT( !*my_return_slot, NULL );
0212 }
0213
0214 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
0215 my_body(&body_),
0216 my_sum(NULL),
0217 my_return_slot(&return_slot_),
0218 my_parent_sum(NULL),
0219 my_is_final(true),
0220 my_is_right_child(false),
0221 my_range(range_),
0222 my_partition(partitioner_)
0223 {
0224 __TBB_ASSERT( !*my_return_slot, NULL );
0225 }
0226
0227 static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
0228 if( !range_.empty() ) {
0229 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
0230 internal::sum_node<Range,Body>* root = NULL;
0231 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
0232 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
0233 root,
0234 range_,
0235 *temp_body,
0236 partitioner_ );
0237 temp_body->my_body.reverse_join(body_);
0238 task::spawn_root_and_wait( pass1 );
0239 if( root ) {
0240 root->my_body = temp_body;
0241 root->my_incoming = NULL;
0242 root->my_stuff_last = &body_;
0243 task::spawn_root_and_wait( *root );
0244 } else {
0245 body_.assign(temp_body->my_body);
0246 temp_body->finish_construction( range_, NULL );
0247 temp_body->destroy(*temp_body);
0248 }
0249 }
0250 }
0251 };
0252
0253 template<typename Range, typename Body, typename Partitioner>
0254 task* start_scan<Range,Body,Partitioner>::execute() {
0255 typedef internal::finish_scan<Range,Body> finish_pass1_type;
0256 finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
0257
0258
0259
0260 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
0261 if( treat_as_stolen ) {
0262
0263 p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
0264 my_is_final = false;
0265 }
0266 task* next_task = NULL;
0267 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
0268 if( my_is_final )
0269 (my_body->my_body)( my_range, final_scan_tag() );
0270 else if( my_sum )
0271 (my_body->my_body)( my_range, pre_scan_tag() );
0272 if( my_sum )
0273 *my_sum = my_body;
0274 __TBB_ASSERT( !*my_return_slot, NULL );
0275 } else {
0276 sum_node_type* result;
0277 if( my_parent_sum )
0278 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,my_is_final);
0279 else
0280 result = new(task::allocate_root()) sum_node_type(my_range,my_is_final);
0281 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
0282
0283 start_scan& b = *new( c.allocate_child() ) start_scan( result->my_right, *this, result );
0284 b.my_is_right_child = true;
0285
0286
0287
0288 recycle_as_child_of(c);
0289 c.set_ref_count(2);
0290 c.spawn(b);
0291 my_sum = &result->my_left_sum;
0292 my_return_slot = &result->my_left;
0293 my_is_right_child = false;
0294 next_task = this;
0295 my_parent_sum = result;
0296 __TBB_ASSERT( !*my_return_slot, NULL );
0297 }
0298 return next_task;
0299 }
0300
0301 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0302 class lambda_scan_body : no_assign {
0303 Value my_sum;
0304 const Value& identity_element;
0305 const Scan& my_scan;
0306 const ReverseJoin& my_reverse_join;
0307 public:
0308 lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
0309 : my_sum(identity)
0310 , identity_element(identity)
0311 , my_scan(scan)
0312 , my_reverse_join(rev_join) {}
0313
0314 lambda_scan_body( lambda_scan_body& b, split )
0315 : my_sum(b.identity_element)
0316 , identity_element(b.identity_element)
0317 , my_scan(b.my_scan)
0318 , my_reverse_join(b.my_reverse_join) {}
0319
0320 template<typename Tag>
0321 void operator()( const Range& r, Tag tag ) {
0322 my_sum = my_scan(r, my_sum, tag);
0323 }
0324
0325 void reverse_join( lambda_scan_body& a ) {
0326 my_sum = my_reverse_join(a.my_sum, my_sum);
0327 }
0328
0329 void assign( lambda_scan_body& b ) {
0330 my_sum = b.my_sum;
0331 }
0332
0333 Value result() const {
0334 return my_sum;
0335 }
0336 };
0337 }
0338
0339
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362 template<typename Range, typename Body>
0363 void parallel_scan( const Range& range, Body& body ) {
0364 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
0365 }
0366
0367
0368
0369 template<typename Range, typename Body>
0370 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0371 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
0372 }
0373
0374
0375
0376 template<typename Range, typename Body>
0377 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
0378 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
0379 }
0380
0381
0382
0383 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0384 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
0385 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0386 tbb::parallel_scan(range,body,__TBB_DEFAULT_PARTITIONER());
0387 return body.result();
0388 }
0389
0390
0391
0392 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0393 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
0394 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0395 tbb::parallel_scan(range,body,partitioner);
0396 return body.result();
0397 }
0398
0399
0400
0401 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0402 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
0403 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0404 tbb::parallel_scan(range,body,partitioner);
0405 return body.result();
0406 }
0407
0408
0409
0410 }
0411
0412 #include "internal/_warning_suppress_disable_notice.h"
0413 #undef __TBB_parallel_scan_H_include_area
0414
0415 #endif
0416