Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-11-15 09:58:34

0001 /*
0002     Copyright (c) 2005-2020 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #ifndef __TBB_parallel_scan_H
0018 #define __TBB_parallel_scan_H
0019 
0020 #define __TBB_parallel_scan_H_include_area
0021 #include "internal/_warning_suppress_enable_notice.h"
0022 
0023 #include "task.h"
0024 #include "aligned_space.h"
0025 #include <new>
0026 #include "partitioner.h"
0027 
0028 namespace tbb {
0029 
0030 //! Used to indicate that the initial scan is being performed.
0031 /** @ingroup algorithms */
0032 struct pre_scan_tag {
0033     static bool is_final_scan() {return false;}
0034     operator bool() {return is_final_scan();}
0035 };
0036 
0037 //! Used to indicate that the final scan is being performed.
0038 /** @ingroup algorithms */
0039 struct final_scan_tag {
0040     static bool is_final_scan() {return true;}
0041     operator bool() {return is_final_scan();}
0042 };
0043 
0044 //! @cond INTERNAL
0045 namespace internal {
0046 
0047     //! Performs final scan for a leaf
0048     /** @ingroup algorithms */
0049     template<typename Range, typename Body>
0050     class final_sum: public task {
0051     public:
0052         Body my_body;
0053     private:
0054         aligned_space<Range> my_range;
0055         //! Where to put result of last subrange, or NULL if not last subrange.
0056         Body* my_stuff_last;
0057     public:
0058         final_sum( Body& body_ ) :
0059             my_body(body_,split())
0060         {
0061             poison_pointer(my_stuff_last);
0062         }
0063         ~final_sum() {
0064             my_range.begin()->~Range();
0065         }
0066         void finish_construction( const Range& range_, Body* stuff_last_ ) {
0067             new( my_range.begin() ) Range(range_);
0068             my_stuff_last = stuff_last_;
0069         }
0070     private:
0071         task* execute() __TBB_override {
0072             my_body( *my_range.begin(), final_scan_tag() );
0073             if( my_stuff_last )
0074                 my_stuff_last->assign(my_body);
0075             return NULL;
0076         }
0077     };
0078 
0079     //! Split work to be done in the scan.
0080     /** @ingroup algorithms */
0081     template<typename Range, typename Body>
0082     class sum_node: public task {
0083         typedef final_sum<Range,Body> final_sum_type;
0084     public:
0085         final_sum_type *my_incoming;
0086         final_sum_type *my_body;
0087         Body *my_stuff_last;
0088     private:
0089         final_sum_type *my_left_sum;
0090         sum_node *my_left;
0091         sum_node *my_right;
0092         bool my_left_is_final;
0093         Range my_range;
0094         sum_node( const Range range_, bool left_is_final_ ) :
0095             my_stuff_last(NULL),
0096             my_left_sum(NULL),
0097             my_left(NULL),
0098             my_right(NULL),
0099             my_left_is_final(left_is_final_),
0100             my_range(range_)
0101         {
0102             // Poison fields that will be set by second pass.
0103             poison_pointer(my_body);
0104             poison_pointer(my_incoming);
0105         }
0106         task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
0107             if( !n ) {
0108                 f.recycle_as_child_of( *this );
0109                 f.finish_construction( range_, stuff_last_ );
0110                 return &f;
0111             } else {
0112                 n->my_body = &f;
0113                 n->my_incoming = incoming_;
0114                 n->my_stuff_last = stuff_last_;
0115                 return n;
0116             }
0117         }
0118         task* execute() __TBB_override {
0119             if( my_body ) {
0120                 if( my_incoming )
0121                     my_left_sum->my_body.reverse_join( my_incoming->my_body );
0122                 recycle_as_continuation();
0123                 sum_node& c = *this;
0124                 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
0125                 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
0126                 set_ref_count( (a!=NULL)+(b!=NULL) );
0127                 my_body = NULL;
0128                 if( a ) spawn(*b);
0129                 else a = b;
0130                 return a;
0131             } else {
0132                 return NULL;
0133             }
0134         }
0135         template<typename Range_,typename Body_,typename Partitioner_>
0136         friend class start_scan;
0137 
0138         template<typename Range_,typename Body_>
0139         friend class finish_scan;
0140     };
0141 
0142     //! Combine partial results
0143     /** @ingroup algorithms */
0144     template<typename Range, typename Body>
0145     class finish_scan: public task {
0146         typedef sum_node<Range,Body> sum_node_type;
0147         typedef final_sum<Range,Body> final_sum_type;
0148         final_sum_type** const my_sum;
0149         sum_node_type*& my_return_slot;
0150     public:
0151         final_sum_type* my_right_zombie;
0152         sum_node_type& my_result;
0153 
0154         task* execute() __TBB_override {
0155             __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
0156             if( my_result.my_left )
0157                 my_result.my_left_is_final = false;
0158             if( my_right_zombie && my_sum )
0159                 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
0160             __TBB_ASSERT( !my_return_slot, NULL );
0161             if( my_right_zombie || my_result.my_right ) {
0162                 my_return_slot = &my_result;
0163             } else {
0164                 destroy( my_result );
0165             }
0166             if( my_right_zombie && !my_sum && !my_result.my_right ) {
0167                 destroy(*my_right_zombie);
0168                 my_right_zombie = NULL;
0169             }
0170             return NULL;
0171         }
0172 
0173         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
0174             my_sum(sum_),
0175             my_return_slot(return_slot_),
0176             my_right_zombie(NULL),
0177             my_result(result_)
0178         {
0179             __TBB_ASSERT( !my_return_slot, NULL );
0180         }
0181     };
0182 
0183     //! Initial task to split the work
0184     /** @ingroup algorithms */
0185     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
0186     class start_scan: public task {
0187         typedef sum_node<Range,Body> sum_node_type;
0188         typedef final_sum<Range,Body> final_sum_type;
0189         final_sum_type* my_body;
0190         /** Non-null if caller is requesting total. */
0191         final_sum_type** my_sum;
0192         sum_node_type** my_return_slot;
0193         /** Null if computing root. */
0194         sum_node_type* my_parent_sum;
0195         bool my_is_final;
0196         bool my_is_right_child;
0197         Range my_range;
0198         typename Partitioner::partition_type my_partition;
0199         task* execute() __TBB_override ;
0200     public:
0201         start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
0202             my_body(parent_.my_body),
0203             my_sum(parent_.my_sum),
0204             my_return_slot(&return_slot_),
0205             my_parent_sum(parent_sum_),
0206             my_is_final(parent_.my_is_final),
0207             my_is_right_child(false),
0208             my_range(parent_.my_range,split()),
0209             my_partition(parent_.my_partition,split())
0210         {
0211             __TBB_ASSERT( !*my_return_slot, NULL );
0212         }
0213 
0214         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
0215             my_body(&body_),
0216             my_sum(NULL),
0217             my_return_slot(&return_slot_),
0218             my_parent_sum(NULL),
0219             my_is_final(true),
0220             my_is_right_child(false),
0221             my_range(range_),
0222             my_partition(partitioner_)
0223         {
0224             __TBB_ASSERT( !*my_return_slot, NULL );
0225         }
0226 
0227         static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
0228             if( !range_.empty() ) {
0229                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
0230                 internal::sum_node<Range,Body>* root = NULL;
0231                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
0232                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
0233                     /*my_return_slot=*/root,
0234                     range_,
0235                     *temp_body,
0236                     partitioner_ );
0237                 temp_body->my_body.reverse_join(body_);
0238                 task::spawn_root_and_wait( pass1 );
0239                 if( root ) {
0240                     root->my_body = temp_body;
0241                     root->my_incoming = NULL;
0242                     root->my_stuff_last = &body_;
0243                     task::spawn_root_and_wait( *root );
0244                 } else {
0245                     body_.assign(temp_body->my_body);
0246                     temp_body->finish_construction( range_, NULL );
0247                     temp_body->destroy(*temp_body);
0248                 }
0249             }
0250         }
0251     };
0252 
0253     template<typename Range, typename Body, typename Partitioner>
0254     task* start_scan<Range,Body,Partitioner>::execute() {
0255         typedef internal::finish_scan<Range,Body> finish_pass1_type;
0256         finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
0257         // Inspecting p->result.left_sum would ordinarily be a race condition.
0258         // But we inspect it only if we are not a stolen task, in which case we
0259         // know that task assigning to p->result.left_sum has completed.
0260         bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
0261         if( treat_as_stolen ) {
0262             // Invocation is for right child that has been really stolen or needs to be virtually stolen
0263             p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
0264             my_is_final = false;
0265         }
0266         task* next_task = NULL;
0267         if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
0268             if( my_is_final )
0269                 (my_body->my_body)( my_range, final_scan_tag() );
0270             else if( my_sum )
0271                 (my_body->my_body)( my_range, pre_scan_tag() );
0272             if( my_sum )
0273                 *my_sum = my_body;
0274             __TBB_ASSERT( !*my_return_slot, NULL );
0275         } else {
0276             sum_node_type* result;
0277             if( my_parent_sum )
0278                 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
0279             else
0280                 result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
0281             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
0282             // Split off right child
0283             start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
0284             b.my_is_right_child = true;
0285             // Left child is recycling of *this.  Must recycle this before spawning b,
0286             // otherwise b might complete and decrement c.ref_count() to zero, which
0287             // would cause c.execute() to run prematurely.
0288             recycle_as_child_of(c);
0289             c.set_ref_count(2);
0290             c.spawn(b);
0291             my_sum = &result->my_left_sum;
0292             my_return_slot = &result->my_left;
0293             my_is_right_child = false;
0294             next_task = this;
0295             my_parent_sum = result;
0296             __TBB_ASSERT( !*my_return_slot, NULL );
0297         }
0298         return next_task;
0299     }
0300 
0301     template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0302     class lambda_scan_body : no_assign {
0303         Value               my_sum;
0304         const Value&        identity_element;
0305         const Scan&         my_scan;
0306         const ReverseJoin&  my_reverse_join;
0307     public:
0308         lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
0309             : my_sum(identity)
0310             , identity_element(identity)
0311             , my_scan(scan)
0312             , my_reverse_join(rev_join) {}
0313 
0314         lambda_scan_body( lambda_scan_body& b, split )
0315             : my_sum(b.identity_element)
0316             , identity_element(b.identity_element)
0317             , my_scan(b.my_scan)
0318             , my_reverse_join(b.my_reverse_join) {}
0319 
0320         template<typename Tag>
0321         void operator()( const Range& r, Tag tag ) {
0322             my_sum = my_scan(r, my_sum, tag);
0323         }
0324 
0325         void reverse_join( lambda_scan_body& a ) {
0326             my_sum = my_reverse_join(a.my_sum, my_sum);
0327         }
0328 
0329         void assign( lambda_scan_body& b ) {
0330             my_sum = b.my_sum;
0331         }
0332 
0333         Value result() const {
0334             return my_sum;
0335         }
0336     };
0337 } // namespace internal
0338 //! @endcond
0339 
0340 // Requirements on Range concept are documented in blocked_range.h
0341 
0342 /** \page parallel_scan_body_req Requirements on parallel_scan body
0343     Class \c Body implementing the concept of parallel_scan body must define:
0344     - \code Body::Body( Body&, split ); \endcode    Splitting constructor.
0345                                                     Split \c b so that \c this and \c b can accumulate separately
0346     - \code Body::~Body(); \endcode                 Destructor
0347     - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
0348                                                     Preprocess iterations for range \c r
0349     - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
0350                                                     Do final processing for iterations of range \c r
0351     - \code void Body::reverse_join( Body& a ); \endcode
0352                                                     Merge preprocessing state of \c a into \c this, where \c a was
0353                                                     created earlier from \c b by b's splitting constructor
0354 **/
0355 
0356 /** \name parallel_scan
0357     See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
0358 //@{
0359 
0360 //! Parallel prefix with default partitioner
0361 /** @ingroup algorithms **/
0362 template<typename Range, typename Body>
0363 void parallel_scan( const Range& range, Body& body ) {
0364     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
0365 }
0366 
0367 //! Parallel prefix with simple_partitioner
0368 /** @ingroup algorithms **/
0369 template<typename Range, typename Body>
0370 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0371     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
0372 }
0373 
0374 //! Parallel prefix with auto_partitioner
0375 /** @ingroup algorithms **/
0376 template<typename Range, typename Body>
0377 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
0378     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
0379 }
0380 
0381 //! Parallel prefix with default partitioner
0382 /** @ingroup algorithms **/
0383 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0384 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
0385     internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0386     tbb::parallel_scan(range,body,__TBB_DEFAULT_PARTITIONER());
0387     return body.result();
0388 }
0389 
0390 //! Parallel prefix with simple_partitioner
0391 /** @ingroup algorithms **/
0392 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0393 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
0394     internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0395     tbb::parallel_scan(range,body,partitioner);
0396     return body.result();
0397 }
0398 
0399 //! Parallel prefix with auto_partitioner
0400 /** @ingroup algorithms **/
0401 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
0402 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
0403     internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
0404     tbb::parallel_scan(range,body,partitioner);
0405     return body.result();
0406 }
0407 
0408 //@}
0409 
0410 } // namespace tbb
0411 
0412 #include "internal/_warning_suppress_disable_notice.h"
0413 #undef __TBB_parallel_scan_H_include_area
0414 
0415 #endif /* __TBB_parallel_scan_H */
0416