Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:40:58

0001 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.
0002 
0003 // Use, modification and distribution is subject to the Boost Software
0004 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
0005 // http://www.boost.org/LICENSE_1_0.txt)
0006 
0007 // Message Passing Interface 1.1 -- Section 4. MPI Collectives
0008 
0009 /** @file collectives.hpp
0010  *
0011  *  This header contains MPI collective operations, which implement
0012  *  various parallel algorithms that require the coordination of all
0013  *  processes within a communicator. The header @c collectives_fwd.hpp
0014  *  provides forward declarations for each of these operations. To
0015  *  include only specific collective algorithms, use the headers @c
0016  *  boost/mpi/collectives/algorithm_name.hpp.
0017  */
0018 #ifndef BOOST_MPI_COLLECTIVES_HPP
0019 #define BOOST_MPI_COLLECTIVES_HPP
0020 
0021 #include <boost/mpi/communicator.hpp>
0022 #include <boost/mpi/inplace.hpp>
0023 #include <vector>
0024 
0025 namespace boost { namespace mpi {
0026 /**
0027  *  @brief Gather the values stored at every process into vectors of
0028  *  values from each process.
0029  *
0030  *  @c all_gather is a collective algorithm that collects the values
0031  *  stored at each process into a vector of values indexed by the
0032  *  process number they came from. The type @c T of the values may be
0033  *  any type that is serializable or has an associated MPI data type.
0034  *
0035  *  When the type @c T has an associated MPI data type, this routine
0036  *  invokes @c MPI_Allgather to gather the values.
0037  *
0038  *    @param comm The communicator over which the all-gather will
0039  *    occur.
0040  *
0041  *    @param in_value The value to be transmitted by each process. To
0042  *    gather an array of values, @c in_values points to the @c n local
0043  *    values to be transmitted.
0044  *
0045  *    @param out_values A vector or pointer to storage that will be
0046  *    populated with the values from each process, indexed by the
0047  *    process ID number. If it is a vector, the vector will be resized
0048  *    accordingly.
0049  */
0050 template<typename T>
0051 void
0052 all_gather(const communicator& comm, const T& in_value, 
0053            std::vector<T>& out_values);
0054 
0055 /**
0056  * \overload
0057  */
0058 template<typename T>
0059 void
0060 all_gather(const communicator& comm, const T& in_value, T* out_values);
0061 
0062 /**
0063  * \overload
0064  */
0065 template<typename T>
0066 void
0067 all_gather(const communicator& comm, const T* in_values, int n,
0068            std::vector<T>& out_values);
0069 
0070 /**
0071  * \overload
0072  */
0073 template<typename T>
0074 void
0075 all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
0076 
0077 /**
0078  * \overload
0079  */
0080 template<typename T>
0081 void
0082 all_gatherv(const communicator& comm, const T& in_value, T* out_values,
0083             const std::vector<int>& sizes);
0084 
0085 /**
0086  * \overload
0087  */
0088 template<typename T>
0089 void
0090 all_gatherv(const communicator& comm, const T* in_values, T* out_values,
0091             const std::vector<int>& sizes);
0092 
0093 /**
0094  * \overload
0095  */
0096 template<typename T>
0097 void
0098 all_gatherv(const communicator& comm, std::vector<T> const& in_values,  std::vector<T>& out_values,
0099            const std::vector<int>& sizes);
0100 
0101 /**
0102  * \overload
0103  */
0104 template<typename T>
0105 void
0106 all_gatherv(const communicator& comm, const T& in_value, T* out_values,
0107             const std::vector<int>& sizes, const std::vector<int>& displs);
0108 
0109 /**
0110  * \overload
0111  */
0112 template<typename T>
0113 void
0114 all_gatherv(const communicator& comm, const T* in_values, T* out_values,
0115             const std::vector<int>& sizes, const std::vector<int>& displs);
0116 
0117 /**
0118  * \overload
0119  */
0120 template<typename T>
0121 void
0122 all_gatherv(const communicator& comm, std::vector<T> const& in_values, std::vector<T>& out_values,
0123             const std::vector<int>& sizes, const std::vector<int>& displs);
0124 
0125 /**
0126  *  @brief Combine the values stored by each process into a single
0127  *  value available to all processes.
0128  *
0129  *  @c all_reduce is a collective algorithm that combines the values
0130  *  stored by each process into a single value available to all
0131  *  processes. The values are combined in a user-defined way,
0132  *  specified via a function object. The type @c T of the values may
0133  *  be any type that is serializable or has an associated MPI data
0134  *  type. One can think of this operation as an @c all_gather, followed
0135  *  by an @c std::accumulate() over the gathered values and using the
0136  *  operation @c op.
0137  *
0138  *  When the type @c T has an associated MPI data type, this routine
0139  *  invokes @c MPI_Allreduce to perform the reduction. If possible,
0140  *  built-in MPI operations will be used; otherwise, @c all_reduce()
0141  *  will create a custom MPI_Op for the call to MPI_Allreduce.
0142  *
0143  *    @param comm The communicator over which the reduction will
0144  *    occur.
0145  *    @param value The local value to be combined with the local
0146  *    values of every other process. For reducing arrays, @c in_values
0147  *    is a pointer to the local values to be reduced and @c n is the
0148  *    number of values to reduce. See @c reduce for more information.
0149  *
0150  *    If wrapped in an @c inplace_t object, the argument serves as both
0151  *    input and @c out_value, and the local value will be overwritten
0152  *    (a convenience function @c inplace is provided for the wrapping).
0153  *
0154  *    @param out_value Will receive the result of the reduction
0155  *    operation. If this parameter is omitted, the outgoing value will
0156  *    instead be returned.
0157  *
0158  *    @param op The binary operation that combines two values of type
0159  *    @c T and returns a third value of type @c T. For types @c T that have
0160  *    associated MPI data types, @c op will either be translated into
0161  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
0162  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
0163  *    operations.hpp header for more details on this mapping. For any
0164  *    non-built-in operation, commutativity will be determined by the
0165  *    @c is_commutative trait (also in @c operations.hpp): users are
0166  *    encouraged to mark commutative operations as such, because it
0167  *    gives the implementation additional latitude to optimize the
0168  *    reduction operation.
0169  *
0170  *    @param n Indicates the size of the buffers of array type.
0171  *    @returns If no @p out_value parameter is supplied, returns the
0172  *    result of the reduction operation.
0173  */
0174 template<typename T, typename Op>
0175 void
0176 all_reduce(const communicator& comm, const T* value, int n, T* out_value, 
0177            Op op);
0178 /**
0179  * \overload
0180  */
0181 template<typename T, typename Op>
0182 void
0183 all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
0184 /**
0185  * \overload
0186  */
0187 template<typename T, typename Op>
0188 T all_reduce(const communicator& comm, const T& value, Op op);
0189 
0190 /**
0191  * \overload
0192  */
0193 template<typename T, typename Op>
0194 void
0195 all_reduce(const communicator& comm, inplace_t<T*> value, int n,
0196            Op op);
0197 /**
0198  * \overload
0199  */
0200 template<typename T, typename Op>
0201 void
0202 all_reduce(const communicator& comm, inplace_t<T> value, Op op);
0203 
0204 /**
0205  *  @brief Send data from every process to every other process.
0206  *
0207  *  @c all_to_all is a collective algorithm that transmits @c p values
0208  *  from every process to every other process. On process i, the jth value
0209  *  of the @p in_values vector is sent to process j and placed in the
0210  *  ith position of the @p out_values vector in process @p j. The type
0211  *  @c T of the values may be any type that is serializable or has an
0212  *  associated MPI data type. If @c n is provided, then arrays of @p n
0213  *  values will be transferred from one process to another.
0214  *
0215  *  When the type @c T has an associated MPI data type, this routine
0216  *  invokes @c MPI_Alltoall to scatter the values.
0217  *
0218  *    @param comm The communicator over which the all-to-all
0219  *    communication will occur.
0220  *
0221  *    @param in_values A vector or pointer to storage that contains
0222  *    the values to send to each process, indexed by the process ID
0223  *    number.
0224  *
0225  *    @param out_values A vector or pointer to storage that will be
0226  *    updated to contain the values received from other processes. The
0227  *  jth value in @p out_values will come from the process with rank j.
0228  */
0229 template<typename T>
0230 void
0231 all_to_all(const communicator& comm, const std::vector<T>& in_values,
0232            std::vector<T>& out_values);
0233 
0234 /**
0235  * \overload
0236  */
0237 template<typename T>
0238 void all_to_all(const communicator& comm, const T* in_values, T* out_values);
0239 
0240 /**
0241  * \overload
0242  */
0243 template<typename T>
0244 void
0245 all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
0246            std::vector<T>& out_values);
0247 
0248 /**
0249  * \overload
0250  */
0251 template<typename T>
0252 void 
0253 all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
0254 
0255 /**
0256  * @brief Broadcast a value from a root process to all other
0257  * processes.
0258  *
0259  * @c broadcast is a collective algorithm that transfers a value from
0260  * an arbitrary @p root process to every other process that is part of
0261  * the given communicator. The @c broadcast algorithm can transmit any
0262  * Serializable value, values that have associated MPI data types,
0263  * packed archives, skeletons, and the content of skeletons; see the
0264  * @c send primitive for communicators for a complete list. The type
0265  * @c T shall be the same for all processes that are a part of the
0266  * communicator @p comm, unless packed archives are being transferred:
0267  * with packed archives, the root sends a @c packed_oarchive or @c
0268  * packed_skeleton_oarchive whereas the other processes receive a
0269  * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
0270  *
0271  * When the type @c T has an associated MPI data type, this routine
0272  * invokes @c MPI_Bcast to perform the broadcast.
0273  *
0274  *   @param comm The communicator over which the broadcast will
0275  *   occur.
0276  *
0277  *   @param value The value (or values, if @p n is provided) to be
0278  *   transmitted (if the rank of @p comm is equal to @p root) or
0279  *   received (if the rank of @p comm is not equal to @p root). When
0280  *   the @p value is a @c skeleton_proxy, only the skeleton of the
0281  *   object will be broadcast. In this case, the @p root will build a
0282  *   skeleton from the object held in the proxy and all of the
0283  *   non-roots will reshape the objects held in their proxies based on
0284  *   the skeleton sent from the root.
0285  *
0286  *   @param n When supplied, the number of values that the pointer @p
0287  *   values points to, for broadcasting an array of values. The value
0288  *   of @p n must be the same for all processes in @p comm.
0289  *
0290  *   @param root The rank/process ID of the process that will be
0291  *   transmitting the value.
0292  */
0293 template<typename T>
0294 void broadcast(const communicator& comm, T& value, int root);
0295 
0296 /**
0297  * \overload
0298  */
0299 template<typename T>
0300 void broadcast(const communicator& comm, T* values, int n, int root);
0301 
0302 /**
0303  * \overload
0304  */
0305 template<typename T>
0306 void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
0307 
0308 /**
0309  * \overload
0310  */
0311 template<typename T>
0312 void
0313 broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
0314 
0315 /**
0316  *  @brief Gather the values stored at every process into a vector at
0317  *  the root process.
0318  *
0319  *  @c gather is a collective algorithm that collects the values
0320  *  stored at each process into a vector of values at the @p root
0321  *  process. This vector is indexed by the process number that the
0322  *  value came from. The type @c T of the values may be any type that
0323  *  is serializable or has an associated MPI data type.
0324  *
0325  *  When the type @c T has an associated MPI data type, this routine
0326  *  invokes @c MPI_Gather to gather the values.
0327  *
0328  *    @param comm The communicator over which the gather will occur.
0329  *
0330  *    @param in_value The value to be transmitted by each process. For
0331  *    gathering arrays of values, @c in_values points to storage for
0332  *    @c n*comm.size() values.
0333  *
0334  *    @param out_values A vector or pointer to storage that will be
0335  *    populated with the values from each process, indexed by the
0336  *    process ID number. If it is a vector, it will be resized
0337  *    accordingly. For non-root processes, this parameter may be
0338  *    omitted. If it is still provided, however, it will be unchanged.
0339  *
0340  *    @param root The process ID number that will collect the
0341  *    values. This value must be the same on all processes.
0342  */
0343 template<typename T>
0344 void
0345 gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
0346        int root);
0347 
0348 /**
0349  * \overload
0350  */
0351 template<typename T>
0352 void
0353 gather(const communicator& comm, const T& in_value, T* out_values, int root);
0354 
0355 /**
0356  * \overload
0357  */
0358 template<typename T>
0359 void gather(const communicator& comm, const T& in_value, int root);
0360 
0361 /**
0362  * \overload
0363  */
0364 template<typename T>
0365 void
0366 gather(const communicator& comm, const T* in_values, int n, 
0367        std::vector<T>& out_values, int root);
0368 
0369 /**
0370  * \overload
0371  */
0372 template<typename T>
0373 void
0374 gather(const communicator& comm, const T* in_values, int n, T* out_values, 
0375        int root);
0376 
0377 /**
0378  * \overload
0379  */
0380 template<typename T>
0381 void gather(const communicator& comm, const T* in_values, int n, int root);
0382 
0383 /**
0384  *  @brief Similar to boost::mpi::gather with the difference that the number
0385  *  of values to be send by non-root processes can vary.
0386  *
0387  *    @param comm The communicator over which the gather will occur.
0388  *
0389  *    @param in_values The array of values to be transmitted by each process.
0390  *
0391  *    @param in_size For each non-root process this specifies the size
0392  *    of @p in_values.
0393  *
0394  *    @param out_values A pointer to storage that will be populated with
0395  *    the values from each process. For non-root processes, this parameter
0396  *    may be omitted. If it is still provided, however, it will be unchanged.
0397  *
0398  *    @param sizes A vector containing the number of elements each non-root
0399  *    process will send.
0400  *
0401  *    @param displs A vector such that the i-th entry specifies the
0402  *    displacement (relative to @p out_values) from which to take the ingoing
0403  *    data at the @p root process. Overloaded versions for which @p displs is
0404  *    omitted assume that the data is to be placed contiguously at the root process.
0405  *
0406  *    @param root The process ID number that will collect the
0407  *    values. This value must be the same on all processes.
0408  */
0409 template<typename T>
0410 void
0411 gatherv(const communicator& comm, const std::vector<T>& in_values,
0412         T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
0413         int root);
0414 
0415 /**
0416  * \overload
0417  */
0418 template<typename T>
0419 void
0420 gatherv(const communicator& comm, const T* in_values, int in_size,
0421         T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
0422         int root);
0423 
0424 /**
0425  * \overload
0426  */
0427 template<typename T>
0428 void gatherv(const communicator& comm, const std::vector<T>& in_values, int root);
0429 
0430 /**
0431  * \overload
0432  */
0433 template<typename T>
0434 void gatherv(const communicator& comm, const T* in_values, int in_size, int root);
0435 
0436 /**
0437  * \overload
0438  */
0439 template<typename T>
0440 void
0441 gatherv(const communicator& comm, const T* in_values, int in_size,
0442         T* out_values, const std::vector<int>& sizes, int root);
0443 
0444 /**
0445  * \overload
0446  */
0447 template<typename T>
0448 void
0449 gatherv(const communicator& comm, const std::vector<T>& in_values,
0450         T* out_values, const std::vector<int>& sizes, int root);
0451 
0452 /**
0453  *  @brief Scatter the values stored at the root to all processes
0454  *  within the communicator.
0455  *
0456  *  @c scatter is a collective algorithm that scatters the values
0457  *  stored in the @p root process (inside a vector) to all of the
0458  *  processes in the communicator. The vector @p out_values (only
0459  *  significant at the @p root) is indexed by the process number to
0460  *  which the corresponding value will be sent. The type @c T of the
0461  *  values may be any type that is serializable or has an associated
0462  *  MPI data type.
0463  *
0464  *  When the type @c T has an associated MPI data type, this routine
0465  *  invokes @c MPI_Scatter to scatter the values.
0466  *
0467  *    @param comm The communicator over which the scatter will occur.
0468  *
0469  *    @param in_values A vector or pointer to storage that will contain
0470  *    the values to send to each process, indexed by the process rank.
0471  *    For non-root processes, this parameter may be omitted. If it is
0472  *    still provided, however, it will be unchanged.
0473  *
0474  *    @param out_value The value received by each process. When
0475  *    scattering an array of values, @p out_values points to the @p n
0476  *    values that will be received by each process.
0477  *
0478  *    @param root The process ID number that will scatter the
0479  *    values. This value must be the same on all processes.
0480  */
0481 template<typename T>
0482 void
0483 scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
0484         int root);
0485 
0486 /**
0487  * \overload
0488  */
0489 template<typename T>
0490 void
0491 scatter(const communicator& comm, const T* in_values, T& out_value, int root);
0492 
0493 /**
0494  * \overload
0495  */
0496 template<typename T>
0497 void scatter(const communicator& comm, T& out_value, int root);
0498 
0499 /**
0500  * \overload
0501  */
0502 template<typename T>
0503 void
0504 scatter(const communicator& comm, const std::vector<T>& in_values, 
0505         T* out_values, int n, int root);
0506 
0507 /**
0508  * \overload
0509  */
0510 template<typename T>
0511 void
0512 scatter(const communicator& comm, const T* in_values, T* out_values, int n,
0513         int root);
0514 
0515 /**
0516  * \overload
0517  */
0518 template<typename T>
0519 void scatter(const communicator& comm, T* out_values, int n, int root);
0520 
0521 /**
0522  *  @brief Similar to boost::mpi::scatter with the difference that the number
0523  *  of values stored at the root process does not need to be a multiple of
0524  *  the communicator's size.
0525  *
0526  *    @param comm The communicator over which the scatter will occur.
0527  *
0528  *    @param in_values A vector or pointer to storage that will contain
0529  *    the values to send to each process, indexed by the process rank.
0530  *    For non-root processes, this parameter may be omitted. If it is
0531  *    still provided, however, it will be unchanged.
0532  *
0533  *    @param sizes A vector containing the number of elements each non-root
0534  *    process will receive.
0535  *
0536  *    @param displs A vector such that the i-th entry specifies the
0537  *    displacement (relative to @p in_values) from which to take the outgoing
0538  *    data to process i. Overloaded versions for which @p displs is omitted
0539  *    assume that the data is contiguous at the @p root process.
0540  *
0541  *    @param out_values The array of values received by each process.
0542  *
0543  *    @param out_size For each non-root process this will contain the size
0544  *    of @p out_values.
0545  *
0546  *    @param root The process ID number that will scatter the
0547  *    values. This value must be the same on all processes.
0548  */
0549 template<typename T>
0550 void
0551 scatterv(const communicator& comm, const std::vector<T>& in_values,
0552          const std::vector<int>& sizes, const std::vector<int>& displs,
0553          T* out_values, int out_size, int root);
0554 
0555 /**
0556  * \overload
0557  */
0558 template<typename T>
0559 void
0560 scatterv(const communicator& comm, const T* in_values,
0561          const std::vector<int>& sizes, const std::vector<int>& displs,
0562          T* out_values, int out_size, int root);
0563 
0564 /**
0565  * \overload
0566  */
0567 template<typename T>
0568 void scatterv(const communicator& comm, T* out_values, int out_size, int root);
0569 
0570 /**
0571  * \overload
0572  */
0573 template<typename T>
0574 void
0575 scatterv(const communicator& comm, const T* in_values,
0576          const std::vector<int>& sizes, T* out_values, int root);
0577 
0578 /**
0579  * \overload
0580  */
0581 template<typename T>
0582 void
0583 scatterv(const communicator& comm, const std::vector<T>& in_values,
0584          const std::vector<int>& sizes, T* out_values, int root);
0585 
0586 /**
0587  *  @brief Combine the values stored by each process into a single
0588  *  value at the root.
0589  *
0590  *  @c reduce is a collective algorithm that combines the values
0591  *  stored by each process into a single value at the @c root. The
0592  *  values can be combined arbitrarily, specified via a function
0593  *  object. The type @c T of the values may be any type that is
0594  *  serializable or has an associated MPI data type. One can think of
0595  *  this operation as a @c gather to the @p root, followed by an @c
0596  *  std::accumulate() over the gathered values and using the operation
0597  *  @c op. 
0598  *
0599  *  When the type @c T has an associated MPI data type, this routine
0600  *  invokes @c MPI_Reduce to perform the reduction. If possible,
0601  *  built-in MPI operations will be used; otherwise, @c reduce() will
0602  *  create a custom MPI_Op for the call to MPI_Reduce.
0603  *
0604  *    @param comm The communicator over which the reduction will
0605  *    occur.
0606  *
0607  *    @param in_value The local value to be combined with the local
0608  *    values of every other process. For reducing arrays, @c in_values
0609  *    contains a pointer to the local values. In this case, @c n is
0610  *    the number of values that will be reduced. Reduction occurs
0611  *    independently for each of the @p n values referenced by @p
0612  *    in_values, e.g., calling reduce on an array of @p n values is
0613  *    like calling @c reduce @p n separate times, one for each
0614  *    location in @p in_values and @p out_values.
0615  *
0616  *    @param out_value Will receive the result of the reduction
0617  *    operation, but only for the @p root process. Non-root processes
0618  *    may omit this parameter; if they choose to supply the parameter,
0619  *    it will be unchanged. For reducing arrays, @c out_values
0620  *    contains a pointer to the storage for the output values.
0621  *
0622  *    @param op The binary operation that combines two values of type
0623  *    @c T into a third value of type @c T. For types @c T that have
0624  *    associated MPI data types, @c op will either be translated into
0625  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
0626  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
0627  *    operations.hpp header for more details on this mapping. For any
0628  *    non-built-in operation, commutativity will be determined by the
0629  *    @c is_commutative trait (also in @c operations.hpp): users are
0630  *    encouraged to mark commutative operations as such, because it
0631  *    gives the implementation additional latitude to optimize the
0632  *    reduction operation.
0633  *
0634  *    @param root The process ID number that will receive the final,
0635  *    combined value. This value must be the same on all processes.
0636  */
0637 template<typename T, typename Op>
0638 void
0639 reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
0640        int root);
0641 
0642 /**
0643  * \overload
0644  */
0645 template<typename T, typename Op>
0646 void reduce(const communicator& comm, const T& in_value, Op op, int root);
0647 
0648 /**
0649  * \overload
0650  */
0651 template<typename T, typename Op>
0652 void
0653 reduce(const communicator& comm, const T* in_values, int n, T* out_values, 
0654        Op op, int root);
0655 
0656 /**
0657  * \overload
0658  */
0659 template<typename T, typename Op>
0660 void 
0661 reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
0662 
0663 /**
0664  *  @brief Compute a prefix reduction of values from all processes in
0665  *  the communicator.
0666  *
0667  *  @c scan is a collective algorithm that combines the values stored
0668  *  by each process with the values of all processes with a smaller
0669  *  rank. The values can be arbitrarily combined, specified via a
0670  *  function object @p op. The type @c T of the values may be any type
0671  *  that is serializable or has an associated MPI data type. One can
0672  *  think of this operation as a @c gather to some process, followed
0673  *  by an @c std::partial_sum() over the gathered values using the
0674  *  operation @c op. The ith process returns the ith value emitted by
0675  *  @c std::partial_sum().
0676  *
0677  *  When the type @c T has an associated MPI data type, this routine
0678  *  invokes @c MPI_Scan to perform the reduction. If possible,
0679  *  built-in MPI operations will be used; otherwise, @c scan() will
0680  *  create a custom @c MPI_Op for the call to MPI_Scan.
0681  *
0682  *    @param comm The communicator over which the prefix reduction
0683  *    will occur.
0684  *
0685  *    @param in_value The local value to be combined with the local
0686  *    values of other processes. For the array variant, the @c
0687  *    in_values parameter points to the @c n local values that will be
0688  *    combined.
0689  *
0690  *    @param out_value If provided, the ith process will receive the
0691  *    value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
0692  *    ... )). For the array variant, @c out_values contains a pointer
0693  *    to storage for the @c n output values. The prefix reduction
0694  *    occurs independently for each of the @p n values referenced by
0695  *    @p in_values, e.g., calling scan on an array of @p n values is
0696  *    like calling @c scan @p n separate times, one for each location
0697  *    in @p in_values and @p out_values.
0698  *
0699  *    @param op The binary operation that combines two values of type
0700  *    @c T into a third value of type @c T. For types @c T that have
0701  *    associated MPI data types, @c op will either be translated into
0702  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
0703  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
0704  *    operations.hpp header for more details on this mapping. For any
0705  *    non-built-in operation, commutativity will be determined by the
0706  *    @c is_commutative trait (also in @c operations.hpp).
0707  *
0708  *    @returns If no @p out_value parameter is provided, returns the
0709  *    result of prefix reduction.
0710  */
0711 template<typename T, typename Op>
0712 void
0713 scan(const communicator& comm, const T& in_value, T& out_value, Op op);
0714 
0715 /**
0716  * \overload
0717  */
0718 template<typename T, typename Op>
0719 T
0720 scan(const communicator& comm, const T& in_value, Op op);
0721 
0722 /**
0723  * \overload
0724  */
0725 template<typename T, typename Op>
0726 void
0727 scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
0728 
0729 } } // end namespace boost::mpi
0730 #endif // BOOST_MPI_COLLECTIVES_HPP
0731 
0732 #ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
0733 // Include implementations of each of the collectives
0734 #  include <boost/mpi/collectives/all_gather.hpp>
0735 #  include <boost/mpi/collectives/all_gatherv.hpp>
0736 #  include <boost/mpi/collectives/all_reduce.hpp>
0737 #  include <boost/mpi/collectives/all_to_all.hpp>
0738 #  include <boost/mpi/collectives/broadcast.hpp>
0739 #  include <boost/mpi/collectives/gather.hpp>
0740 #  include <boost/mpi/collectives/gatherv.hpp>
0741 #  include <boost/mpi/collectives/scatter.hpp>
0742 #  include <boost/mpi/collectives/scatterv.hpp>
0743 #  include <boost/mpi/collectives/reduce.hpp>
0744 #  include <boost/mpi/collectives/scan.hpp>
0745 #endif
0746