Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:30:12

0001 #ifndef PTHREADPOOL_H_
0002 #define PTHREADPOOL_H_
0003 
0004 #include <stddef.h>
0005 #include <stdint.h>
0006 
0007 typedef struct pthreadpool* pthreadpool_t;  /* opaque thread pool handle */
0008 /* Task callbacks: (context, grid indices..., then tile sizes for tiled variants). */
0009 typedef void (*pthreadpool_task_1d_t)(void*, size_t);
0010 typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t);
0011 typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t);
0012 typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t);
0013 typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t);
0014 typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t);
0015 typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t);
0016 typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t);
0017 typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t);
0018 typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t);
0019 typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
0020 typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t);
0021 typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
0022 typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
0023 typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
0024 typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
0025 typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
0026 /* Microarchitecture-aware task callbacks: the uint32_t argument is the uarch index. */
0027 typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t);
0028 typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t);
0029 typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t);
0030 typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t);
0031 
0032 
0033 /**
0034  * Disable support for denormalized numbers to the maximum extent possible for
0035  * the duration of the computation.
0036  *
0037  * Handling denormalized floating-point numbers is often implemented in
0038  * microcode, and incurs significant performance degradation. This hint
0039  * instructs the thread pool to disable support for denormalized numbers before
0040  * running the computation by manipulating architecture-specific control
0041  * registers, and restore the initial value of control registers after the
0042  * computation is complete. The thread pool temporarily disables denormalized
0043  * numbers on all threads involved in the computation (i.e. the caller threads,
0044  * and potentially worker threads).
0045  *
0046  * Disabling denormalized numbers may have a small negative effect on results'
0047  * accuracy. As various architectures differ in capabilities to control
0048  * processing of denormalized numbers, using this flag may also hurt results'
0049  * reproducibility across different instruction set architectures.
0050  */
0051 #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
0052 
0053 /**
0054  * Yield worker threads to the system scheduler after the operation is finished.
0055  *
0056  * Force workers to use kernel wait (instead of the default active spin-wait)
0057  * for new commands after this command is processed. This flag affects only the
0058  * immediate next operation on this thread pool. To make the thread pool always
0059  * use kernel wait, pass this flag to all parallelization functions.
0060  */
0061 #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
0062 
0063 #ifdef __cplusplus
0064 extern "C" {
0065 #endif
0066 
0067 /**
0068  * Create a thread pool with the specified number of threads.
0069  *
0070  * @param  threads_count  the number of threads in the thread pool.
0071  *    A value of 0 has a special interpretation: it creates a thread pool with
0072  *    as many threads as there are logical processors in the system.
0073  *
0074  * @returns  A pointer to an opaque thread pool object if the call is
0075  *    successful, or a NULL pointer if the call fails.
0076  */
0077 pthreadpool_t pthreadpool_create(size_t threads_count);
0078 
0079 /**
0080  * Query the number of threads in a thread pool.
0081  *
0082  * @param  threadpool  the thread pool to query.
      *    NOTE(review): unlike the parallelize functions, no behavior is
      *    documented here for a NULL threadpool -- confirm before passing NULL.
0083  *
0084  * @returns  The number of threads in the thread pool.
0085  */
0086 size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
0087 
0088 /**
0089  * Process items on a 1D grid.
0090  *
0091  * The function implements a parallel version of the following snippet:
0092  *
0093  *   for (size_t i = 0; i < range; i++)
0094  *     function(context, i);
0095  *
0096  * When the function returns, all items have been processed and the thread pool
0097  * is ready for a new task.
0098  *
0099  * @note If multiple threads call this function with the same thread pool, the
0100  *    calls are serialized.
0101  *
0102  * @param threadpool  the thread pool to use for parallelization. If threadpool
0103  *    is NULL, all items are processed serially on the calling thread.
0104  * @param function    the function to call for each item.
0105  * @param context     the first argument passed to the specified function.
0106  * @param range       the number of items on the 1D grid to process. The
0107  *    specified function will be called once for each item.
0108  * @param flags       a bitwise combination of zero or more optional flags
0109  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0110  */
0111 void pthreadpool_parallelize_1d(
0112     pthreadpool_t threadpool,
0113     pthreadpool_task_1d_t function,
0114     void* context,
0115     size_t range,
0116     uint32_t flags);
0117 
0118 /**
0119  * Process items on a 1D grid using a microarchitecture-aware task function.
0120  *
0121  * The function implements a parallel version of the following snippet:
0122  *
0123  *   uint32_t uarch_index = cpuinfo_initialize() ?
0124  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
0125  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
0126  *   for (size_t i = 0; i < range; i++)
0127  *     function(context, uarch_index, i);
0128  *
0129  * When the function returns, all items have been processed and the thread pool
0130  * is ready for a new task.
0131  *
0132  * @note If multiple threads call this function with the same thread pool, the
0133  *    calls are serialized.
0134  *
0135  * @param threadpool           the thread pool to use for parallelization. If
0136  *    threadpool is NULL, all items are processed serially on the calling
0137  *    thread.
0138  * @param function             the function to call for each item.
0139  * @param context              the first argument passed to the specified
0140  *    function.
0141  * @param default_uarch_index  the microarchitecture index to use when
0142  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
0143  *    or index returned by cpuinfo_get_current_uarch_index() exceeds the
0144  *    max_uarch_index value.
0145  * @param max_uarch_index      the maximum microarchitecture index expected by
0146  *    the specified function. If the index returned by
0147  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
0148  *    will be used instead. default_uarch_index can exceed max_uarch_index.
0149  * @param range                the number of items on the 1D grid to process.
0150  *    The specified function will be called once for each item.
0151  * @param flags                a bitwise combination of zero or more optional
0152  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
0153  *    PTHREADPOOL_FLAG_YIELD_WORKERS).
0154  */
0155 void pthreadpool_parallelize_1d_with_uarch(
0156     pthreadpool_t threadpool,
0157     pthreadpool_task_1d_with_id_t function,
0158     void* context,
0159     uint32_t default_uarch_index,
0160     uint32_t max_uarch_index,
0161     size_t range,
0162     uint32_t flags);
0163 
0164 /**
0165  * Process items on a 1D grid with specified maximum tile size.
0166  *
0167  * The function implements a parallel version of the following snippet:
0168  *
0169  *   for (size_t i = 0; i < range; i += tile)
0170  *     function(context, i, min(range - i, tile));
0171  *
0172  * When the function returns, all items have been processed and the thread pool
0173  * is ready for a new task.
0174  *
0175  * @note If multiple threads call this function with the same thread pool,
0176  *    the calls are serialized.
0177  *
0178  * @param threadpool  the thread pool to use for parallelization. If threadpool
0179  *    is NULL, all items are processed serially on the calling thread.
0180  * @param function    the function to call for each tile.
0181  * @param context     the first argument passed to the specified function.
0182  * @param range       the number of items on the 1D grid to process.
0183  * @param tile        the maximum number of items on the 1D grid to process in
0184  *    one function call.
0185  * @param flags       a bitwise combination of zero or more optional flags
0186  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0187  */
0188 void pthreadpool_parallelize_1d_tile_1d(
0189     pthreadpool_t threadpool,
0190     pthreadpool_task_1d_tile_1d_t function,
0191     void* context,
0192     size_t range,
0193     size_t tile,
0194     uint32_t flags);
0195 
0196 /**
0197  * Process items on a 2D grid.
0198  *
0199  * The function implements a parallel version of the following snippet:
0200  *
0201  *   for (size_t i = 0; i < range_i; i++)
0202  *     for (size_t j = 0; j < range_j; j++)
0203  *       function(context, i, j);
0204  *
0205  * When the function returns, all items have been processed and the thread pool
0206  * is ready for a new task.
0207  *
0208  * @note If multiple threads call this function with the same thread pool, the
0209  *    calls are serialized.
0210  *
0211  * @param threadpool  the thread pool to use for parallelization. If threadpool
0212  *    is NULL, all items are processed serially on the calling thread.
0213  * @param function    the function to call for each item.
0214  * @param context     the first argument passed to the specified function.
0215  * @param range_i     the number of items to process along the first dimension
0216  *    of the 2D grid.
0217  * @param range_j     the number of items to process along the second dimension
0218  *    of the 2D grid.
0219  * @param flags       a bitwise combination of zero or more optional flags
0220  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0221  */
0222 void pthreadpool_parallelize_2d(
0223     pthreadpool_t threadpool,
0224     pthreadpool_task_2d_t function,
0225     void* context,
0226     size_t range_i,
0227     size_t range_j,
0228     uint32_t flags);
0229 
0230 /**
0231  * Process items on a 2D grid with the specified maximum tile size along the
0232  * last grid dimension.
0233  *
0234  * The function implements a parallel version of the following snippet:
0235  *
0236  *   for (size_t i = 0; i < range_i; i++)
0237  *     for (size_t j = 0; j < range_j; j += tile_j)
0238  *       function(context, i, j, min(range_j - j, tile_j));
0239  *
0240  * When the function returns, all items have been processed and the thread pool
0241  * is ready for a new task.
0242  *
0243  * @note If multiple threads call this function with the same thread pool, the
0244  *    calls are serialized.
0245  *
0246  * @param threadpool  the thread pool to use for parallelization. If threadpool
0247  *    is NULL, all items are processed serially on the calling thread.
0248  * @param function    the function to call for each tile.
0249  * @param context     the first argument passed to the specified function.
0250  * @param range_i     the number of items to process along the first dimension
0251  *    of the 2D grid.
0252  * @param range_j     the number of items to process along the second dimension
0253  *    of the 2D grid.
0254  * @param tile_j      the maximum number of items along the second dimension of
0255  *    the 2D grid to process in one function call.
0256  * @param flags       a bitwise combination of zero or more optional flags
0257  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0258  */
0259 void pthreadpool_parallelize_2d_tile_1d(
0260     pthreadpool_t threadpool,
0261     pthreadpool_task_2d_tile_1d_t function,
0262     void* context,
0263     size_t range_i,
0264     size_t range_j,
0265     size_t tile_j,
0266     uint32_t flags);
0267 
0268 /**
0269  * Process items on a 2D grid with the specified maximum tile size along each
0270  * grid dimension.
0271  *
0272  * The function implements a parallel version of the following snippet:
0273  *
0274  *   for (size_t i = 0; i < range_i; i += tile_i)
0275  *     for (size_t j = 0; j < range_j; j += tile_j)
0276  *       function(context, i, j,
0277  *         min(range_i - i, tile_i), min(range_j - j, tile_j));
0278  *
0279  * When the function returns, all items have been processed and the thread pool
0280  * is ready for a new task.
0281  *
0282  * @note If multiple threads call this function with the same thread pool, the
0283  *    calls are serialized.
0284  *
0285  * @param threadpool  the thread pool to use for parallelization. If threadpool
0286  *    is NULL, all items are processed serially on the calling thread.
0287  * @param function    the function to call for each tile.
0288  * @param context     the first argument passed to the specified function.
0289  * @param range_i     the number of items to process along the first dimension
0290  *    of the 2D grid.
0291  * @param range_j     the number of items to process along the second dimension
0292  *    of the 2D grid.
0293  * @param tile_i      the maximum number of items along the first dimension of
0294  *    the 2D grid to process in one function call.
0295  * @param tile_j      the maximum number of items along the second dimension of
0296  *    the 2D grid to process in one function call.
0297  * @param flags       a bitwise combination of zero or more optional flags
0298  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0299  */
0300 void pthreadpool_parallelize_2d_tile_2d(
0301     pthreadpool_t threadpool,
0302     pthreadpool_task_2d_tile_2d_t function,
0303     void* context,
0304     size_t range_i,
0305     size_t range_j,
0306     size_t tile_i,
0307     size_t tile_j,
0308     uint32_t flags);
0309 
0310 /**
0311  * Process items on a 2D grid with the specified maximum tile size along each
0312  * grid dimension using a microarchitecture-aware task function.
0313  *
0314  * The function implements a parallel version of the following snippet:
0315  *
0316  *   uint32_t uarch_index = cpuinfo_initialize() ?
0317  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
0318  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
0319  *   for (size_t i = 0; i < range_i; i += tile_i)
0320  *     for (size_t j = 0; j < range_j; j += tile_j)
0321  *       function(context, uarch_index, i, j,
0322  *         min(range_i - i, tile_i), min(range_j - j, tile_j));
0323  *
0324  * When the function returns, all items have been processed and the thread pool
0325  * is ready for a new task.
0326  *
0327  * @note If multiple threads call this function with the same thread pool, the
0328  *    calls are serialized.
0329  *
0330  * @param threadpool           the thread pool to use for parallelization. If
0331  *    threadpool is NULL, all items are processed serially on the calling
0332  *    thread.
0333  * @param function             the function to call for each tile.
0334  * @param context              the first argument passed to the specified
0335  *    function.
0336  * @param default_uarch_index  the microarchitecture index to use when
0337  *                             pthreadpool is configured without cpuinfo,
0338  *                             cpuinfo initialization failed, or index returned
0339  *                             by cpuinfo_get_current_uarch_index() exceeds
0340  *                             the max_uarch_index value.
0341  * @param max_uarch_index      the maximum microarchitecture index expected
0342  *                             by the specified function. If the index returned
0343  *                             by cpuinfo_get_current_uarch_index() exceeds this
0344  *                             value, default_uarch_index will be used instead.
0345  *                             default_uarch_index can exceed max_uarch_index.
0346  * @param range_i              the number of items to process along the first
0347  *    dimension of the 2D grid.
0348  * @param range_j              the number of items to process along the second
0349  *    dimension of the 2D grid.
0350  * @param tile_i               the maximum number of items along the first
0351  *    dimension of the 2D grid to process in one function call.
0352  * @param tile_j               the maximum number of items along the second
0353  *    dimension of the 2D grid to process in one function call.
0354  * @param flags                a bitwise combination of zero or more optional
0355  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
0356  *    PTHREADPOOL_FLAG_YIELD_WORKERS).
0357  */
0358 void pthreadpool_parallelize_2d_tile_2d_with_uarch(
0359     pthreadpool_t threadpool,
0360     pthreadpool_task_2d_tile_2d_with_id_t function,
0361     void* context,
0362     uint32_t default_uarch_index,
0363     uint32_t max_uarch_index,
0364     size_t range_i,
0365     size_t range_j,
0366     size_t tile_i,
0367     size_t tile_j,
0368     uint32_t flags);
0369 
0370 /**
0371  * Process items on a 3D grid.
0372  *
0373  * The function implements a parallel version of the following snippet:
0374  *
0375  *   for (size_t i = 0; i < range_i; i++)
0376  *     for (size_t j = 0; j < range_j; j++)
0377  *       for (size_t k = 0; k < range_k; k++)
0378  *         function(context, i, j, k);
0379  *
0380  * When the function returns, all items have been processed and the thread pool
0381  * is ready for a new task.
0382  *
0383  * @note If multiple threads call this function with the same thread pool, the
0384  *    calls are serialized.
0385  *
0386  * @param threadpool  the thread pool to use for parallelization. If threadpool
0387  *    is NULL, all items are processed serially on the calling thread.
0388  * @param function    the function to call for each item.
0389  * @param context     the first argument passed to the specified function.
0390  * @param range_i     the number of items to process along the first dimension
0391  *    of the 3D grid.
0392  * @param range_j     the number of items to process along the second dimension
0393  *    of the 3D grid.
0394  * @param range_k     the number of items to process along the third dimension
0395  *    of the 3D grid.
0396  * @param flags       a bitwise combination of zero or more optional flags
0397  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0398  */
0399 void pthreadpool_parallelize_3d(
0400     pthreadpool_t threadpool,
0401     pthreadpool_task_3d_t function,
0402     void* context,
0403     size_t range_i,
0404     size_t range_j,
0405     size_t range_k,
0406     uint32_t flags);
0407 
0408 /**
0409  * Process items on a 3D grid with the specified maximum tile size along the
0410  * last grid dimension.
0411  *
0412  * The function implements a parallel version of the following snippet:
0413  *
0414  *   for (size_t i = 0; i < range_i; i++)
0415  *     for (size_t j = 0; j < range_j; j++)
0416  *       for (size_t k = 0; k < range_k; k += tile_k)
0417  *         function(context, i, j, k, min(range_k - k, tile_k));
0418  *
0419  * When the function returns, all items have been processed and the thread pool
0420  * is ready for a new task.
0421  *
0422  * @note If multiple threads call this function with the same thread pool, the
0423  *    calls are serialized.
0424  *
0425  * @param threadpool  the thread pool to use for parallelization. If threadpool
0426  *    is NULL, all items are processed serially on the calling thread.
0427  * @param function    the function to call for each tile.
0428  * @param context     the first argument passed to the specified function.
0429  * @param range_i     the number of items to process along the first dimension
0430  *    of the 3D grid.
0431  * @param range_j     the number of items to process along the second dimension
0432  *    of the 3D grid.
0433  * @param range_k     the number of items to process along the third dimension
0434  *    of the 3D grid.
0435  * @param tile_k      the maximum number of items along the third dimension of
0436  *    the 3D grid to process in one function call.
0437  * @param flags       a bitwise combination of zero or more optional flags
0438  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0439  */
0440 void pthreadpool_parallelize_3d_tile_1d(
0441     pthreadpool_t threadpool,
0442     pthreadpool_task_3d_tile_1d_t function,
0443     void* context,
0444     size_t range_i,
0445     size_t range_j,
0446     size_t range_k,
0447     size_t tile_k,
0448     uint32_t flags);
0449 
0450 /**
0451  * Process items on a 3D grid with the specified maximum tile size along the
0452  * last two grid dimensions.
0453  *
0454  * The function implements a parallel version of the following snippet:
0455  *
0456  *   for (size_t i = 0; i < range_i; i++)
0457  *     for (size_t j = 0; j < range_j; j += tile_j)
0458  *       for (size_t k = 0; k < range_k; k += tile_k)
0459  *         function(context, i, j, k,
0460  *           min(range_j - j, tile_j), min(range_k - k, tile_k));
0461  *
0462  * When the function returns, all items have been processed and the thread pool
0463  * is ready for a new task.
0464  *
0465  * @note If multiple threads call this function with the same thread pool, the
0466  *    calls are serialized.
0467  *
0468  * @param threadpool  the thread pool to use for parallelization. If threadpool
0469  *    is NULL, all items are processed serially on the calling thread.
0470  * @param function    the function to call for each tile.
0471  * @param context     the first argument passed to the specified function.
0472  * @param range_i     the number of items to process along the first dimension
0473  *    of the 3D grid.
0474  * @param range_j     the number of items to process along the second dimension
0475  *    of the 3D grid.
0476  * @param range_k     the number of items to process along the third dimension
0477  *    of the 3D grid.
0478  * @param tile_j      the maximum number of items along the second dimension of
0479  *    the 3D grid to process in one function call.
0480  * @param tile_k      the maximum number of items along the third dimension of
0481  *    the 3D grid to process in one function call.
0482  * @param flags       a bitwise combination of zero or more optional flags
0483  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0484  */
0485 void pthreadpool_parallelize_3d_tile_2d(
0486     pthreadpool_t threadpool,
0487     pthreadpool_task_3d_tile_2d_t function,
0488     void* context,
0489     size_t range_i,
0490     size_t range_j,
0491     size_t range_k,
0492     size_t tile_j,
0493     size_t tile_k,
0494     uint32_t flags);
0495 
0496 /**
0497  * Process items on a 3D grid with the specified maximum tile size along the
0498  * last two grid dimensions using a microarchitecture-aware task function.
0499  *
0500  * The function implements a parallel version of the following snippet:
0501  *
0502  *   uint32_t uarch_index = cpuinfo_initialize() ?
0503  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
0504  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
0505  *   for (size_t i = 0; i < range_i; i++)
0506  *     for (size_t j = 0; j < range_j; j += tile_j)
0507  *       for (size_t k = 0; k < range_k; k += tile_k)
0508  *         function(context, uarch_index, i, j, k,
0509  *           min(range_j - j, tile_j), min(range_k - k, tile_k));
0510  *
0511  * When the function returns, all items have been processed and the thread pool
0512  * is ready for a new task.
0513  *
0514  * @note If multiple threads call this function with the same thread pool, the
0515  *    calls are serialized.
0516  *
0517  * @param threadpool           the thread pool to use for parallelization. If
0518  *    threadpool is NULL, all items are processed serially on the calling
0519  *    thread.
0520  * @param function             the function to call for each tile.
0521  * @param context              the first argument passed to the specified
0522  *    function.
0523  * @param default_uarch_index  the microarchitecture index to use when
0524  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
0525  *    or index returned by cpuinfo_get_current_uarch_index() exceeds the
0526  *    max_uarch_index value.
0527  * @param max_uarch_index      the maximum microarchitecture index expected by
0528  *    the specified function. If the index returned by
0529  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
0530  *    will be used instead. default_uarch_index can exceed max_uarch_index.
0531  * @param range_i              the number of items to process along the first
0532  *    dimension of the 3D grid.
0533  * @param range_j              the number of items to process along the second
0534  *    dimension of the 3D grid.
0535  * @param range_k              the number of items to process along the third
0536  *    dimension of the 3D grid.
0537  * @param tile_j               the maximum number of items along the second
0538  *    dimension of the 3D grid to process in one function call.
0539  * @param tile_k               the maximum number of items along the third
0540  *    dimension of the 3D grid to process in one function call.
0541  * @param flags                a bitwise combination of zero or more optional
0542  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
0543  *    PTHREADPOOL_FLAG_YIELD_WORKERS).
0544  */
0545 void pthreadpool_parallelize_3d_tile_2d_with_uarch(
0546     pthreadpool_t threadpool,
0547     pthreadpool_task_3d_tile_2d_with_id_t function,
0548     void* context,
0549     uint32_t default_uarch_index,
0550     uint32_t max_uarch_index,
0551     size_t range_i,
0552     size_t range_j,
0553     size_t range_k,
0554     size_t tile_j,
0555     size_t tile_k,
0556     uint32_t flags);
0557 
0558 /**
0559  * Process items on a 4D grid.
0560  *
0561  * The function implements a parallel version of the following snippet:
0562  *
0563  *   for (size_t i = 0; i < range_i; i++)
0564  *     for (size_t j = 0; j < range_j; j++)
0565  *       for (size_t k = 0; k < range_k; k++)
0566  *         for (size_t l = 0; l < range_l; l++)
0567  *           function(context, i, j, k, l);
0568  *
0569  * When the function returns, all items have been processed and the thread pool
0570  * is ready for a new task.
0571  *
0572  * @note If multiple threads call this function with the same thread pool, the
0573  *    calls are serialized.
0574  *
0575  * @param threadpool  the thread pool to use for parallelization. If threadpool
0576  *    is NULL, all items are processed serially on the calling thread.
0577  * @param function    the function to call for each item.
0578  * @param context     the first argument passed to the specified function.
0579  * @param range_i     the number of items to process along the first dimension
0580  *    of the 4D grid.
0581  * @param range_j     the number of items to process along the second dimension
0582  *    of the 4D grid.
0583  * @param range_k     the number of items to process along the third dimension
0584  *    of the 4D grid.
0585  * @param range_l     the number of items to process along the fourth dimension
0586  *    of the 4D grid.
0587  * @param flags       a bitwise combination of zero or more optional flags
0588  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0589  */
0590 void pthreadpool_parallelize_4d(
0591     pthreadpool_t threadpool,
0592     pthreadpool_task_4d_t function,
0593     void* context,
0594     size_t range_i,
0595     size_t range_j,
0596     size_t range_k,
0597     size_t range_l,
0598     uint32_t flags);
0599 
0600 /**
0601  * Process items on a 4D grid with the specified maximum tile size along the
0602  * last grid dimension.
0603  *
0604  * The function implements a parallel version of the following snippet:
0605  *
0606  *   for (size_t i = 0; i < range_i; i++)
0607  *     for (size_t j = 0; j < range_j; j++)
0608  *       for (size_t k = 0; k < range_k; k++)
0609  *         for (size_t l = 0; l < range_l; l += tile_l)
0610  *           function(context, i, j, k, l, min(range_l - l, tile_l));
0611  *
0612  * When the function returns, all items have been processed and the thread pool
0613  * is ready for a new task.
0614  *
0615  * @note If multiple threads call this function with the same thread pool, the
0616  *    calls are serialized.
0617  *
0618  * @param threadpool  the thread pool to use for parallelization. If threadpool
0619  *    is NULL, all items are processed serially on the calling thread.
0620  * @param function    the function to call for each tile.
0621  * @param context     the first argument passed to the specified function.
0622  * @param range_i     the number of items to process along the first dimension
0623  *    of the 4D grid.
0624  * @param range_j     the number of items to process along the second dimension
0625  *    of the 4D grid.
0626  * @param range_k     the number of items to process along the third dimension
0627  *    of the 4D grid.
0628  * @param range_l     the number of items to process along the fourth dimension
0629  *    of the 4D grid.
0630  * @param tile_l      the maximum number of items along the fourth dimension of
0631  *    the 4D grid to process in one function call.
0632  * @param flags       a bitwise combination of zero or more optional flags
0633  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0634  */
0635 void pthreadpool_parallelize_4d_tile_1d(
0636     pthreadpool_t threadpool,
0637     pthreadpool_task_4d_tile_1d_t function,
0638     void* context,
0639     size_t range_i,
0640     size_t range_j,
0641     size_t range_k,
0642     size_t range_l,
0643     size_t tile_l,
0644     uint32_t flags);
0645 
0646 /**
0647  * Process items on a 4D grid with the specified maximum tile size along the
0648  * last two grid dimensions.
0649  *
0650  * The function implements a parallel version of the following snippet:
0651  *
0652  *   for (size_t i = 0; i < range_i; i++)
0653  *     for (size_t j = 0; j < range_j; j++)
0654  *       for (size_t k = 0; k < range_k; k += tile_k)
0655  *         for (size_t l = 0; l < range_l; l += tile_l)
0656  *           function(context, i, j, k, l,
0657  *             min(range_k - k, tile_k), min(range_l - l, tile_l));
0658  *
0659  * When the function returns, all items have been processed and the thread pool
0660  * is ready for a new task.
0661  *
0662  * @note If multiple threads call this function with the same thread pool, the
0663  *    calls are serialized.
0664  *
0665  * @param threadpool  the thread pool to use for parallelization. If threadpool
0666  *    is NULL, all items are processed serially on the calling thread.
0667  * @param function    the function to call for each tile.
0668  * @param context     the first argument passed to the specified function.
0669  * @param range_i     the number of items to process along the first dimension
0670  *    of the 4D grid.
0671  * @param range_j     the number of items to process along the second dimension
0672  *    of the 4D grid.
0673  * @param range_k     the number of items to process along the third dimension
0674  *    of the 4D grid.
0675  * @param range_l     the number of items to process along the fourth dimension
0676  *    of the 4D grid.
0677  * @param tile_k      the maximum number of items along the third dimension of
0678  *    the 4D grid to process in one function call.
0679  * @param tile_l      the maximum number of items along the fourth dimension of
0680  *    the 4D grid to process in one function call.
0681  * @param flags       a bitwise combination of zero or more optional flags
0682  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
0683  */
0684 void pthreadpool_parallelize_4d_tile_2d(
0685     pthreadpool_t threadpool,
0686     pthreadpool_task_4d_tile_2d_t function,
0687     void* context,
0688     size_t range_i,
0689     size_t range_j,
0690     size_t range_k,
0691     size_t range_l,
0692     size_t tile_k,
0693     size_t tile_l,
0694     uint32_t flags);
0695 
0696 /**
0697  * Process items on a 4D grid with the specified maximum tile size along the
0698  * last two grid dimensions using a microarchitecture-aware task function.
0699  *
0700  * The function implements a parallel version of the following snippet:
0701  *
0702  *   uint32_t uarch_index = cpuinfo_initialize() ?
0703  *       cpuinfo_get_current_uarch_index() : default_uarch_index;
0704  *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
0705  *   for (size_t i = 0; i < range_i; i++)
0706  *     for (size_t j = 0; j < range_j; j++)
0707  *       for (size_t k = 0; k < range_k; k += tile_k)
0708  *         for (size_t l = 0; l < range_l; l += tile_l)
0709  *           function(context, uarch_index, i, j, k, l,
0710  *             min(range_k - k, tile_k), min(range_l - l, tile_l));
0711  *
0712  * When the function returns, all items have been processed and the thread pool
0713  * is ready for a new task.
0714  *
0715  * @note If multiple threads call this function with the same thread pool, the
0716  *    calls are serialized.
0717  *
0718  * @param threadpool           the thread pool to use for parallelisation. If
0719  *    threadpool is NULL, all items are processed serially on the calling
0720  *    thread.
0721  * @param function             the function to call for each tile.
0722  * @param context              the first argument passed to the specified
0723  *    function.
0724  * @param default_uarch_index  the microarchitecture index to use when
0725  *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
0726  *    or the index returned by cpuinfo_get_current_uarch_index() exceeds the
0727  *    max_uarch_index value.
0728  * @param max_uarch_index      the maximum microarchitecture index expected by
0729  *    the specified function. If the index returned by
0730  *    cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
0731  *    will be used instead. default_uarch_index can exceed max_uarch_index.
0732  * @param range_i              the number of items to process along the first
0733  *    dimension of the 4D grid.
0734  * @param range_j              the number of items to process along the second
0735  *    dimension of the 4D grid.
0736  * @param range_k              the number of items to process along the third
0737  *    dimension of the 4D grid.
0738  * @param range_l              the number of items to process along the fourth
0739  *    dimension of the 4D grid.
0740  * @param tile_k               the maximum number of items along the third
0741  *    dimension to process in one function call (last tile may be smaller).
0742  * @param tile_l               the maximum number of items along the fourth
0743  *    dimension to process in one function call (last tile may be smaller).
0744  * @param flags                a bitwise combination of zero or more optional
0745  *    flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
0746  *    PTHREADPOOL_FLAG_YIELD_WORKERS)
0747  */
0748 void pthreadpool_parallelize_4d_tile_2d_with_uarch(
0749     pthreadpool_t threadpool,
0750     pthreadpool_task_4d_tile_2d_with_id_t function,
0751     void* context,
0752     uint32_t default_uarch_index,
0753     uint32_t max_uarch_index,
0754     size_t range_i,
0755     size_t range_j,
0756     size_t range_k,
0757     size_t range_l,
0758     size_t tile_k,
0759     size_t tile_l,
0760     uint32_t flags);
0761 
0762 /**
0763  * Process items on a 5D grid.
0764  *
0765  * The function implements a parallel version of the following snippet:
0766  *
0767  *   for (size_t i = 0; i < range_i; i++)
0768  *     for (size_t j = 0; j < range_j; j++)
0769  *       for (size_t k = 0; k < range_k; k++)
0770  *         for (size_t l = 0; l < range_l; l++)
0771  *           for (size_t m = 0; m < range_m; m++)
0772  *             function(context, i, j, k, l, m);
0773  *
0774  * When the function returns, all items have been processed and the thread pool
0775  * is ready for a new task.
0776  *
0777  * @note If multiple threads call this function with the same thread pool, the
0778  *    calls are serialized.
0779  *
0780  * @param threadpool  the thread pool to use for parallelisation. If threadpool
0781  *    is NULL, all items are processed serially on the calling thread.
0782  * @param function    the function to call for each item.
0783  * @param context     the first argument passed to the specified function.
0784  * @param range_i     the number of items to process along the first dimension
0785  *    of the 5D grid.
0786  * @param range_j     the number of items to process along the second dimension
0787  *    of the 5D grid.
0788  * @param range_k     the number of items to process along the third dimension
0789  *    of the 5D grid.
0790  * @param range_l     the number of items to process along the fourth dimension
0791  *    of the 5D grid.
0792  * @param range_m     the number of items to process along the fifth dimension
0793  *    of the 5D grid.
0794  * @param flags       a bitwise combination of zero or more optional flags
0795  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
0796  */
0797 void pthreadpool_parallelize_5d(
0798     pthreadpool_t threadpool,
0799     pthreadpool_task_5d_t function,
0800     void* context,
0801     size_t range_i,
0802     size_t range_j,
0803     size_t range_k,
0804     size_t range_l,
0805     size_t range_m,
0806     uint32_t flags);
0807 
0808 /**
0809  * Process items on a 5D grid with the specified maximum tile size along the
0810  * last grid dimension.
0811  *
0812  * The function implements a parallel version of the following snippet:
0813  *
0814  *   for (size_t i = 0; i < range_i; i++)
0815  *     for (size_t j = 0; j < range_j; j++)
0816  *       for (size_t k = 0; k < range_k; k++)
0817  *         for (size_t l = 0; l < range_l; l++)
0818  *           for (size_t m = 0; m < range_m; m += tile_m)
0819  *             function(context, i, j, k, l, m, min(range_m - m, tile_m));
0820  *
0821  * When the function returns, all items have been processed and the thread pool
0822  * is ready for a new task.
0823  *
0824  * @note If multiple threads call this function with the same thread pool, the
0825  *    calls are serialized.
0826  *
0827  * @param threadpool  the thread pool to use for parallelisation. If threadpool
0828  *    is NULL, all items are processed serially on the calling thread.
0829  * @param function    the function to call for each tile.
0830  * @param context     the first argument passed to the specified function.
0831  * @param range_i     the number of items to process along the first dimension
0832  *    of the 5D grid.
0833  * @param range_j     the number of items to process along the second dimension
0834  *    of the 5D grid.
0835  * @param range_k     the number of items to process along the third dimension
0836  *    of the 5D grid.
0837  * @param range_l     the number of items to process along the fourth dimension
0838  *    of the 5D grid.
0839  * @param range_m     the number of items to process along the fifth dimension
0840  *    of the 5D grid.
0841  * @param tile_m      the maximum number of items along the fifth dimension of
0842  *    the 5D grid to process in one function call (last tile may be smaller).
0843  * @param flags       a bitwise combination of zero or more optional flags
0844  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
0845  */
0846 void pthreadpool_parallelize_5d_tile_1d(
0847     pthreadpool_t threadpool,
0848     pthreadpool_task_5d_tile_1d_t function,
0849     void* context,
0850     size_t range_i,
0851     size_t range_j,
0852     size_t range_k,
0853     size_t range_l,
0854     size_t range_m,
0855     size_t tile_m,
0856     uint32_t flags);
0857 
0858 /**
0859  * Process items on a 5D grid with the specified maximum tile size along the
0860  * last two grid dimensions.
0861  *
0862  * The function implements a parallel version of the following snippet:
0863  *
0864  *   for (size_t i = 0; i < range_i; i++)
0865  *     for (size_t j = 0; j < range_j; j++)
0866  *       for (size_t k = 0; k < range_k; k++)
0867  *         for (size_t l = 0; l < range_l; l += tile_l)
0868  *           for (size_t m = 0; m < range_m; m += tile_m)
0869  *             function(context, i, j, k, l, m,
0870  *               min(range_l - l, tile_l), min(range_m - m, tile_m));
0871  *
0872  * When the function returns, all items have been processed and the thread pool
0873  * is ready for a new task.
0874  *
0875  * @note If multiple threads call this function with the same thread pool, the
0876  *    calls are serialized.
0877  *
0878  * @param threadpool  the thread pool to use for parallelisation. If threadpool
0879  *    is NULL, all items are processed serially on the calling thread.
0880  * @param function    the function to call for each tile.
0881  * @param context     the first argument passed to the specified function.
0882  * @param range_i     the number of items to process along the first dimension
0883  *    of the 5D grid.
0884  * @param range_j     the number of items to process along the second dimension
0885  *    of the 5D grid.
0886  * @param range_k     the number of items to process along the third dimension
0887  *    of the 5D grid.
0888  * @param range_l     the number of items to process along the fourth dimension
0889  *    of the 5D grid.
0890  * @param range_m     the number of items to process along the fifth dimension
0891  *    of the 5D grid.
0892  * @param tile_l      the maximum number of items along the fourth dimension of
0893  *    the 5D grid to process in one function call (last tile may be smaller).
0894  * @param tile_m      the maximum number of items along the fifth dimension of
0895  *    the 5D grid to process in one function call (last tile may be smaller).
0896  * @param flags       a bitwise combination of zero or more optional flags
0897  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
0898  */
0899 void pthreadpool_parallelize_5d_tile_2d(
0900     pthreadpool_t threadpool,
0901     pthreadpool_task_5d_tile_2d_t function,
0902     void* context,
0903     size_t range_i,
0904     size_t range_j,
0905     size_t range_k,
0906     size_t range_l,
0907     size_t range_m,
0908     size_t tile_l,
0909     size_t tile_m,
0910     uint32_t flags);
0911 
0912 /**
0913  * Process items on a 6D grid.
0914  *
0915  * The function implements a parallel version of the following snippet:
0916  *
0917  *   for (size_t i = 0; i < range_i; i++)
0918  *     for (size_t j = 0; j < range_j; j++)
0919  *       for (size_t k = 0; k < range_k; k++)
0920  *         for (size_t l = 0; l < range_l; l++)
0921  *           for (size_t m = 0; m < range_m; m++)
0922  *             for (size_t n = 0; n < range_n; n++)
0923  *               function(context, i, j, k, l, m, n);
0924  *
0925  * When the function returns, all items have been processed and the thread pool
0926  * is ready for a new task.
0927  *
0928  * @note If multiple threads call this function with the same thread pool, the
0929  *    calls are serialized.
0930  *
0931  * @param threadpool  the thread pool to use for parallelisation. If threadpool
0932  *    is NULL, all items are processed serially on the calling thread.
0933  * @param function    the function to call for each item.
0934  * @param context     the first argument passed to the specified function.
0935  * @param range_i     the number of items to process along the first dimension
0936  *    of the 6D grid.
0937  * @param range_j     the number of items to process along the second dimension
0938  *    of the 6D grid.
0939  * @param range_k     the number of items to process along the third dimension
0940  *    of the 6D grid.
0941  * @param range_l     the number of items to process along the fourth dimension
0942  *    of the 6D grid.
0943  * @param range_m     the number of items to process along the fifth dimension
0944  *    of the 6D grid.
0945  * @param range_n     the number of items to process along the sixth dimension
0946  *    of the 6D grid.
0949  * @param flags       a bitwise combination of zero or more optional flags
0950  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
0951  */
0952 void pthreadpool_parallelize_6d(
0953   pthreadpool_t threadpool,
0954   pthreadpool_task_6d_t function,
0955   void* context,
0956   size_t range_i,
0957   size_t range_j,
0958   size_t range_k,
0959   size_t range_l,
0960   size_t range_m,
0961   size_t range_n,
0962   uint32_t flags);
0963 
0964 /**
0965  * Process items on a 6D grid with the specified maximum tile size along the
0966  * last grid dimension.
0967  *
0968  * The function implements a parallel version of the following snippet:
0969  *
0970  *   for (size_t i = 0; i < range_i; i++)
0971  *     for (size_t j = 0; j < range_j; j++)
0972  *       for (size_t k = 0; k < range_k; k++)
0973  *         for (size_t l = 0; l < range_l; l++)
0974  *           for (size_t m = 0; m < range_m; m++)
0975  *             for (size_t n = 0; n < range_n; n += tile_n)
0976  *               function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
0977  *
0978  * When the function returns, all items have been processed and the thread pool
0979  * is ready for a new task.
0980  *
0981  * @note If multiple threads call this function with the same thread pool, the
0982  *    calls are serialized.
0983  *
0984  * @param threadpool  the thread pool to use for parallelisation. If threadpool
0985  *    is NULL, all items are processed serially on the calling thread.
0986  * @param function    the function to call for each tile.
0987  * @param context     the first argument passed to the specified function.
0988  * @param range_i     the number of items to process along the first dimension
0989  *    of the 6D grid.
0990  * @param range_j     the number of items to process along the second dimension
0991  *    of the 6D grid.
0992  * @param range_k     the number of items to process along the third dimension
0993  *    of the 6D grid.
0994  * @param range_l     the number of items to process along the fourth dimension
0995  *    of the 6D grid.
0996  * @param range_m     the number of items to process along the fifth dimension
0997  *    of the 6D grid.
0998  * @param range_n     the number of items to process along the sixth dimension
0999  *    of the 6D grid.
1000  * @param tile_n      the maximum number of items along the sixth dimension of
1001  *    the 6D grid to process in one function call (last tile may be smaller).
1002  * @param flags       a bitwise combination of zero or more optional flags
1003  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1004  */
1005 void pthreadpool_parallelize_6d_tile_1d(
1006   pthreadpool_t threadpool,
1007   pthreadpool_task_6d_tile_1d_t function,
1008   void* context,
1009   size_t range_i,
1010   size_t range_j,
1011   size_t range_k,
1012   size_t range_l,
1013   size_t range_m,
1014   size_t range_n,
1015   size_t tile_n,
1016   uint32_t flags);
1017 
1018 /**
1019  * Process items on a 6D grid with the specified maximum tile size along the
1020  * last two grid dimensions.
1021  *
1022  * The function implements a parallel version of the following snippet:
1023  *
1024  *   for (size_t i = 0; i < range_i; i++)
1025  *     for (size_t j = 0; j < range_j; j++)
1026  *       for (size_t k = 0; k < range_k; k++)
1027  *         for (size_t l = 0; l < range_l; l++)
1028  *           for (size_t m = 0; m < range_m; m += tile_m)
1029  *             for (size_t n = 0; n < range_n; n += tile_n)
1030  *               function(context, i, j, k, l, m, n,
1031  *                 min(range_m - m, tile_m), min(range_n - n, tile_n));
1032  *
1033  * When the function returns, all items have been processed and the thread pool
1034  * is ready for a new task.
1035  *
1036  * @note If multiple threads call this function with the same thread pool, the
1037  *    calls are serialized.
1038  *
1039  * @param threadpool  the thread pool to use for parallelisation. If threadpool
1040  *    is NULL, all items are processed serially on the calling thread.
1041  * @param function    the function to call for each tile.
1042  * @param context     the first argument passed to the specified function.
1043  * @param range_i     the number of items to process along the first dimension
1044  *    of the 6D grid.
1045  * @param range_j     the number of items to process along the second dimension
1046  *    of the 6D grid.
1047  * @param range_k     the number of items to process along the third dimension
1048  *    of the 6D grid.
1049  * @param range_l     the number of items to process along the fourth dimension
1050  *    of the 6D grid.
1051  * @param range_m     the number of items to process along the fifth dimension
1052  *    of the 6D grid.
1053  * @param range_n     the number of items to process along the sixth dimension
1054  *    of the 6D grid.
1055  * @param tile_m      the maximum number of items along the fifth dimension of
1056  *    the 6D grid to process in one function call (last tile may be smaller).
1057  * @param tile_n      the maximum number of items along the sixth dimension of
1058  *    the 6D grid to process in one function call (last tile may be smaller).
1059  * @param flags       a bitwise combination of zero or more optional flags
1060  *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1061  */
1062 void pthreadpool_parallelize_6d_tile_2d(
1063     pthreadpool_t threadpool,
1064     pthreadpool_task_6d_tile_2d_t function,
1065     void* context,
1066     size_t range_i,
1067     size_t range_j,
1068     size_t range_k,
1069     size_t range_l,
1070     size_t range_m,
1071     size_t range_n,
1072     size_t tile_m,
1073     size_t tile_n,
1074     uint32_t flags);
1075 
1076 /**
1077  * Terminates threads in the thread pool and releases associated resources.
1078  *
1079  * @warning  Accessing the thread pool after a call to this function constitutes
1080  *    undefined behaviour and may cause data corruption.
1081  *
1082  * @param[in,out]  threadpool  The thread pool to destroy.
      *    NOTE(review): whether a NULL threadpool is accepted here is not stated
      *    in this header - confirm against the implementation.
1083  */
1084 void pthreadpool_destroy(pthreadpool_t threadpool);
1085 
1086 
1087 #ifndef PTHREADPOOL_NO_DEPRECATED_API
1088 
1089 /* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */
     /*
      * PTHREADPOOL_DEPRECATED is appended to every legacy declaration below.
      * When __GNUC__ is defined it expands to the GNU deprecated attribute, so
      * that uses of the legacy functions are diagnosed by the compiler; on other
      * compilers it expands to nothing and the declarations are unannotated.
      */
1090 #if defined(__GNUC__)
1091     #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
1092 #else
1093     #define PTHREADPOOL_DEPRECATED
1094 #endif
1095 
/*
 * Callback types used by the legacy pthreadpool_compute_* functions. Each takes
 * an opaque void* followed only by size_t arguments.
 *
 * The 1d, 1d_tiled, 2d and 2d_tiled types have the same signatures as
 * pthreadpool_task_1d_t, pthreadpool_task_1d_tile_1d_t, pthreadpool_task_2d_t
 * and pthreadpool_task_2d_tile_2d_t respectively.
 *
 * NOTE(review): the meaning of the size_t arguments is not documented here;
 * presumably N starting indices followed by N tile extents for the N-D tiled
 * variants - confirm against the implementation.
 */
1096 typedef void (*pthreadpool_function_1d_t)(void*, size_t);
1097 typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t);
1098 typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t);
1099 typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t);
1100 typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
1101 typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
1102 
/**
 * Legacy 1D entry point. Declared only when PTHREADPOOL_NO_DEPRECATED_API is
 * not defined, and tagged with PTHREADPOOL_DEPRECATED.
 *
 * NOTE(review): semantics are not documented in this header; presumably calls
 * function(argument, i) for every i in [0, range) - confirm against the
 * implementation.
 */
1103 void pthreadpool_compute_1d(
1104     pthreadpool_t threadpool,
1105     pthreadpool_function_1d_t function,
1106     void* argument,
1107     size_t range) PTHREADPOOL_DEPRECATED;
1108 
/**
 * Legacy tiled 1D entry point. Declared only when
 * PTHREADPOOL_NO_DEPRECATED_API is not defined, and tagged with
 * PTHREADPOOL_DEPRECATED.
 *
 * NOTE(review): semantics are not documented in this header; presumably the
 * range is split into chunks of at most `tile` items and function receives
 * (argument, start index, chunk size) - confirm against the implementation.
 */
1109 void pthreadpool_compute_1d_tiled(
1110     pthreadpool_t threadpool,
1111     pthreadpool_function_1d_tiled_t function,
1112     void* argument,
1113     size_t range,
1114     size_t tile) PTHREADPOOL_DEPRECATED;
1115 
/**
 * Legacy 2D entry point. Declared only when PTHREADPOOL_NO_DEPRECATED_API is
 * not defined, and tagged with PTHREADPOOL_DEPRECATED.
 *
 * NOTE(review): semantics are not documented in this header; presumably calls
 * function(argument, i, j) for every (i, j) in the range_i x range_j grid -
 * confirm against the implementation.
 */
1116 void pthreadpool_compute_2d(
1117     pthreadpool_t threadpool,
1118     pthreadpool_function_2d_t function,
1119     void* argument,
1120     size_t range_i,
1121     size_t range_j) PTHREADPOOL_DEPRECATED;
1122 
/**
 * Legacy tiled 2D entry point. Declared only when
 * PTHREADPOOL_NO_DEPRECATED_API is not defined, and tagged with
 * PTHREADPOOL_DEPRECATED. Unlike the pthreadpool_parallelize_*_tile_* functions
 * it takes a tile size for every grid dimension and no flags argument.
 *
 * NOTE(review): semantics are not documented in this header; presumably
 * function receives (argument, i, j, tile extent along i, tile extent along j)
 * - confirm against the implementation.
 */
1123 void pthreadpool_compute_2d_tiled(
1124     pthreadpool_t threadpool,
1125     pthreadpool_function_2d_tiled_t function,
1126     void* argument,
1127     size_t range_i,
1128     size_t range_j,
1129     size_t tile_i,
1130     size_t tile_j) PTHREADPOOL_DEPRECATED;
1131 
/**
 * Legacy tiled 3D entry point. Declared only when
 * PTHREADPOOL_NO_DEPRECATED_API is not defined, and tagged with
 * PTHREADPOOL_DEPRECATED. It takes a tile size for every grid dimension and no
 * flags argument.
 *
 * NOTE(review): semantics are not documented in this header; presumably
 * function receives argument, three starting indices and three tile extents -
 * confirm against the implementation.
 */
1132 void pthreadpool_compute_3d_tiled(
1133     pthreadpool_t threadpool,
1134     pthreadpool_function_3d_tiled_t function,
1135     void* argument,
1136     size_t range_i,
1137     size_t range_j,
1138     size_t range_k,
1139     size_t tile_i,
1140     size_t tile_j,
1141     size_t tile_k) PTHREADPOOL_DEPRECATED;
1142 
/**
 * Legacy tiled 4D entry point. Declared only when
 * PTHREADPOOL_NO_DEPRECATED_API is not defined, and tagged with
 * PTHREADPOOL_DEPRECATED. It takes a tile size for every grid dimension and no
 * flags argument.
 *
 * NOTE(review): semantics are not documented in this header; presumably
 * function receives argument, four starting indices and four tile extents -
 * confirm against the implementation.
 */
1143 void pthreadpool_compute_4d_tiled(
1144     pthreadpool_t threadpool,
1145     pthreadpool_function_4d_tiled_t function,
1146     void* argument,
1147     size_t range_i,
1148     size_t range_j,
1149     size_t range_k,
1150     size_t range_l,
1151     size_t tile_i,
1152     size_t tile_j,
1153     size_t tile_k,
1154     size_t tile_l) PTHREADPOOL_DEPRECATED;
1155 
1156 #endif /* PTHREADPOOL_NO_DEPRECATED_API */
1157 
1158 #ifdef __cplusplus
1159 } /* extern "C" */
1160 #endif
1161 
1162 #endif /* PTHREADPOOL_H_ */