
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#ifndef H5DOpublic_H
#define H5DOpublic_H

#ifdef __cplusplus
extern "C" {
#endif

/** \page H5DO_UG HDF5 High Level Optimizations
 * Since version 1.10.3, these functions have been deprecated in favor of #H5Dwrite_chunk and #H5Dread_chunk.
 *
 * \section sec_hldo_direct_chunk Direct Chunk Write Function
 * When a user application has a chunked dataset and tries to write a single chunk of data with
 * #H5Dwrite, the data goes through several steps inside the HDF5 library. The library first examines the
 * hyperslab selection. Then it converts the data from the datatype in memory to the datatype in the file if
 * they are different. Finally, the library processes the data in the filter pipeline. Starting with
 * the 1.8.11 release, a new high-level C function called #H5DOwrite_chunk became available. It writes a data
 * chunk directly to the file, bypassing the library's hyperslab selection, data conversion, and filter
 * pipeline processes. In other words, if an application can pre-process the data, then the application can
 * use #H5DOwrite_chunk to write the data much faster.
 *
 * #H5DOwrite_chunk was developed in response to a client request. The client builds X-ray pixel
 * detectors for use at synchrotron light sources. These detectors can produce data at a rate of tens of
 * gigabytes per second. Before transferring the data over their network, the detectors compress the data
 * by a factor of 10 or more. The modular architecture of the detectors can scale up their data streams in
 * parallel and maps well to current parallel computing and storage systems.
 * See the \ref_rfc20121114 for the original proposal.
 *
 * \subsection subsec_hldo_direct_chunk_using Using the Direct Chunk Write Function
 * The #H5DOwrite_chunk function takes a pre-processed data chunk (\p buf) and its size
 * (\p data_size) and writes it to the chunk location (\p offset) in the dataset (\p dset_id).
 *
 * The function prototype is shown below:
 * \code
 * herr_t H5DOwrite_chunk(
 *                        hid_t       dset_id,     // the dataset
 *                        hid_t       dxpl_id,     // data transfer property list
 *                        uint32_t    filter_mask, // indicates which filters are used
 *                        hsize_t*    offset,      // position of the chunk
 *                        size_t      data_size,   // size of the actual data
 *                        const void* buf          // buffer with data to be written
 *                        );
 * \endcode
 *
 * Below is a simple example showing how to use the function:
 * <em>Example 1. Using H5DOwrite_chunk</em>
 * \code
 *     hsize_t offset[2] = {4, 4};
 *     uint32_t filter_mask = 0;
 *     size_t nbytes = 40;
 *     if(H5DOwrite_chunk(dset_id, dxpl, filter_mask, offset, nbytes, data_buf) < 0)
 *         goto error;
 * \endcode
 *
 * In the example above, the dataset is 8x8 elements of int, and each chunk is 4x4. The offset of the first
 * element of the chunk to be written is (4, 4). In the diagram below, the shaded chunk is the data to be
 * written. The function writes a pre-compressed data chunk of 40 bytes (assumed) to the dataset. The
 * zero value of the filter mask means that all filters have been applied to the pre-processed data.
 *
 * <table>
 * <tr>
 * <td>
 * \image html DOChunks_fig1.png "Figure 1. Illustration of the chunk to be written"
 * </td>
 * </tr>
 * </table>
 *
 * The complete code example at the end of this topic shows how to set the value of the filter mask to
 * indicate a filter being skipped. The corresponding bit in the filter mask is turned on when a filter is
 * skipped. For example, if the second filter is skipped, the second bit of the filter mask should be turned
 * on. For more information, see the #H5DOwrite_chunk entry in the \ref RM.
 *
 * \subsection subsec_hldo_direct_chunk_design The Design
 * The following diagram shows how the function #H5DOwrite_chunk bypasses hyperslab selection, data
 * conversion, and filter pipeline inside the HDF5 library.
 *
 * <table>
 * <tr>
 * <td>
 * \image html DOChunks_fig2.png "Figure 2. Diagram for H5DOwrite_chunk"
 * </td>
 * </tr>
 * </table>
 *
 * \subsection subsec_hldo_direct_chunk_perf Performance
 * The table below describes the results of performance benchmark tests run by HDF5 developers. It shows
 * that using the new function #H5DOwrite_chunk to write pre-compressed data is much faster than using
 * the #H5Dwrite function to compress and write the same data with the filter pipeline. Measurements
 * involving #H5Dwrite include compression time in the filter pipeline. Since the data is already
 * compressed before #H5DOwrite_chunk is called, using #H5DOwrite_chunk to write compressed data
 * avoids the performance bottleneck in the HDF5 filter pipeline.
 *
 * The test was run on a Linux 2.6.18 / 64-bit Intel x86_64 machine. The dataset contained 100 chunks,
 * and only one chunk was written to the file per write call, for a total of 100 writes. The time for
 * writing the entire dataset was measured with the Unix system function gettimeofday. Writing the
 * entire dataset with one write call took almost the same amount of time as writing chunk by chunk. In
 * order to force the system to flush the data to the file, the O_SYNC flag was used to open the file.
 *
 * <em>Table 1. Performance results for H5DOwrite_chunk in the high-level library</em>
 * <table>
 * <tr>
 * <td>Dataset size (MB)</td><td colspan="2">95.37</td><td colspan="2">762.94</td><td colspan="2">2288.82</td>
 * </tr>
 * <tr>
 * <td>Size after compression (MB)</td><td colspan="2">64.14</td><td colspan="2">512.94</td><td colspan="2">1538.81</td>
 * </tr>
 * <tr>
 * <td>Dataset dimensionality</td><td colspan="2">100x1000x250</td><td colspan="2">100x2000x1000</td><td colspan="2">100x2000x3000</td>
 * </tr>
 * <tr>
 * <td>Chunk dimensionality</td><td colspan="2">1000x250</td><td colspan="2">2000x1000</td><td colspan="2">2000x3000</td>
 * </tr>
 * <tr>
 * <td>Datatype</td><td colspan="2">4-byte integer</td><td colspan="2">4-byte integer</td><td colspan="2">4-byte integer</td>
 * </tr>
 * <tr>
 * <th>I/O speed is in MB/s; time is in seconds (s).</th><th>speed</th><th>time</th><th>speed</th><th>time</th><th>speed</th><th>time</th>
 * </tr>
 * <tr>
 * <td>H5Dwrite writes without compression filter</td><td>77.27</td><td>1.23</td><td>97.02</td><td>7.86</td><td>91.77</td><td>24.94</td>
 * </tr>
 * <tr>
 * <td>H5DOwrite_chunk writes uncompressed data</td><td>79</td><td>1.21</td><td>95.71</td><td>7.97</td><td>89.17</td><td>25.67</td>
 * </tr>
 * <tr>
 * <td>H5Dwrite writes with compression filter</td><td>2.68</td><td>35.59</td><td>2.67</td><td>285.75</td><td>2.67</td><td>857.24</td>
 * </tr>
 * <tr>
 * <td>H5DOwrite_chunk writes compressed data</td><td>77.19</td><td>0.83</td><td>78.56</td><td>6.53</td><td>96.28</td><td>15.98</td>
 * </tr>
 * <tr>
 * <td>Unix writes compressed data to Unix file</td><td>76.49</td><td>0.84</td><td>95</td><td>5.4</td><td>98.59</td><td>15.61</td>
 * </tr>
 * </table>
 *
 * \subsection subsec_hldo_direct_chunk_caution A Word of Caution
 * Since #H5DOwrite_chunk writes data chunks directly to a file, developers must be careful when using it.
 * The function bypasses hyperslab selection, the conversion of data from one datatype to another, and
 * the filter pipeline to write the chunk. Developers should have experience with these processes before
 * they use this function.
 *
 * \subsection subsec_hldo_direct_chunk_example A Complete Code Example
 * The following is an example of using #H5DOwrite_chunk to write an entire dataset, chunk by chunk.
 * \code
 *     #include <zlib.h>
 *     #include <math.h>
 *     #define DEFLATE_SIZE_ADJUST(s) (ceil(((double)(s))*1.001)+12)
 *
 *     size_t       buf_size     = CHUNK_NX*CHUNK_NY*sizeof(int);
 *     const Bytef *z_src        = (const Bytef*)(direct_buf);
 *     Bytef       *z_dst;         // destination buffer
 *     uLongf       z_dst_nbytes = (uLongf)DEFLATE_SIZE_ADJUST(buf_size);
 *     uLong        z_src_nbytes = (uLong)buf_size;
 *     int          aggression   = 9; // compression aggression setting
 *     uint32_t     filter_mask  = 0;
 *
 *     // Create the dataspace
 *     if((dataspace = H5Screate_simple(RANK, dims, maxdims)) < 0)
 *         goto error;
 *     // Create a new file
 *     if((file = H5Fcreate(FILE_NAME5, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
 *         goto error;
 *     // Modify dataset creation properties, i.e. enable chunking and compression
 *     if((cparms = H5Pcreate(H5P_DATASET_CREATE)) < 0)
 *         goto error;
 *     if((status = H5Pset_chunk(cparms, RANK, chunk_dims)) < 0)
 *         goto error;
 *     if((status = H5Pset_deflate(cparms, aggression)) < 0)
 *         goto error;
 *     // Create a new dataset within the file using the cparms creation properties
 *     if((dset_id = H5Dcreate2(file, DATASETNAME, H5T_NATIVE_INT, dataspace, H5P_DEFAULT,
 *                              cparms, H5P_DEFAULT)) < 0)
 *         goto error;
 *     // Initialize data for one chunk
 *     for(i = n = 0; i < CHUNK_NX; i++)
 *         for(j = 0; j < CHUNK_NY; j++)
 *             direct_buf[i][j] = n++;
 *     // Allocate output (compressed) buffer
 *     outbuf = malloc(z_dst_nbytes);
 *     z_dst = (Bytef *)outbuf;
 *     // Perform compression from the source to the destination buffer
 *     ret = compress2(z_dst, &z_dst_nbytes, z_src, z_src_nbytes, aggression);
 *     // Check for various zlib errors
 *     if(Z_BUF_ERROR == ret) {
 *         fprintf(stderr, "overflow");
 *         goto error;
 *     } else if(Z_MEM_ERROR == ret) {
 *         fprintf(stderr, "deflate memory error");
 *         goto error;
 *     } else if(Z_OK != ret) {
 *         fprintf(stderr, "other deflate error");
 *         goto error;
 *     }
 *     // Write the compressed chunk data repeatedly to cover all the chunks in
 *     // the dataset, using the direct write function
 *     for(i = 0; i < NX/CHUNK_NX; i++) {
 *         for(j = 0; j < NY/CHUNK_NY; j++) {
 *             status = H5DOwrite_chunk(dset_id, H5P_DEFAULT, filter_mask, offset, z_dst_nbytes, outbuf);
 *             offset[1] += CHUNK_NY;
 *         }
 *         offset[0] += CHUNK_NX;
 *         offset[1] = 0;
 *     }
 *     // Overwrite the first chunk with uncompressed data. Set the filter mask
 *     // to indicate that the compression filter is skipped
 *     filter_mask = 0x00000001;
 *     offset[0] = offset[1] = 0;
 *     if(H5DOwrite_chunk(dset_id, H5P_DEFAULT, filter_mask, offset, buf_size, direct_buf) < 0)
 *         goto error;
 *     // Read the entire dataset back for data verification, converting ints to longs
 *     if(H5Dread(dset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, outbuf_long) < 0)
 *         goto error;
 *     // Data verification here
 *     ...
 * \endcode
 */

/**\defgroup H5DO HDF5 Optimizations APIs (H5DO)
 *
 * <em>Bypassing default HDF5 behavior in order to optimize for specific
 * use cases (H5DO)</em>
 *
 * The HDF5 functions described in this section are implemented in the HDF5 High-level
 * library as optimized functions. These functions generally require careful setup
 * and testing, as they enable an application to bypass portions of the HDF5
 * library's I/O pipeline for performance purposes.
 *
 * These functions are distributed in the standard HDF5 distribution and are
 * available any time the HDF5 High-level library is available.
 *
 * - \ref H5DOappend
 *   \n Appends data to a dataset along a specified dimension.
 * - \ref H5DOread_chunk
 *   \n Reads a raw data chunk directly from a dataset in a file into a buffer (DEPRECATED)
 * - \ref H5DOwrite_chunk
 *   \n Writes a raw data chunk from a buffer directly to a dataset in a file (DEPRECATED)
 *
 */

/*-------------------------------------------------------------------------
 *
 * "Optimized dataset" routines.
 *
 *-------------------------------------------------------------------------
 */

/**
 * --------------------------------------------------------------------------
 * \ingroup H5DO
 *
 * \brief Appends data to a dataset along a specified dimension.
 *
 * \param[in] dset_id   Dataset identifier
 * \param[in] dxpl_id   Dataset transfer property list identifier
 * \param[in] axis      Dataset dimension (0-based) for the append
 * \param[in] extension Number of elements to append for the
 *                      axis-th dimension
 * \param[in] memtype   The memory datatype identifier
 * \param[in] buf       Buffer with data for the append
 *
 * \return \herr_t
 *
 * \details The H5DOappend() routine extends a dataset by \p extension
 *          number of elements along the dimension specified by
 *          \p axis and writes the elements in \p buf to the
 *          dataset. Dimension \p axis is 0-based. The elements' type
 *          is described by \p memtype.
 *
 *          This routine combines calling H5Dset_extent(),
 *          H5Sselect_hyperslab(), and H5Dwrite() into a single routine
 *          that simplifies application development for the common case
 *          of appending elements to an existing dataset.
 *
 *          For a multi-dimensional dataset, appending to one dimension
 *          will write a contiguous hyperslab over the other dimensions.
 *          For example, if a 3-D dataset has dimension sizes (3, 5, 8),
 *          extending the 0th dimension (currently of size 3) by 3 will
 *          append 3*5*8 = 120 elements (which must be pointed to by the
 *          \p buf parameter) to the dataset, making its final
 *          dimension sizes (6, 5, 8).
 *
 *          If a dataset has more than one unlimited dimension, any of
 *          those dimensions may be appended to, although only along
 *          one dimension per call to H5DOappend().
 *
 * \since   1.10.0
 *
 */
H5_HLDLL herr_t H5DOappend(hid_t dset_id, hid_t dxpl_id, unsigned axis, size_t extension, hid_t memtype,
                           const void *buf);

/* Symbols defined for compatibility with previous versions of the HDF5 API.
 *
 * Use of these symbols is deprecated.
 */
#ifndef H5_NO_DEPRECATED_SYMBOLS

/* Compatibility wrappers for functionality moved to H5D */

/**
 * --------------------------------------------------------------------------
 * \ingroup H5DO
 *
 * \brief Writes a raw data chunk from a buffer directly to a dataset in a file.
 *
 * \param[in] dset_id       Identifier for the dataset to write to
 * \param[in] dxpl_id       Transfer property list identifier for
 *                          this I/O operation
 * \param[in] filters       Mask for identifying the filters in use
 * \param[in] offset        Logical position of the chunk's first element
 *                          in the dataspace
 * \param[in] data_size     Size of the actual data to be written in bytes
 * \param[in] buf           Buffer containing data to be written to the chunk
 *
 * \return \herr_t
 *
 * \deprecated This function was deprecated in favor of the function
 *             H5Dwrite_chunk() as of HDF5-1.10.3.
 *             In HDF5 1.10.3, the functionality of H5DOwrite_chunk()
 *             was moved to H5Dwrite_chunk().
 * \deprecated For compatibility, this API call has been left as a stub which
 *             simply calls H5Dwrite_chunk(). New code should use H5Dwrite_chunk().
 *
 * \details H5DOwrite_chunk() writes a raw data chunk as specified by its
 *          logical \p offset in a chunked dataset \p dset_id from the application
 *          memory buffer \p buf to the dataset in the file. Typically, the data
 *          in \p buf is preprocessed in memory by a custom transformation, such as
 *          compression. The chunk will bypass the library's internal data
 *          transfer pipeline, including filters, and will be written directly to the file.
 *
 *          \p dxpl_id is a data transfer property list identifier.
 *
 *          \p filters is a mask providing a record of which filters are used
 *          with the chunk. The default value of the mask is zero (\c 0),
 *          indicating that all enabled filters are applied. A filter is skipped
 *          if the bit corresponding to the filter's position in the pipeline
 *          (<tt>0 ≤ position < 32</tt>) is turned on. This mask is saved
 *          with the chunk in the file.
 *
 *          \p offset is an array specifying the logical position of the first
 *          element of the chunk in the dataset's dataspace. The length of the
 *          offset array must equal the number of dimensions, or rank, of the
 *          dataspace. The values in \p offset must not exceed the dimension limits
 *          and must specify a point that falls on a dataset chunk boundary.
 *
 *          \p data_size is the size in bytes of the chunk, representing the number of
 *          bytes to be read from the buffer \p buf. If the data chunk has been
 *          precompressed, \p data_size should be the size of the compressed data.
 *
 *          \p buf is the memory buffer containing data to be written to the chunk in the file.
 *
 * \attention   Exercise caution when using H5DOread_chunk() and H5DOwrite_chunk(),
 *              as they read and write data chunks directly in a file.
 *              H5DOwrite_chunk() bypasses hyperslab selection, the conversion of data
 *              from one datatype to another, and the filter pipeline to write the chunk.
 *              Developers should have experience with these processes before
 *              using this function. Please see
 *              <a href="https://\DOCURL/advanced_topics/UsingDirectChunkWrite.pdf">
 *              Using the Direct Chunk Write Function</a>
 *              for more information.
 *
 * \note    H5DOread_chunk() and H5DOwrite_chunk() are not
 *          supported under parallel HDF5 and do not support variable-length types.
 *
 * \par Example
 * The following code illustrates the use of H5DOwrite_chunk to write
 * an entire dataset, chunk by chunk:
 * \snippet H5DO_examples.c H5DOwrite
 *
 * \version 1.10.3  Function deprecated in favor of H5Dwrite_chunk.
 *
 * \since   1.8.11
 */
H5_HLDLL herr_t H5DOwrite_chunk(hid_t dset_id, hid_t dxpl_id, uint32_t filters, const hsize_t *offset,
                                size_t data_size, const void *buf);

/**
 * --------------------------------------------------------------------------
 * \ingroup H5DO
 *
 * \brief Reads a raw data chunk directly from a dataset in a file into a buffer.
 *
 * \param[in] dset_id           Identifier for the dataset to be read
 * \param[in] dxpl_id           Transfer property list identifier for
 *                              this I/O operation
 * \param[in] offset            Logical position of the chunk's first
 *                              element in the dataspace
 * \param[in,out] filters       Mask for identifying the filters used
 *                              with the chunk
 * \param[out] buf              Buffer containing the chunk read from
 *                              the dataset
 *
 * \return \herr_t
 *
 * \deprecated This function was deprecated in favor of the function
 *             H5Dread_chunk() as of HDF5-1.10.3.
 *             In HDF5 1.10.3, the functionality of H5DOread_chunk()
 *             was moved to H5Dread_chunk().
 * \deprecated For compatibility, this API call has been left as a stub which
 *             simply calls H5Dread_chunk(). New code should use H5Dread_chunk().
 *
 * \details H5DOread_chunk() reads a raw data chunk as specified
 *          by its logical \p offset in a chunked dataset \p dset_id
 *          from the dataset in the file into the application memory
 *          buffer \p buf. The data in \p buf is read directly from the file,
 *          bypassing the library's internal data transfer pipeline,
 *          including filters.
 *
 *          \p dxpl_id is a data transfer property list identifier.
 *
 *          The mask \p filters indicates which filters were used when the
 *          chunk was written. A zero value indicates that all enabled filters
 *          were applied to the chunk. A filter is skipped if the bit corresponding
 *          to the filter's position in the pipeline
 *          (<tt>0 ≤ position < 32</tt>) is turned on.
 *
 *          \p offset is an array specifying the logical position of the first
 *          element of the chunk in the dataset's dataspace. The length of the
 *          offset array must equal the number of dimensions, or rank, of the
 *          dataspace. The values in \p offset must not exceed the dimension
 *          limits and must specify a point that falls on a dataset chunk boundary.
 *
 *          \p buf is the memory buffer that receives the chunk read from the
 *          dataset in the file.
 *
 * \par Example
 * The following code illustrates the use of H5DOread_chunk()
 * to read a chunk from a dataset:
 * \snippet H5DO_examples.c H5DOread
 *
 * \version 1.10.3  Function deprecated in favor of H5Dread_chunk.
 *
 * \since   1.10.2, 1.8.19
 */
H5_HLDLL herr_t H5DOread_chunk(hid_t dset_id, hid_t dxpl_id, const hsize_t *offset, uint32_t *filters /*out*/,
                               void *buf /*out*/);

#endif /* H5_NO_DEPRECATED_SYMBOLS */

#ifdef __cplusplus
}
#endif

#endif /* H5DOpublic_H */