Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:20:43

0001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
0002  * Copyright by The HDF Group.                                               *
0003  * All rights reserved.                                                      *
0004  *                                                                           *
0005  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
0006  * terms governing use, modification, and redistribution, is contained in    *
0007  * the COPYING file, which can be found at the root of the source code       *
0008  * distribution tree, or in https://www.hdfgroup.org/licenses.               *
0009  * If you do not have access to either file, you may request a copy from     *
0010  * help@hdfgroup.org.                                                        *
0011  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
0012 
0013 /*****************************************************************************
0014  * Read-Only S3 Virtual File Driver (VFD)
0015  *
0016  * This is the header for the S3 Communications module
0017  *
0018  * ***NOT A FILE DRIVER***
0019  *
0020  * Purpose:
0021  *
0022  *     - Provide structures and functions related to communicating with
0023  *       Amazon S3 (Simple Storage Service).
0024  *     - Abstract away the REST API (HTTP,
0025  *       networked communications) behind a series of uniform function calls.
0026  *     - Handle AWS4 authentication, if appropriate.
0027  *     - Eventually, support more S3 operations, such as creating, writing to,
0028  *       and removing Objects remotely.
0029  *
0030  *     translates:
0031  *     `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
0032  *     to:
0033  *     ```
0034  *     GET myfile HTTP/1.1
0035  *     Host: somewhere.me
0036  *     Range: bytes=4096-5115
0037  *     ```
0038  *     and places received bytes from HTTP response...
0039  *     ```
0040  *     HTTP/1.1 206 Partial Content
0041  *     Content-Range: bytes 4096-5115/63239
0042  *
0043  *     <bytes>
0044  *     ```
0045  *     ...in destination buffer.
0046  *
0047  *****************************************************************************/
0048 
0049 #include "H5private.h" /* Generic Functions        */
0050 #include "H5FDros3.h"  /* ros3 VFD                 */
0051 
0052 #ifdef H5_HAVE_ROS3_VFD
0053 
0054 /* Necessary S3 headers */
0055 #include <curl/curl.h>
0056 #include <openssl/evp.h>
0057 #include <openssl/hmac.h>
0058 #include <openssl/sha.h>
0059 
0060 /**********
0061  * MACROS *
0062  **********/
0063 
0064 /* Lowercase hexadecimal string (64 digits) of the pre-computed SHA-256
0065  * checksum of the empty string, i.e., hex(sha256sum("")).
0066  */
0067 #define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
0068 
0069 /* Buffer size for an ISO8601-format timestamp: 16 characters plus null
0070  * terminator. Example string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'Z')
0071  */
0072 #define ISO8601_SIZE 17
0073 
0074 /* Buffer size for an RFC7231-format timestamp: 29 characters plus null
0075  * terminator. Example string: "Fri, 30 Jun 2017 20:41:55 GMT"
0076  */
0077 #define RFC7231_SIZE 30
0078 
0079 /*---------------------------------------------------------------------------
0080  *
0081  * Macro: ISO8601NOW()
0082  *
0083  * Purpose:
0084  *
0085  *     Write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to `dest`,
0086  *     e.g., "20170630T204155Z", from the `struct tm` at `now_gm`.
0087  *
0088  *     Wrapper for strftime(); `dest` needs room for ISO8601_SIZE bytes.
0089  *
0090  *     It is left to the programmer to check return value of
0091  *     ISO8601NOW (should equal ISO8601_SIZE - 1).
0092  *
0093  *---------------------------------------------------------------------------
0094  */
0095 #define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))
0096 
0097 /*---------------------------------------------------------------------------
0098  *
0099  * Macro: RFC7231NOW()
0100  *
0101  * Purpose:
0102  *
0103  *     Write "Day, dd Mmm YYYY HH:MM:SS GMT" to `dest`,
0104  *     e.g., "Fri, 30 Jun 2017 20:41:55 GMT", from the `struct tm` at `now_gm`.
0105  *
0106  *     Wrapper for strftime(); `dest` needs room for RFC7231_SIZE bytes.
0107  *     NOTE: %a and %b follow the current locale -- "C" locale names expected.
0108  *     It is left to the programmer to check return value of
0109  *     RFC7231NOW (should equal RFC7231_SIZE - 1).
0110  *
0111  *---------------------------------------------------------------------------
0112  */
0113 #define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))
0114 
0115 /* Reasonable maximum length of a credential string.
0116  * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
0117  *  17 <- "////aws4_request\0"
0118  *   2 <- "s3" (service)
0119  *   8 <- "YYYYmmdd" (date)
0120  * 128 <- (access_id)
0121  * 155 :: sum (NOTE(review): no allowance for the region string -- confirm)
0122  */
0123 #define S3COMMS_MAX_CREDENTIAL_SIZE 155
0124 
0125 /*---------------------------------------------------------------------------
0126  * Macro: S3COMMS_FORMAT_CREDENTIAL()
0127  *
0128  * Purpose:
0129  *
0130  *     Format "S3 Credential" string from inputs, for AWS4.
0131  *
0132  *     Wrapper for snprintf(); writes at most S3COMMS_MAX_CREDENTIAL_SIZE
0133  *     bytes (terminator included) to `dest`, truncating longer results.
0134  *     _HAS NO ERROR-CHECKING FACILITIES_
0135  *     It is left to programmer to ensure that return value confers success.
0136  *     e.g., (snprintf() returns the untruncated length, less terminator)
0137  *     ```
0138  *     assert( S3COMMS_MAX_CREDENTIAL_SIZE >
0139  *             S3COMMS_FORMAT_CREDENTIAL(...) );
0140  *     ```
0141  *
0142  *     "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
0143  *     assuming that `dest` has adequate space.
0144  *
0145  *     ALL inputs must be null-terminated strings.
0146  *
0147  *     `access` should be the user's access key ID.
0148  *     `iso8601_date` must be of format "YYYYmmdd".
0149  *     `region` should be relevant AWS region, e.g., "us-east-1".
0150  *     `service` should be "s3".
0151  *---------------------------------------------------------------------------
0152  */
0153 #define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service)                               \
0154     snprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date),      \
0155              (region), (service))
0156 
0157 /*********************
0158  * PUBLIC STRUCTURES *
0159  *********************/
0160 
0161 /*----------------------------------------------------------------------------
0162  * Structure: hrb_node_t
0163  *
0164  * HTTP Header Field Node
0165  *
0166  * Maintain an ordered (linked) list of HTTP Header fields.
0167  *
0168  * Provides efficient access and manipulation of a logical sequence of
0169  * HTTP header fields, of particular use when composing an
0170  * "S3 Canonical Request" for authentication.
0171  *
0172  * - The creation of a Canonical Request involves:
0173  *     - convert field names to lower case
0174  *     - sort by this lower-case name
0175  *     - convert ": " name-value separator in HTTP string to ":"
0176  *     - get sorted lowercase names without field or separator
0177  *
0178  * As HTTP headers allow headers in any order (excepting the case of multiple
0179  * headers with the same name), the list ordering can be optimized for Canonical
0180  * Request creation, suggesting alphabetical order. For more expedient insertion
0181  * and removal of elements in the list, linked list seems preferable to a
0182  * dynamically-expanding array. The usually-smaller number of entries (5 or
0183  * fewer) makes performance overhead of traversing the list trivial.
0184  *
0185  * The above requirements of creating a Canonical Request suggest a reasonable
0186  * trade-off of speed for space with the option to compute elements as needed
0187  * or to have the various elements prepared and stored in the structure
0188  * (e.g. name, value, lowername, concatenated name:value)
0189  * The structure currently is implemented to pre-compute.
0190  *
0191  * At all times, the "first" node of the list should be the least,
0192  * alphabetically. For all nodes, the `next` node should be either NULL or
0193  * of greater alphabetical value.
0194  *
0195  * Each node contains its own header field information, plus a pointer to the
0196  * next node.
0197  *
0198  * It is not allowed to have multiple nodes with the same _lowercase_ `name`s
0199  * in the same list
0200  * (i.e., name is case-insensitive for access and modification.)
0201  *
0202  * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
0203  * strings allocated specifically for their node.
0204  *
0205  * `name` (char *)
0206  *
0207  *     Case-meaningful name of the HTTP field.
0208  *     Given case is how it is supplied to networking code.
0209  *     e.g., "Range"
0210  *
0211  * `lowername` (char *)
0212  *
0213  *     Lowercase copy of name.
0214  *     e.g., "range"
0215  *
0216  * `value` (char *)
0217  *
0218  *     Case-meaningful value of HTTP field.
0219  *     e.g., "bytes=0-9"
0220  *
0221  * `cat` (char *)
0222  *
0223  *     Concatenated, null-terminated string of HTTP header line,
0224  *     as the field would appear in an HTTP request.
0225  *     e.g., "Range: bytes=0-9"
0226  *
0227  * `next` (hrb_node_t *)
0228  *
0229  *     Pointer to next node in the list, or NULL sentinel as end of list.
0230  *     Next node must have a greater `lowername` as determined by strcmp().
0231  *----------------------------------------------------------------------------
0232  */
0233 typedef struct hrb_node_t {
0234     char              *name;      /* field name, case as given, e.g. "Range" */
0235     char              *value;     /* field value, e.g. "bytes=0-9" */
0236     char              *cat;       /* full header line, e.g. "Range: bytes=0-9" */
0237     char              *lowername; /* lowercase copy of name, e.g. "range" */
0238     struct hrb_node_t *next;      /* next node (greater lowername) or NULL at end */
0239 } hrb_node_t;
0240 
0241 /*----------------------------------------------------------------------------
0242  * Structure: hrb_t
0243  *
0244  * HTTP Request Buffer structure
0245  *
0246  * Logically represent an HTTP request
0247  *
0248  *     GET /myplace/myfile.h5 HTTP/1.1
0249  *     Host: over.rainbow.oz
0250  *     Date: Fri, 01 Dec 2017 12:35:04 GMT
0251  *
0252  *     <body>
0253  *
0254  * ...with fast, efficient access to and modification of primary and field
0255  * elements.
0256  *
0257  * Structure for building HTTP requests while hiding much of the string
0258  * processing required "under the hood."
0259  *
0260  * Information about the request target -- the first line -- and the body text,
0261  * if any, are managed directly with this structure. All header fields, e.g.,
0262  * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and
0263  * included in the request by a pointer to the head of the list.
0264  *
0265  *
0266  * `body` (char *) :
0267  *
0268  *     Pointer to start of HTTP body.
0269  *
0270  *     Can be NULL, in which case it is treated as the empty string, "".
0271  *
0272  * `body_len` (size_t) :
0273  *
0274  *     Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
0275  *
0276  * `first_header` (hrb_node_t *) :
0277  *
0278  *     Pointer to first SORTED header node, if any.
0279  *     It is left to the programmer to ensure that this node and associated
0280  *     list is destroyed when done.
0281  *
0282  * `resource` (char *) :
0283  *
0284  *     Pointer to resource URL string, e.g., "/folder/page.xhtml".
0285  *
0286  * `verb` (char *) :
0287  *
0288  *     Pointer to HTTP verb string, e.g., "GET".
0289  *
0290  * `version` (char *) :
0291  *
0292  *     Pointer to HTTP version string, e.g., "HTTP/1.1".
0293  *----------------------------------------------------------------------------
0294  */
0295 typedef struct {
0296     char       *body;         /* start of HTTP body; NULL is treated as "" */
0297     size_t      body_len;     /* bytes in body; 0 if body is empty or NULL */
0298     hrb_node_t *first_header; /* head of sorted header list, if any */
0299     char       *resource;     /* resource URL string, e.g. "/folder/page.xhtml" */
0300     char       *verb;         /* HTTP verb string, e.g. "GET" */
0301     char       *version;      /* HTTP version string, e.g. "HTTP/1.1" */
0302 } hrb_t;
0303 
0304 /*----------------------------------------------------------------------------
0305  * Structure: parsed_url_t
0306  *
0307  * Represent a URL with easily-accessed pointers to logical elements within.
0308  * These elements (components) are stored as null-terminated strings (or just
0309  * NULLs). These components should be allocated for the structure, making the
0310  * data as safe as possible from modification. If a component is NULL, it is
0311  * either implicit in or absent from the URL.
0312  *
0313  * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
0314  *  ^--^   ^-----------------------^ ^--^ ^---------^ ^-------------------^
0315  * Scheme             Host           Port  Resource        Query/-ies
0316  *
0317  *
0318  * `scheme` (char *)
0319  *
0320  *     String representing which protocol is to be expected.
0321  *     _Must_ be present.
0322  *     "http", "https", "ftp", e.g.
0323  *
0324  * `host` (char *)
0325  *
0326  *     String of host, either domain name, IPv4, or IPv6 format.
0327  *     _Must_ be present.
0328  *     "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
0329  *
0330  * `port` (char *)
0331  *
0332  *     String representation of specified port. Must resolve to a valid unsigned
0333  *     integer.
0334  *     "9000", "80"
0335  *
0336  * `path` (char *)
0337  *
0338  *     Path to resource on host. If not specified, assumes root "/".
0339  *     "lollipop_guild.wav", "characters/witches/white.dat"
0340  *
0341  * `query` (char *)
0342  *
0343  *     Single string of all query parameters in url (if any).
0344  *     "arg1=value1&arg2=value2"
0345  *----------------------------------------------------------------------------
0346  */
0347 typedef struct {
0348     char *scheme; /* required; e.g. "http", "https" */
0349     char *host;   /* required; domain name, IPv4, or IPv6 */
0350     char *port;   /* optional; unsigned-integer string such as "80", NULL if absent */
0351     char *path;   /* optional; NULL if absent (root "/" is then assumed) */
0352     char *query;  /* optional; all query parameters as one string, NULL if none */
0353 } parsed_url_t;
0354 
0355 /*----------------------------------------------------------------------------
0356  * Structure: s3r_t
0357  *
0358  * S3 request structure "handle".
0359  *
0360  * Holds persistent information for Amazon S3 requests.
0361  *
0362  * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
0363  *
0364  * Intended to be reused for operations on a remote object.
0365  *
0366  * Cleaned up through `H5FD_s3comms_s3r_close()`.
0367  *
0368  * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has
0369  * undefined behavior if called to perform in multiple threads.
0370  *
0371  *
0372  * curlhandle
0373  *
0374  *     Pointer to the curl_easy handle generated for the request
0375  *
0376  * httpverb
0377  *
0378  *     Pointer to NULL-terminated string. HTTP verb,
0379  *     e.g. "GET", "HEAD", "PUT", etc.
0380  *
0381  *     Default is NULL, resulting in a "GET" request
0382  *
0383  * purl ("parsed url")
0384  *
0385  *     Pointer to structure holding the elements of URL for file open
0386  *
0387  *     e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
0388  *     parsed into...
0389  *     {   scheme: "http"
0390  *         host:   "bucket.aws.com"
0391  *         port:   "8080"
0392  *         path:   "myfile.dat"
0393  *         query:  "q1=v1&q2=v2"
0394  *     }
0395  *
0396  *     Cannot be NULL
0397  *
0398  * region
0399  *
0400  *     Pointer to NULL-terminated string, specifying S3 "region"
0401  *     e.g., "us-east-1".
0402  *
0403  *     Required to authenticate
0404  *
0405  * secret_id
0406  *
0407  *     Pointer to NULL-terminated string for "secret" access id to S3 resource
0408  *
0409  *     Required to authenticate
0410  *
0411  * signing_key
0412  *
0413  *     Pointer to `SHA256_DIGEST_LENGTH`-long buffer for "reusable" signing
0414  *     key, generated via
0415  *     `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
0416  *         "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")`
0417  *     which may be reused for several (up to seven (7)) days from
0418  *     creation.
0419  *
0420  *     Computed once upon file open from the secret key string in the fapl
0421  *
0422  *     Required to authenticate
0423  *----------------------------------------------------------------------------
0424  */
0425 typedef struct {
0426     CURL          *curlhandle;  /* curl easy handle; do not share between threads */
0427     size_t         filesize;    /* NOTE(review): presumably size of the remote object in bytes -- confirm */
0428     char          *httpverb;    /* HTTP verb, e.g. "GET"; NULL results in a "GET" request */
0429     parsed_url_t  *purl;        /* parsed elements of the URL for file open; cannot be NULL */
0430     char          *region;      /* S3 region, e.g. "us-east-1"; required to authenticate */
0431     char          *secret_id;   /* "secret" access id; required to authenticate */
0432     unsigned char *signing_key; /* SHA256_DIGEST_LENGTH-long key buffer; required to authenticate */
0433     char          *token;       /* NOTE(review): undocumented in the header comment; presumably a session token -- confirm */
0434 } s3r_t;
0435 
0436 #ifdef __cplusplus
0437 extern "C" {
0438 #endif
0439 
0440 /*******************************************
0441  * DECLARATION OF HTTP FIELD LIST ROUTINES *
0442  *******************************************/
0443 /* `L` addresses the list head pointer. NOTE(review): presumably sets/updates field `name` -- confirm in .c */
0444 H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value);
0445 
0446 /***********************************************
0447  * DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
0448  ***********************************************/
0449 /* Takes the address of the caller's hrb_t pointer. NOTE(review): presumably frees it and NULLs `*buf` -- confirm */
0450 H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf);
0451 /* Returns an hrb_t pointer. NOTE(review): presumably a new request, NULL on failure -- confirm in .c */
0452 H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host);
0453 
0454 /*************************************
0455  * DECLARATION OF S3REQUEST ROUTINES *
0456  *************************************/
0457 /* Cleans up a handle obtained from H5FD_s3comms_s3r_open(). */
0458 H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle);
0459 /* NOTE(review): presumably reports handle->filesize -- confirm in .c */
0460 H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle);
0461 /* Instantiates an s3r_t handle, copying the supplied data into it. */
0462 H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[],
0463                                     const unsigned char signing_key[], const char token[]);
0464 /* NOTE(review): presumably reads `len` bytes at `offset` of the remote object into `dest` -- confirm */
0465 H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest);
0466 
0467 /*********************************
0468  * DECLARATION OF OTHER ROUTINES *
0469  *********************************/
0470 /* NOTE(review): presumably current time as broken-down UTC, usable with ISO8601NOW/RFC7231NOW -- confirm */
0471 H5_DLL struct tm *gmnow(void);
0472 /* NOTE(review): cr_size and sh_size are presumably the capacities of the two destination buffers -- confirm */
0473 H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size,
0474                                                  char *signed_headers_dest, int sh_size, hrb_t *http_request);
0475 /* NOTE(review): `dest` size requirement not visible here; presumably 2 * msg_len + 1 chars -- confirm */
0476 H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len, bool lowercase);
0477 /* NOTE(review): presumably releases `purl` and its component strings -- confirm in .c */
0478 H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl);
0479 /* NOTE(review): format and size of `dest` (raw digest vs. hex string) not visible here -- confirm */
0480 H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg,
0481                                        size_t msg_len, char *dest);
0482 /* NOTE(review): the three output buffers are caller-supplied; required sizes not visible here -- confirm */
0483 H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out,
0484                                             char *aws_region_out);
0485 /* Parsed result is delivered through `*purl`. NOTE(review): presumably allocated; who frees -- confirm */
0486 H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl);
0487 /* NOTE(review): `md` presumably a SHA256_DIGEST_LENGTH-long buffer, as s3r_t.signing_key -- confirm */
0488 H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region,
0489                                        const char *iso8601now);
0490 /* NOTE(review): AWS4 "string to sign" builder, judging by name; `dest` size not visible here -- confirm */
0491 H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now,
0492                                           const char *region);
0493 #ifdef __cplusplus
0494 }
0495 #endif
0496 
0497 #endif /* H5_HAVE_ROS3_VFD */