Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-18 09:16:02

0001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
0002  * Copyright by The HDF Group.                                               *
0003  * All rights reserved.                                                      *
0004  *                                                                           *
0005  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
0006  * terms governing use, modification, and redistribution, is contained in    *
0007  * the COPYING file, which can be found at the root of the source code       *
0008  * distribution tree, or in https://www.hdfgroup.org/licenses.               *
0009  * If you do not have access to either file, you may request a copy from     *
0010  * help@hdfgroup.org.                                                        *
0011  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
0012 
0013 /*****************************************************************************
0014  * Read-Only S3 Virtual File Driver (VFD)
0015  *
0016  * This is the header for the S3 Communications module
0017  *
0018  * ***NOT A FILE DRIVER***
0019  *
0020  * Purpose:
0021  *
0022  *     - Provide structures and functions related to communicating with
0023  *       Amazon S3 (Simple Storage Service).
0024  *     - Abstract away the REST API (HTTP,
0025  *       networked communications) behind a series of uniform function calls.
0026  *     - Handle AWS4 authentication, if appropriate.
0027  *     - Fail predictably in event of errors.
0028  *     - Eventually, support more S3 operations, such as creating, writing to,
0029  *       and removing Objects remotely.
0030  *
0031  *     translates:
0032  *     `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
0033  *     to:
0034  *     ```
0035  *     GET myfile HTTP/1.1
0036  *     Host: somewhere.me
0037  *     Range: bytes=4096-5115
0038  *     ```
0039  *     and places received bytes from HTTP response...
0040  *     ```
0041  *     HTTP/1.1 206 Partial Content
0042  *     Content-Range: bytes 4096-5115/63239
0043  *
0044  *     <bytes>
0045  *     ```
0046  *     ...in destination buffer.
0047  *
0048  *****************************************************************************/
0049 
0050 #include "H5private.h" /* Generic Functions        */
0051 
0052 #ifdef H5_HAVE_ROS3_VFD
0053 
0054 /* Necessary S3 headers */
0055 #include <curl/curl.h>
0056 #include <openssl/evp.h>
0057 #include <openssl/hmac.h>
0058 #include <openssl/sha.h>
0059 
0060 /*****************
0061  * PUBLIC MACROS *
0062  *****************/
0063 
0064 /* Hexadecimal string of the pre-computed sha256 checksum of the empty string,
0065  * i.e., hex(sha256sum(""))
0066  */
0067 #define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
0068 
0069 /* Buffer size for an ISO8601-format string: 16 chars plus null terminator.
0070  * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'Z')
0071  */
0072 #define ISO8601_SIZE 17
0073 
0074 /* Buffer size for an RFC7231-format string: 29 chars plus null terminator.
0075  * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT"
0076  */
0077 #define RFC7231_SIZE 30
0078 
0079 /*---------------------------------------------------------------------------
0080  *
0081  * Macro: ISO8601NOW()
0082  *
0083  * Purpose:
0084  *
0085  *     Write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to `dest`,
0086  *     e.g., "20170630T204155Z".
0087  *
0088  *     Wrapper for strftime(); `dest` must hold at least ISO8601_SIZE chars
0089  *     and `now_gm` is the `struct tm *` holding the time to format.
0090  *     It is left to the programmer to check the return value of
0091  *     ISO8601NOW (should equal ISO8601_SIZE - 1).
0092  *
0093  *---------------------------------------------------------------------------
0094  */
0095 #define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))
0096 
0097 /*---------------------------------------------------------------------------
0098  *
0099  * Macro: RFC7231NOW()
0100  *
0101  * Purpose:
0102  *
0103  *     Write "Day, dd Mmm YYYY HH:MM:SS GMT" to `dest`,
0104  *     e.g., "Fri, 30 Jun 2017 20:41:55 GMT".
0105  *
0106  *     Wrapper for strftime(); `dest` must hold at least RFC7231_SIZE chars
0107  *     and `now_gm` is the `struct tm *` holding the time to format.
0108  *     It is left to the programmer to check the return value of
0109  *     RFC7231NOW (should equal RFC7231_SIZE - 1).
0110  *
0111  *---------------------------------------------------------------------------
0112  */
0113 #define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))
0114 
0115 /* Reasonable maximum length of a credential string.
0116  * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
0117  *  17 <- "////aws4_request\0"
0118  *   2 <- "s3" (service)
0119  *   8 <- "YYYYmmdd" (date)
0120  * 128 <- (access_id)
0121  * 155 :: sum -- NOTE(review): no allowance for the region string; confirm
0122  */
0123 #define S3COMMS_MAX_CREDENTIAL_SIZE 155
0124 
0125 /*---------------------------------------------------------------------------
0126  *
0127  * Macro: S3COMMS_FORMAT_CREDENTIAL()
0128  *
0129  * Purpose:
0130  *
0131  *     Format "S3 Credential" string from inputs, for AWS4.
0132  *
0133  *     Wrapper for snprintf().
0134  *
0135  *     _HAS NO ERROR-CHECKING FACILITIES_
0136  *     It is left to programmer to ensure that return value confers success
0137  *     (snprintf() output is truncated when it returns >= the size), e.g.,
0138  *     ```
0139  *     assert( S3COMMS_MAX_CREDENTIAL_SIZE >
0140  *             S3COMMS_FORMAT_CREDENTIAL(...) );
0141  *     ```
0142  *
0143  *     Writes "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
0144  *     to `dest`, which must hold at least S3COMMS_MAX_CREDENTIAL_SIZE chars.
0145  *
0146  *     ALL inputs must be null-terminated strings.
0147  *
0148  *     `access` should be the user's access key ID.
0149  *     `iso8601_date` must be of format "YYYYmmdd".
0150  *     `region` should be relevant AWS region, e.g., "us-east-1".
0151  *     `service` should be "s3".
0152  *
0153  *---------------------------------------------------------------------------
0154  */
0155 #define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service)                               \
0156     snprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date),      \
0157              (region), (service))
0158 
0159 /*********************
0160  * PUBLIC STRUCTURES *
0161  *********************/
0162 
0163 /*----------------------------------------------------------------------------
0164  *
0165  * Structure: hrb_node_t
0166  *
0167  * HTTP Header Field Node
0168  *
0169  *
0170  *
0171  * Maintain an ordered (linked) list of HTTP Header fields.
0172  *
0173  * Provides efficient access and manipulation of a logical sequence of
0174  * HTTP header fields, of particular use when composing an
0175  * "S3 Canonical Request" for authentication.
0176  *
0177  * - The creation of a Canonical Request involves:
0178  *     - convert field names to lower case
0179  *     - sort by this lower-case name
0180  *     - convert ": " name-value separator in HTTP string to ":"
0181  *     - get sorted lowercase names without field or separator
0182  *
0183  * As HTTP headers allow headers in any order (excepting the case of multiple
0184  * headers with the same name), the list ordering can be optimized for Canonical
0185  * Request creation, suggesting alphabetical order. For more expedient insertion
0186  * and removal of elements in the list, linked list seems preferable to a
0187  * dynamically-expanding array. The usually-smaller number of entries (5 or
0188  * fewer) makes performance overhead of traversing the list trivial.
0189  *
0190  * The above requirements of creating a Canonical Request suggest a reasonable
0191  * trade-off of speed for space with the option to compute elements as needed
0192  * or to have the various elements prepared and stored in the structure
0193  * (e.g. name, value, lowername, concatenated name:value)
0194  * The structure currently is implemented to pre-compute.
0195  *
0196  * At all times, the "first" node of the list should be the least,
0197  * alphabetically. For all nodes, the `next` node should be either NULL or
0198  * of greater alphabetical value.
0199  *
0200  * Each node contains its own header field information, plus a pointer to the
0201  * next node.
0202  *
0203  * It is not allowed to have multiple nodes with the same _lowercase_ `name`s
0204  * in the same list
0205  * (i.e., name is case-insensitive for access and modification.)
0206  *
0207  * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
0208  * strings allocated specifically for their node.
0209  *
0210  * `name` (char *)
0211  *
0212  *     Case-meaningful name of the HTTP field.
0213  *     Given case is how it is supplied to networking code.
0214  *     e.g., "Range"
0215  *
0216  * `lowername` (char *)
0217  *
0218  *     Lowercase copy of name.
0219  *     e.g., "range"
0220  *
0221  * `value` (char *)
0222  *
0223  *     Case-meaningful value of HTTP field.
0224  *     e.g., "bytes=0-9"
0225  *
0226  * `cat` (char *)
0227  *
0228  *     Concatenated, null-terminated string of HTTP header line,
0229  *     as the field would appear in an HTTP request.
0230  *     e.g., "Range: bytes=0-9"
0231  *
0232  * `next` (hrb_node_t *)
0233  *
0234  *     Pointer to next node in the list, or NULL sentinel as end of list.
0235  *     Next node must have a greater `lowername` as determined by strcmp().
0236  *
0237  *----------------------------------------------------------------------------
0238  */
0239 typedef struct hrb_node_t {
0240     char              *name;      /* field name, case as given, e.g. "Range" */
0241     char              *value;     /* field value, e.g. "bytes=0-9" */
0242     char              *cat;       /* full header line, e.g. "Range: bytes=0-9" */
0243     char              *lowername; /* lowercase copy of `name`, e.g. "range" */
0244     struct hrb_node_t *next;      /* next node in sorted list, or NULL at end */
0245 } hrb_node_t;
0246 
0247 /*----------------------------------------------------------------------------
0248  *
0249  * Structure: hrb_t
0250  *
0251  * HTTP Request Buffer structure
0252  *
0253  *
0254  *
0255  * Logically represent an HTTP request
0256  *
0257  *     GET /myplace/myfile.h5 HTTP/1.1
0258  *     Host: over.rainbow.oz
0259  *     Date: Fri, 01 Dec 2017 12:35:04 CST
0260  *
0261  *     <body>
0262  *
0263  * ...with fast, efficient access to and modification of primary and field
0264  * elements.
0265  *
0266  * Structure for building HTTP requests while hiding much of the string
0267  * processing required "under the hood."
0268  *
0269  * Information about the request target -- the first line -- and the body text,
0270  * if any, are managed directly with this structure. All header fields, e.g.,
0271  * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and
0272  * included in the request by a pointer to the head of the list.
0273  *
0274  *
0275  * `body` (char *) :
0276  *
0277  *     Pointer to start of HTTP body.
0278  *
0279  *     Can be NULL, in which case it is treated as the empty string, "".
0280  *
0281  * `body_len` (size_t) :
0282  *
0283  *     Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
0284  *
0285  * `first_header` (hrb_node_t *) :
0286  *
0287  *     Pointer to first SORTED header node, if any.
0288  *     It is left to the programmer to ensure that this node and associated
0289  *     list is destroyed when done.
0290  *
0291  * `resource` (char *) :
0292  *
0293  *     Pointer to resource URL string, e.g., "/folder/page.xhtml".
0294  *
0295  * `verb` (char *) :
0296  *
0297  *     Pointer to HTTP verb string, e.g., "GET".
0298  *
0299  * `version` (char *) :
0300  *
0301  *     Pointer to HTTP version string, e.g., "HTTP/1.1".
0302  *
0303  *----------------------------------------------------------------------------
0304  */
0305 typedef struct {
0306     char       *body;         /* start of HTTP body; NULL is treated as "" */
0307     size_t      body_len;     /* number of bytes in `body`; 0 if empty or NULL */
0308     hrb_node_t *first_header; /* first sorted header node, if any */
0309     char       *resource;     /* resource URL string, e.g. "/folder/page.xhtml" */
0310     char       *verb;         /* HTTP verb string, e.g. "GET" */
0311     char       *version;      /* HTTP version string, e.g. "HTTP/1.1" */
0312 } hrb_t;
0313 
0314 /*----------------------------------------------------------------------------
0315  *
0316  * Structure: parsed_url_t
0317  *
0318  *
0319  * Represent a URL with easily-accessed pointers to logical elements within.
0320  * These elements (components) are stored as null-terminated strings (or just
0321  * NULLs). These components should be allocated for the structure, making the
0322  * data as safe as possible from modification. If a component is NULL, it is
0323  * either implicit in or absent from the URL.
0324  *
0325  * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
0326  *  ^--^   ^-----------------------^ ^--^ ^---------^ ^-------------------^
0327  * Scheme             Host           Port  Resource        Query/-ies
0328  *
0329  *
0330  * `scheme` (char *)
0331  *
0332  *     String representing which protocol is to be expected.
0333  *     _Must_ be present.
0334  *     "http", "https", "ftp", e.g.
0335  *
0336  * `host` (char *)
0337  *
0338  *     String of host, either domain name, IPv4, or IPv6 format.
0339  *     _Must_ be present.
0340  *     "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
0341  *
0342  * `port` (char *)
0343  *
0344  *     String representation of specified port. Must resolve to a valid unsigned
0345  *     integer.
0346  *     "9000", "80"
0347  *
0348  * `path` (char *)
0349  *
0350  *     Path to resource on host. If not specified, assumes root "/".
0351  *     "lollipop_guild.wav", "characters/witches/white.dat"
0352  *
0353  * `query` (char *)
0354  *
0355  *     Single string of all query parameters in url (if any).
0356  *     "arg1=value1&arg2=value2"
0357  *
0358  *----------------------------------------------------------------------------
0359  */
0360 typedef struct {
0361     char *scheme; /* required; protocol to expect, e.g. "http" */
0362     char *host;   /* required; domain name, IPv4, or IPv6 format */
0363     char *port;   /* port as a string, e.g. "8080"; NULL if implicit or absent */
0364     char *path;   /* path to resource on host; NULL if implicit or absent */
0365     char *query;  /* all query parameters as one string; NULL if absent */
0366 } parsed_url_t;
0367 
0368 /*----------------------------------------------------------------------------
0369  *
0370  * Structure: s3r_t
0371  *
0372  *
0373  *
0374  * S3 request structure "handle".
0375  *
0376  * Holds persistent information for Amazon S3 requests.
0377  *
0378  * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
0379  *
0380  * Intended to be re-used for operations on a remote object.
0381  *
0382  * Cleaned up through `H5FD_s3comms_s3r_close()`.
0383  *
0384  * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has
0385  * undefined behavior if called to perform in multiple threads.
0386  *
0387  *
0388  * `curlhandle` (CURL *)
0389  *
0390  *     Pointer to the curl_easy handle generated for the request.
0391  *
0392  * `httpverb` (char *)
0393  *
0394  *     Pointer to NULL-terminated string. HTTP verb,
0395  *     e.g. "GET", "HEAD", "PUT", etc.
0396  *
0397  *     Default is NULL, resulting in a "GET" request.
0398  *
0399  * `purl` (parsed_url_t *)
0400  *
0401  *     Pointer to structure holding the elements of URL for file open.
0402  *
0403  *     e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
0404  *     parsed into...
0405  *     {   scheme: "http"
0406  *         host:   "bucket.aws.com"
0407  *         port:   "8080"
0408  *         path:   "myfile.dat"
0409  *         query:  "q1=v1&q2=v2"
0410  *     }
0411  *
0412  *     Cannot be NULL.
0413  *
0414  * `region` (char *)
0415  *
0416  *     Pointer to NULL-terminated string, specifying S3 "region",
0417  *     e.g., "us-east-1".
0418  *
0419  *     Required to authenticate.
0420  *
0421  * `secret_id` (char *)
0422  *
0423  *     Pointer to NULL-terminated string for "secret" access id to S3 resource.
0424  *
0425  *     Required to authenticate.
0426  *
0427  * `signing_key` (unsigned char *)
0428  *
0429  *     Pointer to `SHA256_DIGEST_LENGTH`-long string for "reusable" signing
0430  *     key, generated via
0431  *     `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
0432  *         "<yyyyMMDD>"), "<aws-region>"), "<aws-service>"), "aws4_request")`
0433  *     which may be re-used for several (up to seven (7)) days from creation?
0434  *     Computed once upon file open.
0435  *
0436  *     Required to authenticate.
0437  *
0438  *----------------------------------------------------------------------------
0439  */
0440 typedef struct {
0441     CURL          *curlhandle;  /* curl easy handle generated for the request */
0442     size_t         filesize;    /* NOTE(review): presumably size of the remote object in bytes -- confirm */
0443     char          *httpverb;    /* HTTP verb, e.g. "GET"; NULL results in a "GET" request */
0444     parsed_url_t  *purl;        /* elements of URL for file open; cannot be NULL */
0445     char          *region;      /* S3 region, e.g. "us-east-1"; required to authenticate */
0446     char          *secret_id;   /* "secret" access id; required to authenticate */
0447     unsigned char *signing_key; /* SHA256_DIGEST_LENGTH-long key; required to authenticate */
0448     char          *token;       /* NOTE(review): undocumented here; presumably a session token -- confirm */
0449 } s3r_t;
0450 
0451 #ifdef __cplusplus
0452 extern "C" {
0453 #endif
0454 
0455 /*******************************************
0456  * DECLARATION OF HTTP FIELD LIST ROUTINES *
0457  *******************************************/
0458 
0459 H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value);
0460 
0461 /***********************************************
0462  * DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
0463  ***********************************************/
0464 
0465 H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf);
0466 
0467 H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host);
0468 
0469 /*************************************
0470  * DECLARATION OF S3REQUEST ROUTINES *
0471  *************************************/
0472 
0473 H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle);
0474 
0475 H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle);
0476 
0477 H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[],
0478                                     const unsigned char signing_key[], const char token[]);
0479 
0480 H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest);
0481 
0482 /*********************************
0483  * DECLARATION OF OTHER ROUTINES *
0484  *********************************/
0485 
0486 H5_DLL struct tm *gmnow(void);
0487 
0488 H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size,
0489                                                  char *signed_headers_dest, int sh_size, hrb_t *http_request);
0490 
0491 H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len, bool lowercase);
0492 
0493 H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl);
0494 
0495 H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg,
0496                                        size_t msg_len, char *dest);
0497 
0498 H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out,
0499                                             char *aws_region_out);
0500 
0501 H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl);
0502 
0503 H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region,
0504                                        const char *iso8601now);
0505 
0506 H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now,
0507                                           const char *region);
0508 #ifdef __cplusplus
0509 }
0510 #endif
0511 
0512 #endif /* H5_HAVE_ROS3_VFD */