![]() |
|
|||
File indexing completed on 2025-09-17 09:20:43
0001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0002 * Copyright by The HDF Group. * 0003 * All rights reserved. * 0004 * * 0005 * This file is part of HDF5. The full HDF5 copyright notice, including * 0006 * terms governing use, modification, and redistribution, is contained in * 0007 * the COPYING file, which can be found at the root of the source code * 0008 * distribution tree, or in https://www.hdfgroup.org/licenses. * 0009 * If you do not have access to either file, you may request a copy from * 0010 * help@hdfgroup.org. * 0011 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 0012 0013 /***************************************************************************** 0014 * Read-Only S3 Virtual File Driver (VFD) 0015 * 0016 * This is the header for the S3 Communications module 0017 * 0018 * ***NOT A FILE DRIVER*** 0019 * 0020 * Purpose: 0021 * 0022 * - Provide structures and functions related to communicating with 0023 * Amazon S3 (Simple Storage Service). 0024 * - Abstract away the REST API (HTTP, 0025 * networked communications) behind a series of uniform function calls. 0026 * - Handle AWS4 authentication, if appropriate. 0027 * - Eventually, support more S3 operations, such as creating, writing to, 0028 * and removing Objects remotely. 0029 * 0030 * translates: 0031 * `read(some_file, bytes_offset, bytes_length, &dest_buffer);` 0032 * to: 0033 * ``` 0034 * GET myfile HTTP/1.1 0035 * Host: somewhere.me 0036 * Range: bytes=4096-5115 0037 * ``` 0038 * and places received bytes from HTTP response... 0039 * ``` 0040 * HTTP/1.1 206 Partial-Content 0041 * Content-Range: 4096-5115/63239 0042 * 0043 * <bytes> 0044 * ``` 0045 * ...in destination buffer. 0046 * 0047 *****************************************************************************/ 0048 0049 #include "H5private.h" /* Generic Functions */ 0050 #include "H5FDros3.h" /* ros3 VFD */ 0051 0052 #ifdef H5_HAVE_ROS3_VFD 0053 0054 /* Necessary S3 headers */ 0055 #include <curl/curl.h> 0056 #include <openssl/evp.h> 0057 #include <openssl/hmac.h> 0058 #include <openssl/sha.h> 0059 0060 /********** 0061 * MACROS * 0062 **********/ 0063 0064 /* hexadecimal string of pre-computed sha256 checksum of the empty string 0065 * hex(sha256sum("")) 0066 */ 0067 #define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" 0068 0069 /* string length (plus null terminator) 0070 * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'_') 0071 */ 0072 #define ISO8601_SIZE 17 0073 0074 /* string length (plus null terminator) 0075 * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT" 0076 */ 0077 #define RFC7231_SIZE 30 0078 0079 /*--------------------------------------------------------------------------- 0080 * 0081 * Macro: ISO8601NOW() 0082 * 0083 * Purpose: 0084 * 0085 * write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest 0086 * e.g., "20170630T204155Z" 0087 * 0088 * wrapper for strftime() 0089 * 0090 * It is left to the programmer to check return value of 0091 * ISO8601NOW (should equal ISO8601_SIZE - 1). 0092 * 0093 *--------------------------------------------------------------------------- 0094 */ 0095 #define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm)) 0096 0097 /*--------------------------------------------------------------------------- 0098 * 0099 * Macro: RFC7231NOW() 0100 * 0101 * Purpose: 0102 * 0103 * write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest 0104 * e.g., "Fri, 30 Jun 2017 20:41:55 GMT" 0105 * 0106 * wrapper for strftime() 0107 * 0108 * It is left to the programmer to check return value of 0109 * RFC7231NOW (should equal RFC7231_SIZE - 1). 0110 * 0111 *--------------------------------------------------------------------------- 0112 */ 0113 #define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm)) 0114 0115 /* Reasonable maximum length of a credential string. 0116 * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below). 0117 * 17 <- "////aws4_request\0" 0118 * 2 < "s3" (service) 0119 * 8 <- "YYYYmmdd" (date) 0120 * 128 <- (access_id) 0121 * 155 :: sum 0122 */ 0123 #define S3COMMS_MAX_CREDENTIAL_SIZE 155 0124 0125 /*--------------------------------------------------------------------------- 0126 * Macro: H5FD_S3COMMS_FORMAT_CREDENTIAL() 0127 * 0128 * Purpose: 0129 * 0130 * Format "S3 Credential" string from inputs, for AWS4. 0131 * 0132 * Wrapper for snprintf(). 0133 * 0134 * _HAS NO ERROR-CHECKING FACILITIES_ 0135 * It is left to programmer to ensure that return value confers success. 0136 * e.g., 0137 * ``` 0138 * assert( S3COMMS_MAX_CREDENTIAL_SIZE >= 0139 * S3COMMS_FORMAT_CREDENTIAL(...) ); 0140 * ``` 0141 * 0142 * "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request" 0143 * assuming that `dest` has adequate space. 0144 * 0145 * ALL inputs must be null-terminated strings. 0146 * 0147 * `access` should be the user's access key ID. 0148 * `date` must be of format "YYYYmmdd". 0149 * `region` should be relevant AWS region, i.e. "us-east-1". 0150 * `service` should be "s3". 0151 *--------------------------------------------------------------------------- 0152 */ 0153 #define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \ 0154 snprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date), \ 0155 (region), (service)) 0156 0157 /********************* 0158 * PUBLIC STRUCTURES * 0159 *********************/ 0160 0161 /*---------------------------------------------------------------------------- 0162 * Structure: hrb_node_t 0163 * 0164 * HTTP Header Field Node 0165 * 0166 * Maintain a ordered (linked) list of HTTP Header fields. 0167 * 0168 * Provides efficient access and manipulation of a logical sequence of 0169 * HTTP header fields, of particular use when composing an 0170 * "S3 Canonical Request" for authentication. 0171 * 0172 * - The creation of a Canonical Request involves: 0173 * - convert field names to lower case 0174 * - sort by this lower-case name 0175 * - convert ": " name-value separator in HTTP string to ":" 0176 * - get sorted lowercase names without field or separator 0177 * 0178 * As HTTP headers allow headers in any order (excepting the case of multiple 0179 * headers with the same name), the list ordering can be optimized for Canonical 0180 * Request creation, suggesting alphabtical order. For more expedient insertion 0181 * and removal of elements in the list, linked list seems preferable to a 0182 * dynamically-expanding array. The usually-smaller number of entries (5 or 0183 * fewer) makes performance overhead of traversing the list trivial. 0184 * 0185 * The above requirements of creating at Canonical Request suggests a reasonable 0186 * trade-off of speed for space with the option to compute elements as needed 0187 * or to have the various elements prepared and stored in the structure 0188 * (e.g. name, value, lowername, concatenated name:value) 0189 * The structure currently is implemented to pre-compute. 0190 * 0191 * At all times, the "first" node of the list should be the least, 0192 * alphabetically. For all nodes, the `next` node should be either NULL or 0193 * of greater alphabetical value. 0194 * 0195 * Each node contains its own header field information, plus a pointer to the 0196 * next node. 0197 * 0198 * It is not allowed to have multiple nodes with the same _lowercase_ `name`s 0199 * in the same list 0200 * (i.e., name is case-insensitive for access and modification.) 0201 * 0202 * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated 0203 * strings allocated specifically for their node. 0204 * 0205 * `name` (char *) 0206 * 0207 * Case-meaningful name of the HTTP field. 0208 * Given case is how it is supplied to networking code. 0209 * e.g., "Range" 0210 * 0211 * `lowername` (char *) 0212 * 0213 * Lowercase copy of name. 0214 * e.g., "range" 0215 * 0216 * `value` (char *) 0217 * 0218 * Case-meaningful value of HTTP field. 0219 * e.g., "bytes=0-9" 0220 * 0221 * `cat` (char *) 0222 * 0223 * Concatenated, null-terminated string of HTTP header line, 0224 * as the field would appear in an HTTP request. 0225 * e.g., "Range: bytes=0-9" 0226 * 0227 * `next` (hrb_node_t *) 0228 * 0229 * Pointers to next node in the list, or NULL sentinel as end of list. 0230 * Next node must have a greater `lowername` as determined by strcmp(). 0231 *---------------------------------------------------------------------------- 0232 */ 0233 typedef struct hrb_node_t { 0234 char *name; 0235 char *value; 0236 char *cat; 0237 char *lowername; 0238 struct hrb_node_t *next; 0239 } hrb_node_t; 0240 0241 /*---------------------------------------------------------------------------- 0242 * Structure: hrb_t 0243 * 0244 * HTTP Request Buffer structure 0245 * 0246 * Logically represent an HTTP request 0247 * 0248 * GET /myplace/myfile.h5 HTTP/1.1 0249 * Host: over.rainbow.oz 0250 * Date: Fri, 01 Dec 2017 12:35:04 CST 0251 * 0252 * <body> 0253 * 0254 * ...with fast, efficient access to and modification of primary and field 0255 * elements. 0256 * 0257 * Structure for building HTTP requests while hiding much of the string 0258 * processing required "under the hood." 0259 * 0260 * Information about the request target -- the first line -- and the body text, 0261 * if any, are managed directly with this structure. All header fields, e.g., 0262 * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and 0263 * included in the request by a pointer to the head of the list. 0264 * 0265 * 0266 * `body` (char *) : 0267 * 0268 * Pointer to start of HTTP body. 0269 * 0270 * Can be NULL, in which case it is treated as the empty string, "". 0271 * 0272 * `body_len` (size_t) : 0273 * 0274 * Number of bytes (characters) in `body`. 0 if empty or NULL `body`. 0275 * 0276 * `first_header` (hrb_node_t *) : 0277 * 0278 * Pointer to first SORTED header node, if any. 0279 * It is left to the programmer to ensure that this node and associated 0280 * list is destroyed when done. 0281 * 0282 * `resource` (char *) : 0283 * 0284 * Pointer to resource URL string, e.g., "/folder/page.xhtml". 0285 * 0286 * `verb` (char *) : 0287 * 0288 * Pointer to HTTP verb string, e.g., "GET". 0289 * 0290 * `version` (char *) : 0291 * 0292 * Pointer to HTTP version string, e.g., "HTTP/1.1". 0293 *---------------------------------------------------------------------------- 0294 */ 0295 typedef struct { 0296 char *body; 0297 size_t body_len; 0298 hrb_node_t *first_header; 0299 char *resource; 0300 char *verb; 0301 char *version; 0302 } hrb_t; 0303 0304 /*---------------------------------------------------------------------------- 0305 * Structure: parsed_url_t 0306 * 0307 * Represent a URL with easily-accessed pointers to logical elements within. 0308 * These elements (components) are stored as null-terminated strings (or just 0309 * NULLs). These components should be allocated for the structure, making the 0310 * data as safe as possible from modification. If a component is NULL, it is 0311 * either implicit in or absent from the URL. 0312 * 0313 * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value" 0314 * ^--^ ^-----------------------^ ^--^ ^---------^ ^-------------------^ 0315 * Scheme Host Port Resource Query/-ies 0316 * 0317 * 0318 * `scheme` (char *) 0319 * 0320 * String representing which protocol is to be expected. 0321 * _Must_ be present. 0322 * "http", "https", "ftp", e.g. 0323 * 0324 * `host` (char *) 0325 * 0326 * String of host, either domain name, IPv4, or IPv6 format. 0327 * _Must_ be present. 0328 * "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]" 0329 * 0330 * `port` (char *) 0331 * 0332 * String representation of specified port. Must resolve to a valid unsigned 0333 * integer. 0334 * "9000", "80" 0335 * 0336 * `path` (char *) 0337 * 0338 * Path to resource on host. If not specified, assumes root "/". 0339 * "lollipop_guild.wav", "characters/witches/white.dat" 0340 * 0341 * `query` (char *) 0342 * 0343 * Single string of all query parameters in url (if any). 0344 * "arg1=value1&arg2=value2" 0345 *---------------------------------------------------------------------------- 0346 */ 0347 typedef struct { 0348 char *scheme; /* required */ 0349 char *host; /* required */ 0350 char *port; 0351 char *path; 0352 char *query; 0353 } parsed_url_t; 0354 0355 /*---------------------------------------------------------------------------- 0356 * Structure: s3r_t 0357 * 0358 * S3 request structure "handle". 0359 * 0360 * Holds persistent information for Amazon S3 requests. 0361 * 0362 * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self. 0363 * 0364 * Intended to be reused for operations on a remote object. 0365 * 0366 * Cleaned up through `H5FD_s3comms_s3r_close()`. 0367 * 0368 * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has 0369 * undefined behavior if called to perform in multiple threads. 0370 * 0371 * 0372 * curlhandle 0373 * 0374 * Pointer to the curl_easy handle generated for the request 0375 * 0376 * http_verb 0377 * 0378 * Pointer to NULL-terminated string. HTTP verb, 0379 * e.g. "GET", "HEAD", "PUT", etc. 0380 * 0381 * Default is NULL, resulting in a "GET" request 0382 * 0383 * purl ("parsed url") 0384 * 0385 * Pointer to structure holding the elements of URL for file open 0386 * 0387 * e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2" 0388 * parsed into... 0389 * { scheme: "http" 0390 * host: "bucket.aws.com" 0391 * port: "8080" 0392 * path: "myfile.dat" 0393 * query: "q1=v1&q2=v2" 0394 * } 0395 * 0396 * Cannot be NULL 0397 * 0398 * aws_region 0399 * 0400 * Pointer to NULL-terminated string, specifying S3 "region" 0401 * e.g., "us-east-1". 0402 * 0403 * Required to authenticate 0404 * 0405 * secret_id 0406 * 0407 * Pointer to NULL-terminated string for "secret" access id to S3 resource 0408 * 0409 * Required to authenticate 0410 * 0411 * signing_key 0412 * 0413 * Pointer to `SHA256_DIGEST_LENGTH`-long buffer for "reusable" signing 0414 * key, generated via 0415 * `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>", 0416 * "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")` 0417 * which may be reused for several (up to seven (7)) days from creation? 0418 * Computed once upon file open. 0419 * 0420 * Computed once upon file open from the secret key string in the fapl 0421 * 0422 * Required to authenticate 0423 *---------------------------------------------------------------------------- 0424 */ 0425 typedef struct { 0426 CURL *curlhandle; 0427 size_t filesize; 0428 char *httpverb; 0429 parsed_url_t *purl; 0430 char *region; 0431 char *secret_id; 0432 unsigned char *signing_key; 0433 char *token; 0434 } s3r_t; 0435 0436 #ifdef __cplusplus 0437 extern "C" { 0438 #endif 0439 0440 /******************************************* 0441 * DECLARATION OF HTTP FIELD LIST ROUTINES * 0442 *******************************************/ 0443 0444 H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value); 0445 0446 /*********************************************** 0447 * DECLARATION OF HTTP REQUEST BUFFER ROUTINES * 0448 ***********************************************/ 0449 0450 H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf); 0451 0452 H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host); 0453 0454 /************************************* 0455 * DECLARATION OF S3REQUEST ROUTINES * 0456 *************************************/ 0457 0458 H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle); 0459 0460 H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle); 0461 0462 H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[], 0463 const unsigned char signing_key[], const char token[]); 0464 0465 H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest); 0466 0467 /********************************* 0468 * DECLARATION OF OTHER ROUTINES * 0469 *********************************/ 0470 0471 H5_DLL struct tm *gmnow(void); 0472 0473 H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size, 0474 char *signed_headers_dest, int sh_size, hrb_t *http_request); 0475 0476 H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len, bool lowercase); 0477 0478 H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl); 0479 0480 H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg, 0481 size_t msg_len, char *dest); 0482 0483 H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out, 0484 char *aws_region_out); 0485 0486 H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl); 0487 0488 H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region, 0489 const char *iso8601now); 0490 0491 H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now, 0492 const char *region); 0493 #ifdef __cplusplus 0494 } 0495 #endif 0496 0497 #endif /* H5_HAVE_ROS3_VFD */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |