![]() |
|
|||
File indexing completed on 2025-04-18 09:16:02
0001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0002 * Copyright by The HDF Group. * 0003 * All rights reserved. * 0004 * * 0005 * This file is part of HDF5. The full HDF5 copyright notice, including * 0006 * terms governing use, modification, and redistribution, is contained in * 0007 * the COPYING file, which can be found at the root of the source code * 0008 * distribution tree, or in https://www.hdfgroup.org/licenses. * 0009 * If you do not have access to either file, you may request a copy from * 0010 * help@hdfgroup.org. * 0011 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 0012 0013 /***************************************************************************** 0014 * Read-Only S3 Virtual File Driver (VFD) 0015 * 0016 * This is the header for the S3 Communications module 0017 * 0018 * ***NOT A FILE DRIVER*** 0019 * 0020 * Purpose: 0021 * 0022 * - Provide structures and functions related to communicating with 0023 * Amazon S3 (Simple Storage Service). 0024 * - Abstract away the REST API (HTTP, 0025 * networked communications) behind a series of uniform function calls. 0026 * - Handle AWS4 authentication, if appropriate. 0027 * - Fail predictably in event of errors. 0028 * - Eventually, support more S3 operations, such as creating, writing to, 0029 * and removing Objects remotely. 0030 * 0031 * translates: 0032 * `read(some_file, bytes_offset, bytes_length, &dest_buffer);` 0033 * to: 0034 * ``` 0035 * GET myfile HTTP/1.1 0036 * Host: somewhere.me 0037 * Range: bytes=4096-5115 0038 * ``` 0039 * and places received bytes from HTTP response... 0040 * ``` 0041 * HTTP/1.1 206 Partial-Content 0042 * Content-Range: 4096-5115/63239 0043 * 0044 * <bytes> 0045 * ``` 0046 * ...in destination buffer. 0047 * 0048 *****************************************************************************/ 0049 0050 #include "H5private.h" /* Generic Functions */ 0051 0052 #ifdef H5_HAVE_ROS3_VFD 0053 0054 /* Necessary S3 headers */ 0055 #include <curl/curl.h> 0056 #include <openssl/evp.h> 0057 #include <openssl/hmac.h> 0058 #include <openssl/sha.h> 0059 0060 /***************** 0061 * PUBLIC MACROS * 0062 *****************/ 0063 0064 /* hexadecimal string of pre-computed sha256 checksum of the empty string 0065 * hex(sha256sum("")) 0066 */ 0067 #define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" 0068 0069 /* string length (plus null terminator) 0070 * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'_') 0071 */ 0072 #define ISO8601_SIZE 17 0073 0074 /* string length (plus null terminator) 0075 * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT" 0076 */ 0077 #define RFC7231_SIZE 30 0078 0079 /*--------------------------------------------------------------------------- 0080 * 0081 * Macro: ISO8601NOW() 0082 * 0083 * Purpose: 0084 * 0085 * write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest 0086 * e.g., "20170630T204155Z" 0087 * 0088 * wrapper for strftime() 0089 * 0090 * It is left to the programmer to check return value of 0091 * ISO8601NOW (should equal ISO8601_SIZE - 1). 0092 * 0093 *--------------------------------------------------------------------------- 0094 */ 0095 #define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm)) 0096 0097 /*--------------------------------------------------------------------------- 0098 * 0099 * Macro: RFC7231NOW() 0100 * 0101 * Purpose: 0102 * 0103 * write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest 0104 * e.g., "Fri, 30 Jun 2017 20:41:55 GMT" 0105 * 0106 * wrapper for strftime() 0107 * 0108 * It is left to the programmer to check return value of 0109 * RFC7231NOW (should equal RFC7231_SIZE - 1). 0110 * 0111 *--------------------------------------------------------------------------- 0112 */ 0113 #define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm)) 0114 0115 /* Reasonable maximum length of a credential string. 0116 * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below). 0117 * 17 <- "////aws4_request\0" 0118 * 2 < "s3" (service) 0119 * 8 <- "YYYYmmdd" (date) 0120 * 128 <- (access_id) 0121 * 155 :: sum 0122 */ 0123 #define S3COMMS_MAX_CREDENTIAL_SIZE 155 0124 0125 /*--------------------------------------------------------------------------- 0126 * 0127 * Macro: H5FD_S3COMMS_FORMAT_CREDENTIAL() 0128 * 0129 * Purpose: 0130 * 0131 * Format "S3 Credential" string from inputs, for AWS4. 0132 * 0133 * Wrapper for snprintf(). 0134 * 0135 * _HAS NO ERROR-CHECKING FACILITIES_ 0136 * It is left to programmer to ensure that return value confers success. 0137 * e.g., 0138 * ``` 0139 * assert( S3COMMS_MAX_CREDENTIAL_SIZE >= 0140 * S3COMMS_FORMAT_CREDENTIAL(...) ); 0141 * ``` 0142 * 0143 * "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request" 0144 * assuming that `dest` has adequate space. 0145 * 0146 * ALL inputs must be null-terminated strings. 0147 * 0148 * `access` should be the user's access key ID. 0149 * `date` must be of format "YYYYmmdd". 0150 * `region` should be relevant AWS region, i.e. "us-east-1". 0151 * `service` should be "s3". 0152 * 0153 *--------------------------------------------------------------------------- 0154 */ 0155 #define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \ 0156 snprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date), \ 0157 (region), (service)) 0158 0159 /********************* 0160 * PUBLIC STRUCTURES * 0161 *********************/ 0162 0163 /*---------------------------------------------------------------------------- 0164 * 0165 * Structure: hrb_node_t 0166 * 0167 * HTTP Header Field Node 0168 * 0169 * 0170 * 0171 * Maintain a ordered (linked) list of HTTP Header fields. 0172 * 0173 * Provides efficient access and manipulation of a logical sequence of 0174 * HTTP header fields, of particular use when composing an 0175 * "S3 Canonical Request" for authentication. 0176 * 0177 * - The creation of a Canonical Request involves: 0178 * - convert field names to lower case 0179 * - sort by this lower-case name 0180 * - convert ": " name-value separator in HTTP string to ":" 0181 * - get sorted lowercase names without field or separator 0182 * 0183 * As HTTP headers allow headers in any order (excepting the case of multiple 0184 * headers with the same name), the list ordering can be optimized for Canonical 0185 * Request creation, suggesting alphabtical order. For more expedient insertion 0186 * and removal of elements in the list, linked list seems preferable to a 0187 * dynamically-expanding array. The usually-smaller number of entries (5 or 0188 * fewer) makes performance overhead of traversing the list trivial. 0189 * 0190 * The above requirements of creating at Canonical Request suggests a reasonable 0191 * trade-off of speed for space with the option to compute elements as needed 0192 * or to have the various elements prepared and stored in the structure 0193 * (e.g. name, value, lowername, concatenated name:value) 0194 * The structure currently is implemented to pre-compute. 0195 * 0196 * At all times, the "first" node of the list should be the least, 0197 * alphabetically. For all nodes, the `next` node should be either NULL or 0198 * of greater alphabetical value. 0199 * 0200 * Each node contains its own header field information, plus a pointer to the 0201 * next node. 0202 * 0203 * It is not allowed to have multiple nodes with the same _lowercase_ `name`s 0204 * in the same list 0205 * (i.e., name is case-insensitive for access and modification.) 0206 * 0207 * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated 0208 * strings allocated specifically for their node. 0209 * 0210 * `name` (char *) 0211 * 0212 * Case-meaningful name of the HTTP field. 0213 * Given case is how it is supplied to networking code. 0214 * e.g., "Range" 0215 * 0216 * `lowername` (char *) 0217 * 0218 * Lowercase copy of name. 0219 * e.g., "range" 0220 * 0221 * `value` (char *) 0222 * 0223 * Case-meaningful value of HTTP field. 0224 * e.g., "bytes=0-9" 0225 * 0226 * `cat` (char *) 0227 * 0228 * Concatenated, null-terminated string of HTTP header line, 0229 * as the field would appear in an HTTP request. 0230 * e.g., "Range: bytes=0-9" 0231 * 0232 * `next` (hrb_node_t *) 0233 * 0234 * Pointers to next node in the list, or NULL sentinel as end of list. 0235 * Next node must have a greater `lowername` as determined by strcmp(). 0236 * 0237 *---------------------------------------------------------------------------- 0238 */ 0239 typedef struct hrb_node_t { 0240 char *name; 0241 char *value; 0242 char *cat; 0243 char *lowername; 0244 struct hrb_node_t *next; 0245 } hrb_node_t; 0246 0247 /*---------------------------------------------------------------------------- 0248 * 0249 * Structure: hrb_t 0250 * 0251 * HTTP Request Buffer structure 0252 * 0253 * 0254 * 0255 * Logically represent an HTTP request 0256 * 0257 * GET /myplace/myfile.h5 HTTP/1.1 0258 * Host: over.rainbow.oz 0259 * Date: Fri, 01 Dec 2017 12:35:04 CST 0260 * 0261 * <body> 0262 * 0263 * ...with fast, efficient access to and modification of primary and field 0264 * elements. 0265 * 0266 * Structure for building HTTP requests while hiding much of the string 0267 * processing required "under the hood." 0268 * 0269 * Information about the request target -- the first line -- and the body text, 0270 * if any, are managed directly with this structure. All header fields, e.g., 0271 * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and 0272 * included in the request by a pointer to the head of the list. 0273 * 0274 * 0275 * `body` (char *) : 0276 * 0277 * Pointer to start of HTTP body. 0278 * 0279 * Can be NULL, in which case it is treated as the empty string, "". 0280 * 0281 * `body_len` (size_t) : 0282 * 0283 * Number of bytes (characters) in `body`. 0 if empty or NULL `body`. 0284 * 0285 * `first_header` (hrb_node_t *) : 0286 * 0287 * Pointer to first SORTED header node, if any. 0288 * It is left to the programmer to ensure that this node and associated 0289 * list is destroyed when done. 0290 * 0291 * `resource` (char *) : 0292 * 0293 * Pointer to resource URL string, e.g., "/folder/page.xhtml". 0294 * 0295 * `verb` (char *) : 0296 * 0297 * Pointer to HTTP verb string, e.g., "GET". 0298 * 0299 * `version` (char *) : 0300 * 0301 * Pointer to HTTP version string, e.g., "HTTP/1.1". 0302 * 0303 *---------------------------------------------------------------------------- 0304 */ 0305 typedef struct { 0306 char *body; 0307 size_t body_len; 0308 hrb_node_t *first_header; 0309 char *resource; 0310 char *verb; 0311 char *version; 0312 } hrb_t; 0313 0314 /*---------------------------------------------------------------------------- 0315 * 0316 * Structure: parsed_url_t 0317 * 0318 * 0319 * Represent a URL with easily-accessed pointers to logical elements within. 0320 * These elements (components) are stored as null-terminated strings (or just 0321 * NULLs). These components should be allocated for the structure, making the 0322 * data as safe as possible from modification. If a component is NULL, it is 0323 * either implicit in or absent from the URL. 0324 * 0325 * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value" 0326 * ^--^ ^-----------------------^ ^--^ ^---------^ ^-------------------^ 0327 * Scheme Host Port Resource Query/-ies 0328 * 0329 * 0330 * `scheme` (char *) 0331 * 0332 * String representing which protocol is to be expected. 0333 * _Must_ be present. 0334 * "http", "https", "ftp", e.g. 0335 * 0336 * `host` (char *) 0337 * 0338 * String of host, either domain name, IPv4, or IPv6 format. 0339 * _Must_ be present. 0340 * "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]" 0341 * 0342 * `port` (char *) 0343 * 0344 * String representation of specified port. Must resolve to a valid unsigned 0345 * integer. 0346 * "9000", "80" 0347 * 0348 * `path` (char *) 0349 * 0350 * Path to resource on host. If not specified, assumes root "/". 0351 * "lollipop_guild.wav", "characters/witches/white.dat" 0352 * 0353 * `query` (char *) 0354 * 0355 * Single string of all query parameters in url (if any). 0356 * "arg1=value1&arg2=value2" 0357 * 0358 *---------------------------------------------------------------------------- 0359 */ 0360 typedef struct { 0361 char *scheme; /* required */ 0362 char *host; /* required */ 0363 char *port; 0364 char *path; 0365 char *query; 0366 } parsed_url_t; 0367 0368 /*---------------------------------------------------------------------------- 0369 * 0370 * Structure: s3r_t 0371 * 0372 * 0373 * 0374 * S3 request structure "handle". 0375 * 0376 * Holds persistent information for Amazon S3 requests. 0377 * 0378 * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self. 0379 * 0380 * Intended to be re-used for operations on a remote object. 0381 * 0382 * Cleaned up through `H5FD_s3comms_s3r_close()`. 0383 * 0384 * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has 0385 * undefined behavior if called to perform in multiple threads. 0386 * 0387 * 0388 * `curlhandle` (CURL) 0389 * 0390 * Pointer to the curl_easy handle generated for the request. 0391 * 0392 * `httpverb` (char *) 0393 * 0394 * Pointer to NULL-terminated string. HTTP verb, 0395 * e.g. "GET", "HEAD", "PUT", etc. 0396 * 0397 * Default is NULL, resulting in a "GET" request. 0398 * 0399 * `purl` (parsed_url_t *) 0400 * 0401 * Pointer to structure holding the elements of URL for file open. 0402 * 0403 * e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2" 0404 * parsed into... 0405 * { scheme: "http" 0406 * host: "bucket.aws.com" 0407 * port: "8080" 0408 * path: "myfile.dat" 0409 * query: "q1=v1&q2=v2" 0410 * } 0411 * 0412 * Cannot be NULL. 0413 * 0414 * `region` (char *) 0415 * 0416 * Pointer to NULL-terminated string, specifying S3 "region", 0417 * e.g., "us-east-1". 0418 * 0419 * Required to authenticate. 0420 * 0421 * `secret_id` (char *) 0422 * 0423 * Pointer to NULL-terminated string for "secret" access id to S3 resource. 0424 * 0425 * Required to authenticate. 0426 * 0427 * `signing_key` (unsigned char *) 0428 * 0429 * Pointer to `SHA256_DIGEST_LENGTH`-long string for "reusable" signing 0430 * key, generated via 0431 * `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>", 0432 * "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")` 0433 * which may be re-used for several (up to seven (7)) days from creation? 0434 * Computed once upon file open. 0435 * 0436 * Required to authenticate. 0437 * 0438 *---------------------------------------------------------------------------- 0439 */ 0440 typedef struct { 0441 CURL *curlhandle; 0442 size_t filesize; 0443 char *httpverb; 0444 parsed_url_t *purl; 0445 char *region; 0446 char *secret_id; 0447 unsigned char *signing_key; 0448 char *token; 0449 } s3r_t; 0450 0451 #ifdef __cplusplus 0452 extern "C" { 0453 #endif 0454 0455 /******************************************* 0456 * DECLARATION OF HTTP FIELD LIST ROUTINES * 0457 *******************************************/ 0458 0459 H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value); 0460 0461 /*********************************************** 0462 * DECLARATION OF HTTP REQUEST BUFFER ROUTINES * 0463 ***********************************************/ 0464 0465 H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf); 0466 0467 H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host); 0468 0469 /************************************* 0470 * DECLARATION OF S3REQUEST ROUTINES * 0471 *************************************/ 0472 0473 H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle); 0474 0475 H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle); 0476 0477 H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[], 0478 const unsigned char signing_key[], const char token[]); 0479 0480 H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest); 0481 0482 /********************************* 0483 * DECLARATION OF OTHER ROUTINES * 0484 *********************************/ 0485 0486 H5_DLL struct tm *gmnow(void); 0487 0488 H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size, 0489 char *signed_headers_dest, int sh_size, hrb_t *http_request); 0490 0491 H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len, bool lowercase); 0492 0493 H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl); 0494 0495 H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg, 0496 size_t msg_len, char *dest); 0497 0498 H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out, 0499 char *aws_region_out); 0500 0501 H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl); 0502 0503 H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region, 0504 const char *iso8601now); 0505 0506 H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now, 0507 const char *region); 0508 #ifdef __cplusplus 0509 } 0510 #endif 0511 0512 #endif /* H5_HAVE_ROS3_VFD */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |