|
||||
File indexing completed on 2025-01-18 10:13:15
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * 0006 * Copyright (C) 1999-2012, International Business Machines 0007 * Corporation and others. All Rights Reserved. 0008 * 0009 ******************************************************************************* 0010 * file name: utf16.h 0011 * encoding: UTF-8 0012 * tab size: 8 (not used) 0013 * indentation:4 0014 * 0015 * created on: 1999sep09 0016 * created by: Markus W. Scherer 0017 */ 0018 0019 /** 0020 * \file 0021 * \brief C API: 16-bit Unicode handling macros 0022 * 0023 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. 0024 * 0025 * For more information see utf.h and the ICU User Guide Strings chapter 0026 * (https://unicode-org.github.io/icu/userguide/strings). 0027 * 0028 * <em>Usage:</em> 0029 * ICU coding guidelines for if() statements should be followed when using these macros. 0030 * Compound statements (curly braces {}) must be used for if-else-while... 0031 * bodies and all macro statements should be terminated with semicolon. 0032 */ 0033 0034 #ifndef __UTF16_H__ 0035 #define __UTF16_H__ 0036 0037 #include <stdbool.h> 0038 #include "unicode/umachine.h" 0039 #ifndef __UTF_H__ 0040 # include "unicode/utf.h" 0041 #endif 0042 0043 /* single-code point definitions -------------------------------------------- */ 0044 0045 /** 0046 * Does this code unit alone encode a code point (BMP, not a surrogate)? 0047 * @param c 16-bit code unit 0048 * @return true or false 0049 * @stable ICU 2.4 0050 */ 0051 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) 0052 0053 /** 0054 * Is this code unit a lead surrogate (U+d800..U+dbff)? 0055 * @param c 16-bit code unit 0056 * @return true or false 0057 * @stable ICU 2.4 0058 */ 0059 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 0060 0061 /** 0062 * Is this code unit a trail surrogate (U+dc00..U+dfff)? 0063 * @param c 16-bit code unit 0064 * @return true or false 0065 * @stable ICU 2.4 0066 */ 0067 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) 0068 0069 /** 0070 * Is this code unit a surrogate (U+d800..U+dfff)? 0071 * @param c 16-bit code unit 0072 * @return true or false 0073 * @stable ICU 2.4 0074 */ 0075 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) 0076 0077 /** 0078 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 0079 * is it a lead surrogate? 0080 * @param c 16-bit code unit 0081 * @return true or false 0082 * @stable ICU 2.4 0083 */ 0084 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 0085 0086 /** 0087 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 0088 * is it a trail surrogate? 0089 * @param c 16-bit code unit 0090 * @return true or false 0091 * @stable ICU 4.2 0092 */ 0093 #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) 0094 0095 /** 0096 * Helper constant for U16_GET_SUPPLEMENTARY. 0097 * @internal 0098 */ 0099 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 0100 0101 /** 0102 * Get a supplementary code point value (U+10000..U+10ffff) 0103 * from its lead and trail surrogates. 0104 * The result is undefined if the input values are not 0105 * lead and trail surrogates. 0106 * 0107 * @param lead lead surrogate (U+d800..U+dbff) 0108 * @param trail trail surrogate (U+dc00..U+dfff) 0109 * @return supplementary code point (U+10000..U+10ffff) 0110 * @stable ICU 2.4 0111 */ 0112 #define U16_GET_SUPPLEMENTARY(lead, trail) \ 0113 (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) 0114 0115 0116 /** 0117 * Get the lead surrogate (0xd800..0xdbff) for a 0118 * supplementary code point (0x10000..0x10ffff). 0119 * @param supplementary 32-bit code point (U+10000..U+10ffff) 0120 * @return lead surrogate (U+d800..U+dbff) for supplementary 0121 * @stable ICU 2.4 0122 */ 0123 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 0124 0125 /** 0126 * Get the trail surrogate (0xdc00..0xdfff) for a 0127 * supplementary code point (0x10000..0x10ffff). 0128 * @param supplementary 32-bit code point (U+10000..U+10ffff) 0129 * @return trail surrogate (U+dc00..U+dfff) for supplementary 0130 * @stable ICU 2.4 0131 */ 0132 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 0133 0134 /** 0135 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) 0136 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). 0137 * @param c 32-bit code point 0138 * @return 1 or 2 0139 * @stable ICU 2.4 0140 */ 0141 #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 0142 0143 /** 0144 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). 0145 * @return 2 0146 * @stable ICU 2.4 0147 */ 0148 #define U16_MAX_LENGTH 2 0149 0150 /** 0151 * Get a code point from a string at a random-access offset, 0152 * without changing the offset. 0153 * "Unsafe" macro, assumes well-formed UTF-16. 0154 * 0155 * The offset may point to either the lead or trail surrogate unit 0156 * for a supplementary code point, in which case the macro will read 0157 * the adjacent matching surrogate as well. 0158 * The result is undefined if the offset points to a single, unpaired surrogate. 0159 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 0160 * 0161 * @param s const UChar * string 0162 * @param i string offset 0163 * @param c output UChar32 variable 0164 * @see U16_GET 0165 * @stable ICU 2.4 0166 */ 0167 #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0168 (c)=(s)[i]; \ 0169 if(U16_IS_SURROGATE(c)) { \ 0170 if(U16_IS_SURROGATE_LEAD(c)) { \ 0171 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ 0172 } else { \ 0173 (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ 0174 } \ 0175 } \ 0176 } UPRV_BLOCK_MACRO_END 0177 0178 /** 0179 * Get a code point from a string at a random-access offset, 0180 * without changing the offset. 0181 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0182 * 0183 * The offset may point to either the lead or trail surrogate unit 0184 * for a supplementary code point, in which case the macro will read 0185 * the adjacent matching surrogate as well. 0186 * 0187 * The length can be negative for a NUL-terminated string. 0188 * 0189 * If the offset points to a single, unpaired surrogate, then 0190 * c is set to that unpaired surrogate. 0191 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 0192 * 0193 * @param s const UChar * string 0194 * @param start starting string offset (usually 0) 0195 * @param i string offset, must be start<=i<length 0196 * @param length string length 0197 * @param c output UChar32 variable 0198 * @see U16_GET_UNSAFE 0199 * @stable ICU 2.4 0200 */ 0201 #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 0202 (c)=(s)[i]; \ 0203 if(U16_IS_SURROGATE(c)) { \ 0204 uint16_t __c2; \ 0205 if(U16_IS_SURROGATE_LEAD(c)) { \ 0206 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ 0207 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 0208 } \ 0209 } else { \ 0210 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 0211 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 0212 } \ 0213 } \ 0214 } \ 0215 } UPRV_BLOCK_MACRO_END 0216 0217 /** 0218 * Get a code point from a string at a random-access offset, 0219 * without changing the offset. 0220 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0221 * 0222 * The offset may point to either the lead or trail surrogate unit 0223 * for a supplementary code point, in which case the macro will read 0224 * the adjacent matching surrogate as well. 0225 * 0226 * The length can be negative for a NUL-terminated string. 0227 * 0228 * If the offset points to a single, unpaired surrogate, then 0229 * c is set to U+FFFD. 0230 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. 0231 * 0232 * @param s const UChar * string 0233 * @param start starting string offset (usually 0) 0234 * @param i string offset, must be start<=i<length 0235 * @param length string length 0236 * @param c output UChar32 variable 0237 * @see U16_GET_UNSAFE 0238 * @stable ICU 60 0239 */ 0240 #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 0241 (c)=(s)[i]; \ 0242 if(U16_IS_SURROGATE(c)) { \ 0243 uint16_t __c2; \ 0244 if(U16_IS_SURROGATE_LEAD(c)) { \ 0245 if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ 0246 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 0247 } else { \ 0248 (c)=0xfffd; \ 0249 } \ 0250 } else { \ 0251 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 0252 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 0253 } else { \ 0254 (c)=0xfffd; \ 0255 } \ 0256 } \ 0257 } \ 0258 } UPRV_BLOCK_MACRO_END 0259 0260 /* definitions with forward iteration --------------------------------------- */ 0261 0262 /** 0263 * Get a code point from a string at a code point boundary offset, 0264 * and advance the offset to the next code point boundary. 0265 * (Post-incrementing forward iteration.) 0266 * "Unsafe" macro, assumes well-formed UTF-16. 0267 * 0268 * The offset may point to the lead surrogate unit 0269 * for a supplementary code point, in which case the macro will read 0270 * the following trail surrogate as well. 0271 * If the offset points to a trail surrogate, then that itself 0272 * will be returned as the code point. 0273 * The result is undefined if the offset points to a single, unpaired lead surrogate. 0274 * 0275 * @param s const UChar * string 0276 * @param i string offset 0277 * @param c output UChar32 variable 0278 * @see U16_NEXT 0279 * @stable ICU 2.4 0280 */ 0281 #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0282 (c)=(s)[(i)++]; \ 0283 if(U16_IS_LEAD(c)) { \ 0284 (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ 0285 } \ 0286 } UPRV_BLOCK_MACRO_END 0287 0288 /** 0289 * Get a code point from a string at a code point boundary offset, 0290 * and advance the offset to the next code point boundary. 0291 * (Post-incrementing forward iteration.) 0292 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0293 * 0294 * The length can be negative for a NUL-terminated string. 0295 * 0296 * The offset may point to the lead surrogate unit 0297 * for a supplementary code point, in which case the macro will read 0298 * the following trail surrogate as well. 0299 * If the offset points to a trail surrogate or 0300 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. 0301 * 0302 * @param s const UChar * string 0303 * @param i string offset, must be i<length 0304 * @param length string length 0305 * @param c output UChar32 variable 0306 * @see U16_NEXT_UNSAFE 0307 * @stable ICU 2.4 0308 */ 0309 #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 0310 (c)=(s)[(i)++]; \ 0311 if(U16_IS_LEAD(c)) { \ 0312 uint16_t __c2; \ 0313 if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 0314 ++(i); \ 0315 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 0316 } \ 0317 } \ 0318 } UPRV_BLOCK_MACRO_END 0319 0320 /** 0321 * Get a code point from a string at a code point boundary offset, 0322 * and advance the offset to the next code point boundary. 0323 * (Post-incrementing forward iteration.) 0324 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0325 * 0326 * The length can be negative for a NUL-terminated string. 0327 * 0328 * The offset may point to the lead surrogate unit 0329 * for a supplementary code point, in which case the macro will read 0330 * the following trail surrogate as well. 0331 * If the offset points to a trail surrogate or 0332 * to a single, unpaired lead surrogate, then c is set to U+FFFD. 0333 * 0334 * @param s const UChar * string 0335 * @param i string offset, must be i<length 0336 * @param length string length 0337 * @param c output UChar32 variable 0338 * @see U16_NEXT_UNSAFE 0339 * @stable ICU 60 0340 */ 0341 #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 0342 (c)=(s)[(i)++]; \ 0343 if(U16_IS_SURROGATE(c)) { \ 0344 uint16_t __c2; \ 0345 if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 0346 ++(i); \ 0347 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 0348 } else { \ 0349 (c)=0xfffd; \ 0350 } \ 0351 } \ 0352 } UPRV_BLOCK_MACRO_END 0353 0354 /** 0355 * Append a code point to a string, overwriting 1 or 2 code units. 0356 * The offset points to the current end of the string contents 0357 * and is advanced (post-increment). 0358 * "Unsafe" macro, assumes a valid code point and sufficient space in the string. 0359 * Otherwise, the result is undefined. 0360 * 0361 * @param s const UChar * string buffer 0362 * @param i string offset 0363 * @param c code point to append 0364 * @see U16_APPEND 0365 * @stable ICU 2.4 0366 */ 0367 #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0368 if((uint32_t)(c)<=0xffff) { \ 0369 (s)[(i)++]=(uint16_t)(c); \ 0370 } else { \ 0371 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 0372 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 0373 } \ 0374 } UPRV_BLOCK_MACRO_END 0375 0376 /** 0377 * Append a code point to a string, overwriting 1 or 2 code units. 0378 * The offset points to the current end of the string contents 0379 * and is advanced (post-increment). 0380 * "Safe" macro, checks for a valid code point. 0381 * If a surrogate pair is written, checks for sufficient space in the string. 0382 * If the code point is not valid or a trail surrogate does not fit, 0383 * then isError is set to true. 0384 * 0385 * @param s const UChar * string buffer 0386 * @param i string offset, must be i<capacity 0387 * @param capacity size of the string buffer 0388 * @param c code point to append 0389 * @param isError output UBool set to true if an error occurs, otherwise not modified 0390 * @see U16_APPEND_UNSAFE 0391 * @stable ICU 2.4 0392 */ 0393 #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ 0394 if((uint32_t)(c)<=0xffff) { \ 0395 (s)[(i)++]=(uint16_t)(c); \ 0396 } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ 0397 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 0398 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 0399 } else /* c>0x10ffff or not enough space */ { \ 0400 (isError)=true; \ 0401 } \ 0402 } UPRV_BLOCK_MACRO_END 0403 0404 /** 0405 * Advance the string offset from one code point boundary to the next. 0406 * (Post-incrementing iteration.) 0407 * "Unsafe" macro, assumes well-formed UTF-16. 0408 * 0409 * @param s const UChar * string 0410 * @param i string offset 0411 * @see U16_FWD_1 0412 * @stable ICU 2.4 0413 */ 0414 #define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 0415 if(U16_IS_LEAD((s)[(i)++])) { \ 0416 ++(i); \ 0417 } \ 0418 } UPRV_BLOCK_MACRO_END 0419 0420 /** 0421 * Advance the string offset from one code point boundary to the next. 0422 * (Post-incrementing iteration.) 0423 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0424 * 0425 * The length can be negative for a NUL-terminated string. 0426 * 0427 * @param s const UChar * string 0428 * @param i string offset, must be i<length 0429 * @param length string length 0430 * @see U16_FWD_1_UNSAFE 0431 * @stable ICU 2.4 0432 */ 0433 #define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 0434 if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ 0435 ++(i); \ 0436 } \ 0437 } UPRV_BLOCK_MACRO_END 0438 0439 /** 0440 * Advance the string offset from one code point boundary to the n-th next one, 0441 * i.e., move forward by n code points. 0442 * (Post-incrementing iteration.) 0443 * "Unsafe" macro, assumes well-formed UTF-16. 0444 * 0445 * @param s const UChar * string 0446 * @param i string offset 0447 * @param n number of code points to skip 0448 * @see U16_FWD_N 0449 * @stable ICU 2.4 0450 */ 0451 #define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 0452 int32_t __N=(n); \ 0453 while(__N>0) { \ 0454 U16_FWD_1_UNSAFE(s, i); \ 0455 --__N; \ 0456 } \ 0457 } UPRV_BLOCK_MACRO_END 0458 0459 /** 0460 * Advance the string offset from one code point boundary to the n-th next one, 0461 * i.e., move forward by n code points. 0462 * (Post-incrementing iteration.) 0463 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0464 * 0465 * The length can be negative for a NUL-terminated string. 0466 * 0467 * @param s const UChar * string 0468 * @param i int32_t string offset, must be i<length 0469 * @param length int32_t string length 0470 * @param n number of code points to skip 0471 * @see U16_FWD_N_UNSAFE 0472 * @stable ICU 2.4 0473 */ 0474 #define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ 0475 int32_t __N=(n); \ 0476 while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ 0477 U16_FWD_1(s, i, length); \ 0478 --__N; \ 0479 } \ 0480 } UPRV_BLOCK_MACRO_END 0481 0482 /** 0483 * Adjust a random-access offset to a code point boundary 0484 * at the start of a code point. 0485 * If the offset points to the trail surrogate of a surrogate pair, 0486 * then the offset is decremented. 0487 * Otherwise, it is not modified. 0488 * "Unsafe" macro, assumes well-formed UTF-16. 0489 * 0490 * @param s const UChar * string 0491 * @param i string offset 0492 * @see U16_SET_CP_START 0493 * @stable ICU 2.4 0494 */ 0495 #define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 0496 if(U16_IS_TRAIL((s)[i])) { \ 0497 --(i); \ 0498 } \ 0499 } UPRV_BLOCK_MACRO_END 0500 0501 /** 0502 * Adjust a random-access offset to a code point boundary 0503 * at the start of a code point. 0504 * If the offset points to the trail surrogate of a surrogate pair, 0505 * then the offset is decremented. 0506 * Otherwise, it is not modified. 0507 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0508 * 0509 * @param s const UChar * string 0510 * @param start starting string offset (usually 0) 0511 * @param i string offset, must be start<=i 0512 * @see U16_SET_CP_START_UNSAFE 0513 * @stable ICU 2.4 0514 */ 0515 #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 0516 if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 0517 --(i); \ 0518 } \ 0519 } UPRV_BLOCK_MACRO_END 0520 0521 /* definitions with backward iteration -------------------------------------- */ 0522 0523 /** 0524 * Move the string offset from one code point boundary to the previous one 0525 * and get the code point between them. 0526 * (Pre-decrementing backward iteration.) 0527 * "Unsafe" macro, assumes well-formed UTF-16. 0528 * 0529 * The input offset may be the same as the string length. 0530 * If the offset is behind a trail surrogate unit 0531 * for a supplementary code point, then the macro will read 0532 * the preceding lead surrogate as well. 0533 * If the offset is behind a lead surrogate, then that itself 0534 * will be returned as the code point. 0535 * The result is undefined if the offset is behind a single, unpaired trail surrogate. 0536 * 0537 * @param s const UChar * string 0538 * @param i string offset 0539 * @param c output UChar32 variable 0540 * @see U16_PREV 0541 * @stable ICU 2.4 0542 */ 0543 #define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0544 (c)=(s)[--(i)]; \ 0545 if(U16_IS_TRAIL(c)) { \ 0546 (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ 0547 } \ 0548 } UPRV_BLOCK_MACRO_END 0549 0550 /** 0551 * Move the string offset from one code point boundary to the previous one 0552 * and get the code point between them. 0553 * (Pre-decrementing backward iteration.) 0554 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0555 * 0556 * The input offset may be the same as the string length. 0557 * If the offset is behind a trail surrogate unit 0558 * for a supplementary code point, then the macro will read 0559 * the preceding lead surrogate as well. 0560 * If the offset is behind a lead surrogate or behind a single, unpaired 0561 * trail surrogate, then c is set to that unpaired surrogate. 0562 * 0563 * @param s const UChar * string 0564 * @param start starting string offset (usually 0) 0565 * @param i string offset, must be start<i 0566 * @param c output UChar32 variable 0567 * @see U16_PREV_UNSAFE 0568 * @stable ICU 2.4 0569 */ 0570 #define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0571 (c)=(s)[--(i)]; \ 0572 if(U16_IS_TRAIL(c)) { \ 0573 uint16_t __c2; \ 0574 if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 0575 --(i); \ 0576 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 0577 } \ 0578 } \ 0579 } UPRV_BLOCK_MACRO_END 0580 0581 /** 0582 * Move the string offset from one code point boundary to the previous one 0583 * and get the code point between them. 0584 * (Pre-decrementing backward iteration.) 0585 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0586 * 0587 * The input offset may be the same as the string length. 0588 * If the offset is behind a trail surrogate unit 0589 * for a supplementary code point, then the macro will read 0590 * the preceding lead surrogate as well. 0591 * If the offset is behind a lead surrogate or behind a single, unpaired 0592 * trail surrogate, then c is set to U+FFFD. 0593 * 0594 * @param s const UChar * string 0595 * @param start starting string offset (usually 0) 0596 * @param i string offset, must be start<i 0597 * @param c output UChar32 variable 0598 * @see U16_PREV_UNSAFE 0599 * @stable ICU 60 0600 */ 0601 #define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 0602 (c)=(s)[--(i)]; \ 0603 if(U16_IS_SURROGATE(c)) { \ 0604 uint16_t __c2; \ 0605 if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 0606 --(i); \ 0607 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 0608 } else { \ 0609 (c)=0xfffd; \ 0610 } \ 0611 } \ 0612 } UPRV_BLOCK_MACRO_END 0613 0614 /** 0615 * Move the string offset from one code point boundary to the previous one. 0616 * (Pre-decrementing backward iteration.) 0617 * The input offset may be the same as the string length. 0618 * "Unsafe" macro, assumes well-formed UTF-16. 0619 * 0620 * @param s const UChar * string 0621 * @param i string offset 0622 * @see U16_BACK_1 0623 * @stable ICU 2.4 0624 */ 0625 #define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 0626 if(U16_IS_TRAIL((s)[--(i)])) { \ 0627 --(i); \ 0628 } \ 0629 } UPRV_BLOCK_MACRO_END 0630 0631 /** 0632 * Move the string offset from one code point boundary to the previous one. 0633 * (Pre-decrementing backward iteration.) 0634 * The input offset may be the same as the string length. 0635 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0636 * 0637 * @param s const UChar * string 0638 * @param start starting string offset (usually 0) 0639 * @param i string offset, must be start<i 0640 * @see U16_BACK_1_UNSAFE 0641 * @stable ICU 2.4 0642 */ 0643 #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 0644 if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 0645 --(i); \ 0646 } \ 0647 } UPRV_BLOCK_MACRO_END 0648 0649 /** 0650 * Move the string offset from one code point boundary to the n-th one before it, 0651 * i.e., move backward by n code points. 0652 * (Pre-decrementing backward iteration.) 0653 * The input offset may be the same as the string length. 0654 * "Unsafe" macro, assumes well-formed UTF-16. 0655 * 0656 * @param s const UChar * string 0657 * @param i string offset 0658 * @param n number of code points to skip 0659 * @see U16_BACK_N 0660 * @stable ICU 2.4 0661 */ 0662 #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 0663 int32_t __N=(n); \ 0664 while(__N>0) { \ 0665 U16_BACK_1_UNSAFE(s, i); \ 0666 --__N; \ 0667 } \ 0668 } UPRV_BLOCK_MACRO_END 0669 0670 /** 0671 * Move the string offset from one code point boundary to the n-th one before it, 0672 * i.e., move backward by n code points. 0673 * (Pre-decrementing backward iteration.) 0674 * The input offset may be the same as the string length. 0675 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0676 * 0677 * @param s const UChar * string 0678 * @param start start of string 0679 * @param i string offset, must be start<i 0680 * @param n number of code points to skip 0681 * @see U16_BACK_N_UNSAFE 0682 * @stable ICU 2.4 0683 */ 0684 #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 0685 int32_t __N=(n); \ 0686 while(__N>0 && (i)>(start)) { \ 0687 U16_BACK_1(s, start, i); \ 0688 --__N; \ 0689 } \ 0690 } UPRV_BLOCK_MACRO_END 0691 0692 /** 0693 * Adjust a random-access offset to a code point boundary after a code point. 0694 * If the offset is behind the lead surrogate of a surrogate pair, 0695 * then the offset is incremented. 0696 * Otherwise, it is not modified. 0697 * The input offset may be the same as the string length. 0698 * "Unsafe" macro, assumes well-formed UTF-16. 0699 * 0700 * @param s const UChar * string 0701 * @param i string offset 0702 * @see U16_SET_CP_LIMIT 0703 * @stable ICU 2.4 0704 */ 0705 #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 0706 if(U16_IS_LEAD((s)[(i)-1])) { \ 0707 ++(i); \ 0708 } \ 0709 } UPRV_BLOCK_MACRO_END 0710 0711 /** 0712 * Adjust a random-access offset to a code point boundary after a code point. 0713 * If the offset is behind the lead surrogate of a surrogate pair, 0714 * then the offset is incremented. 0715 * Otherwise, it is not modified. 0716 * The input offset may be the same as the string length. 0717 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 0718 * 0719 * The length can be negative for a NUL-terminated string. 0720 * 0721 * @param s const UChar * string 0722 * @param start int32_t starting string offset (usually 0) 0723 * @param i int32_t string offset, start<=i<=length 0724 * @param length int32_t string length 0725 * @see U16_SET_CP_LIMIT_UNSAFE 0726 * @stable ICU 2.4 0727 */ 0728 #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 0729 if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ 0730 ++(i); \ 0731 } \ 0732 } UPRV_BLOCK_MACRO_END 0733 0734 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |