|
||||
Warning, file /include/unicode/messagepattern.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * Copyright (C) 2011-2013, International Business Machines 0006 * Corporation and others. All Rights Reserved. 0007 ******************************************************************************* 0008 * file name: messagepattern.h 0009 * encoding: UTF-8 0010 * tab size: 8 (not used) 0011 * indentation:4 0012 * 0013 * created on: 2011mar14 0014 * created by: Markus W. Scherer 0015 */ 0016 0017 #ifndef __MESSAGEPATTERN_H__ 0018 #define __MESSAGEPATTERN_H__ 0019 0020 /** 0021 * \file 0022 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 0023 */ 0024 0025 #include "unicode/utypes.h" 0026 0027 #if U_SHOW_CPLUSPLUS_API 0028 0029 #if !UCONFIG_NO_FORMATTING 0030 0031 #include "unicode/parseerr.h" 0032 #include "unicode/unistr.h" 0033 0034 /** 0035 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 0036 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 0037 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 0038 * <p> 0039 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 0040 * even when the pair is between two single, text-quoting apostrophes. 0041 * <p> 0042 * The following table shows examples of desired MessageFormat.format() output 0043 * with the pattern strings that yield that output. 0044 * <p> 0045 * <table> 0046 * <tr> 0047 * <th>Desired output</th> 0048 * <th>DOUBLE_OPTIONAL</th> 0049 * <th>DOUBLE_REQUIRED</th> 0050 * </tr> 0051 * <tr> 0052 * <td>I see {many}</td> 0053 * <td>I see '{many}'</td> 0054 * <td>(same)</td> 0055 * </tr> 0056 * <tr> 0057 * <td>I said {'Wow!'}</td> 0058 * <td>I said '{''Wow!''}'</td> 0059 * <td>(same)</td> 0060 * </tr> 0061 * <tr> 0062 * <td>I don't know</td> 0063 * <td>I don't know OR<br> I don''t know</td> 0064 * <td>I don''t know</td> 0065 * </tr> 0066 * </table> 0067 * @stable ICU 4.8 0068 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 0069 */ 0070 enum UMessagePatternApostropheMode { 0071 /** 0072 * A literal apostrophe is represented by 0073 * either a single or a double apostrophe pattern character. 0074 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 0075 * if it immediately precedes a curly brace {}, 0076 * or a pipe symbol | if inside a choice format, 0077 * or a pound symbol # if inside a plural format. 0078 * <p> 0079 * This is the default behavior starting with ICU 4.8. 0080 * @stable ICU 4.8 0081 */ 0082 UMSGPAT_APOS_DOUBLE_OPTIONAL, 0083 /** 0084 * A literal apostrophe must be represented by 0085 * a double apostrophe pattern character. 0086 * A single apostrophe always starts quoted literal text. 0087 * <p> 0088 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 0089 * @stable ICU 4.8 0090 */ 0091 UMSGPAT_APOS_DOUBLE_REQUIRED 0092 }; 0093 /** 0094 * @stable ICU 4.8 0095 */ 0096 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 0097 0098 /** 0099 * MessagePattern::Part type constants. 0100 * @stable ICU 4.8 0101 */ 0102 enum UMessagePatternPartType { 0103 /** 0104 * Start of a message pattern (main or nested). 0105 * The length is 0 for the top-level message 0106 * and for a choice argument sub-message, otherwise 1 for the '{'. 0107 * The value indicates the nesting level, starting with 0 for the main message. 0108 * <p> 0109 * There is always a later MSG_LIMIT part. 0110 * @stable ICU 4.8 0111 */ 0112 UMSGPAT_PART_TYPE_MSG_START, 0113 /** 0114 * End of a message pattern (main or nested). 0115 * The length is 0 for the top-level message and 0116 * the last sub-message of a choice argument, 0117 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 0118 * The value indicates the nesting level, starting with 0 for the main message. 0119 * @stable ICU 4.8 0120 */ 0121 UMSGPAT_PART_TYPE_MSG_LIMIT, 0122 /** 0123 * Indicates a substring of the pattern string which is to be skipped when formatting. 0124 * For example, an apostrophe that begins or ends quoted text 0125 * would be indicated with such a part. 0126 * The value is undefined and currently always 0. 0127 * @stable ICU 4.8 0128 */ 0129 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 0130 /** 0131 * Indicates that a syntax character needs to be inserted for auto-quoting. 0132 * The length is 0. 0133 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 0134 * @stable ICU 4.8 0135 */ 0136 UMSGPAT_PART_TYPE_INSERT_CHAR, 0137 /** 0138 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 0139 * When formatting, replace this part's substring with the 0140 * (value-offset) for the plural argument value. 0141 * The value is undefined and currently always 0. 0142 * @stable ICU 4.8 0143 */ 0144 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 0145 /** 0146 * Start of an argument. 0147 * The length is 1 for the '{'. 0148 * The value is the ordinal value of the ArgType. Use getArgType(). 0149 * <p> 0150 * This part is followed by either an ARG_NUMBER or ARG_NAME, 0151 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 0152 * and finally an ARG_LIMIT part. 0153 * @stable ICU 4.8 0154 */ 0155 UMSGPAT_PART_TYPE_ARG_START, 0156 /** 0157 * End of an argument. 0158 * The length is 1 for the '}'. 0159 * The value is the ordinal value of the ArgType. Use getArgType(). 0160 * @stable ICU 4.8 0161 */ 0162 UMSGPAT_PART_TYPE_ARG_LIMIT, 0163 /** 0164 * The argument number, provided by the value. 0165 * @stable ICU 4.8 0166 */ 0167 UMSGPAT_PART_TYPE_ARG_NUMBER, 0168 /** 0169 * The argument name. 0170 * The value is undefined and currently always 0. 0171 * @stable ICU 4.8 0172 */ 0173 UMSGPAT_PART_TYPE_ARG_NAME, 0174 /** 0175 * The argument type. 0176 * The value is undefined and currently always 0. 0177 * @stable ICU 4.8 0178 */ 0179 UMSGPAT_PART_TYPE_ARG_TYPE, 0180 /** 0181 * The argument style text. 0182 * The value is undefined and currently always 0. 0183 * @stable ICU 4.8 0184 */ 0185 UMSGPAT_PART_TYPE_ARG_STYLE, 0186 /** 0187 * A selector substring in a "complex" argument style. 0188 * The value is undefined and currently always 0. 0189 * @stable ICU 4.8 0190 */ 0191 UMSGPAT_PART_TYPE_ARG_SELECTOR, 0192 /** 0193 * An integer value, for example the offset or an explicit selector value 0194 * in a PluralFormat style. 0195 * The part value is the integer value. 0196 * @stable ICU 4.8 0197 */ 0198 UMSGPAT_PART_TYPE_ARG_INT, 0199 /** 0200 * A numeric value, for example the offset or an explicit selector value 0201 * in a PluralFormat style. 0202 * The part value is an index into an internal array of numeric values; 0203 * use getNumericValue(). 0204 * @stable ICU 4.8 0205 */ 0206 UMSGPAT_PART_TYPE_ARG_DOUBLE 0207 }; 0208 /** 0209 * @stable ICU 4.8 0210 */ 0211 typedef enum UMessagePatternPartType UMessagePatternPartType; 0212 0213 /** 0214 * Argument type constants. 0215 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 0216 * 0217 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 0218 * with a nesting level one greater than the surrounding message. 0219 * @stable ICU 4.8 0220 */ 0221 enum UMessagePatternArgType { 0222 /** 0223 * The argument has no specified type. 0224 * @stable ICU 4.8 0225 */ 0226 UMSGPAT_ARG_TYPE_NONE, 0227 /** 0228 * The argument has a "simple" type which is provided by the ARG_TYPE part. 0229 * An ARG_STYLE part might follow that. 0230 * @stable ICU 4.8 0231 */ 0232 UMSGPAT_ARG_TYPE_SIMPLE, 0233 /** 0234 * The argument is a ChoiceFormat with one or more 0235 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 0236 * @stable ICU 4.8 0237 */ 0238 UMSGPAT_ARG_TYPE_CHOICE, 0239 /** 0240 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 0241 * (e.g., offset:1) 0242 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 0243 * If the selector has an explicit value (e.g., =2), then 0244 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 0245 * Otherwise the message immediately follows the ARG_SELECTOR. 0246 * @stable ICU 4.8 0247 */ 0248 UMSGPAT_ARG_TYPE_PLURAL, 0249 /** 0250 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 0251 * @stable ICU 4.8 0252 */ 0253 UMSGPAT_ARG_TYPE_SELECT, 0254 /** 0255 * The argument is an ordinal-number PluralFormat 0256 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 0257 * @stable ICU 50 0258 */ 0259 UMSGPAT_ARG_TYPE_SELECTORDINAL 0260 }; 0261 /** 0262 * @stable ICU 4.8 0263 */ 0264 typedef enum UMessagePatternArgType UMessagePatternArgType; 0265 0266 /** 0267 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE 0268 * Returns true if the argument type has a plural style part sequence and semantics, 0269 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 0270 * @stable ICU 50 0271 */ 0272 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 0273 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 0274 0275 enum { 0276 /** 0277 * Return value from MessagePattern.validateArgumentName() for when 0278 * the string is a valid "pattern identifier" but not a number. 0279 * @stable ICU 4.8 0280 */ 0281 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 0282 0283 /** 0284 * Return value from MessagePattern.validateArgumentName() for when 0285 * the string is invalid. 0286 * It might not be a valid "pattern identifier", 0287 * or it have only ASCII digits but there is a leading zero or the number is too large. 0288 * @stable ICU 4.8 0289 */ 0290 UMSGPAT_ARG_NAME_NOT_VALID=-2 0291 }; 0292 0293 /** 0294 * Special value that is returned by getNumericValue(Part) when no 0295 * numeric value is defined for a part. 0296 * @see MessagePattern.getNumericValue() 0297 * @stable ICU 4.8 0298 */ 0299 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 0300 0301 U_NAMESPACE_BEGIN 0302 0303 class MessagePatternDoubleList; 0304 class MessagePatternPartsList; 0305 0306 /** 0307 * Parses and represents ICU MessageFormat patterns. 0308 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 0309 * Used in the implementations of those classes as well as in tools 0310 * for message validation, translation and format conversion. 0311 * <p> 0312 * The parser handles all syntax relevant for identifying message arguments. 0313 * This includes "complex" arguments whose style strings contain 0314 * nested MessageFormat pattern substrings. 0315 * For "simple" arguments (with no nested MessageFormat pattern substrings), 0316 * the argument style is not parsed any further. 0317 * <p> 0318 * The parser handles named and numbered message arguments and allows both in one message. 0319 * <p> 0320 * Once a pattern has been parsed successfully, iterate through the parsed data 0321 * with countParts(), getPart() and related methods. 0322 * <p> 0323 * The data logically represents a parse tree, but is stored and accessed 0324 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 0325 * Arguments and nested messages are best handled via recursion. 0326 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 0327 * the index of the corresponding _LIMIT "part". 0328 * <p> 0329 * List of "parts": 0330 * <pre> 0331 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 0332 * argument = noneArg | simpleArg | complexArg 0333 * complexArg = choiceArg | pluralArg | selectArg 0334 * 0335 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 0336 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 0337 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 0338 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 0339 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 0340 * 0341 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 0342 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 0343 * selectStyle = (ARG_SELECTOR message)+ 0344 * </pre> 0345 * <ul> 0346 * <li>Literal output text is not represented directly by "parts" but accessed 0347 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 0348 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 0349 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 0350 * the less-than-or-equal-to sign (U+2264). 0351 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 0352 * The optional numeric Part between each (ARG_SELECTOR, message) pair 0353 * is the value of an explicit-number selector like "=2", 0354 * otherwise the selector is a non-numeric identifier. 0355 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 0356 * </ul> 0357 * <p> 0358 * This class is not intended for public subclassing. 0359 * 0360 * @stable ICU 4.8 0361 */ 0362 class U_COMMON_API MessagePattern : public UObject { 0363 public: 0364 /** 0365 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 0366 * @param errorCode Standard ICU error code. Its input value must 0367 * pass the U_SUCCESS() test, or else the function returns 0368 * immediately. Check for U_FAILURE() on output or use with 0369 * function chaining. (See User Guide for details.) 0370 * @stable ICU 4.8 0371 */ 0372 MessagePattern(UErrorCode &errorCode); 0373 0374 /** 0375 * Constructs an empty MessagePattern. 0376 * @param mode Explicit UMessagePatternApostropheMode. 0377 * @param errorCode Standard ICU error code. Its input value must 0378 * pass the U_SUCCESS() test, or else the function returns 0379 * immediately. Check for U_FAILURE() on output or use with 0380 * function chaining. (See User Guide for details.) 0381 * @stable ICU 4.8 0382 */ 0383 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 0384 0385 /** 0386 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 0387 * parses the MessageFormat pattern string. 0388 * @param pattern a MessageFormat pattern string 0389 * @param parseError Struct to receive information on the position 0390 * of an error within the pattern. 0391 * Can be nullptr. 0392 * @param errorCode Standard ICU error code. Its input value must 0393 * pass the U_SUCCESS() test, or else the function returns 0394 * immediately. Check for U_FAILURE() on output or use with 0395 * function chaining. (See User Guide for details.) 0396 * TODO: turn @throws into UErrorCode specifics? 0397 * @throws IllegalArgumentException for syntax errors in the pattern string 0398 * @throws IndexOutOfBoundsException if certain limits are exceeded 0399 * (e.g., argument number too high, argument name too long, etc.) 0400 * @throws NumberFormatException if a number could not be parsed 0401 * @stable ICU 4.8 0402 */ 0403 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 0404 0405 /** 0406 * Copy constructor. 0407 * @param other Object to copy. 0408 * @stable ICU 4.8 0409 */ 0410 MessagePattern(const MessagePattern &other); 0411 0412 /** 0413 * Assignment operator. 0414 * @param other Object to copy. 0415 * @return *this=other 0416 * @stable ICU 4.8 0417 */ 0418 MessagePattern &operator=(const MessagePattern &other); 0419 0420 /** 0421 * Destructor. 0422 * @stable ICU 4.8 0423 */ 0424 virtual ~MessagePattern(); 0425 0426 /** 0427 * Parses a MessageFormat pattern string. 0428 * @param pattern a MessageFormat pattern string 0429 * @param parseError Struct to receive information on the position 0430 * of an error within the pattern. 0431 * Can be nullptr. 0432 * @param errorCode Standard ICU error code. Its input value must 0433 * pass the U_SUCCESS() test, or else the function returns 0434 * immediately. Check for U_FAILURE() on output or use with 0435 * function chaining. (See User Guide for details.) 0436 * @return *this 0437 * @throws IllegalArgumentException for syntax errors in the pattern string 0438 * @throws IndexOutOfBoundsException if certain limits are exceeded 0439 * (e.g., argument number too high, argument name too long, etc.) 0440 * @throws NumberFormatException if a number could not be parsed 0441 * @stable ICU 4.8 0442 */ 0443 MessagePattern &parse(const UnicodeString &pattern, 0444 UParseError *parseError, UErrorCode &errorCode); 0445 0446 /** 0447 * Parses a ChoiceFormat pattern string. 0448 * @param pattern a ChoiceFormat pattern string 0449 * @param parseError Struct to receive information on the position 0450 * of an error within the pattern. 0451 * Can be nullptr. 0452 * @param errorCode Standard ICU error code. Its input value must 0453 * pass the U_SUCCESS() test, or else the function returns 0454 * immediately. Check for U_FAILURE() on output or use with 0455 * function chaining. (See User Guide for details.) 0456 * @return *this 0457 * @throws IllegalArgumentException for syntax errors in the pattern string 0458 * @throws IndexOutOfBoundsException if certain limits are exceeded 0459 * (e.g., argument number too high, argument name too long, etc.) 0460 * @throws NumberFormatException if a number could not be parsed 0461 * @stable ICU 4.8 0462 */ 0463 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 0464 UParseError *parseError, UErrorCode &errorCode); 0465 0466 /** 0467 * Parses a PluralFormat pattern string. 0468 * @param pattern a PluralFormat pattern string 0469 * @param parseError Struct to receive information on the position 0470 * of an error within the pattern. 0471 * Can be nullptr. 0472 * @param errorCode Standard ICU error code. Its input value must 0473 * pass the U_SUCCESS() test, or else the function returns 0474 * immediately. Check for U_FAILURE() on output or use with 0475 * function chaining. (See User Guide for details.) 0476 * @return *this 0477 * @throws IllegalArgumentException for syntax errors in the pattern string 0478 * @throws IndexOutOfBoundsException if certain limits are exceeded 0479 * (e.g., argument number too high, argument name too long, etc.) 0480 * @throws NumberFormatException if a number could not be parsed 0481 * @stable ICU 4.8 0482 */ 0483 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 0484 UParseError *parseError, UErrorCode &errorCode); 0485 0486 /** 0487 * Parses a SelectFormat pattern string. 0488 * @param pattern a SelectFormat pattern string 0489 * @param parseError Struct to receive information on the position 0490 * of an error within the pattern. 0491 * Can be nullptr. 0492 * @param errorCode Standard ICU error code. Its input value must 0493 * pass the U_SUCCESS() test, or else the function returns 0494 * immediately. Check for U_FAILURE() on output or use with 0495 * function chaining. (See User Guide for details.) 0496 * @return *this 0497 * @throws IllegalArgumentException for syntax errors in the pattern string 0498 * @throws IndexOutOfBoundsException if certain limits are exceeded 0499 * (e.g., argument number too high, argument name too long, etc.) 0500 * @throws NumberFormatException if a number could not be parsed 0501 * @stable ICU 4.8 0502 */ 0503 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 0504 UParseError *parseError, UErrorCode &errorCode); 0505 0506 /** 0507 * Clears this MessagePattern. 0508 * countParts() will return 0. 0509 * @stable ICU 4.8 0510 */ 0511 void clear(); 0512 0513 /** 0514 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 0515 * countParts() will return 0. 0516 * @param mode The new UMessagePatternApostropheMode. 0517 * @stable ICU 4.8 0518 */ 0519 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 0520 clear(); 0521 aposMode=mode; 0522 } 0523 0524 /** 0525 * @param other another object to compare with. 0526 * @return true if this object is equivalent to the other one. 0527 * @stable ICU 4.8 0528 */ 0529 bool operator==(const MessagePattern &other) const; 0530 0531 /** 0532 * @param other another object to compare with. 0533 * @return false if this object is equivalent to the other one. 0534 * @stable ICU 4.8 0535 */ 0536 inline bool operator!=(const MessagePattern &other) const { 0537 return !operator==(other); 0538 } 0539 0540 /** 0541 * @return A hash code for this object. 0542 * @stable ICU 4.8 0543 */ 0544 int32_t hashCode() const; 0545 0546 /** 0547 * @return this instance's UMessagePatternApostropheMode. 0548 * @stable ICU 4.8 0549 */ 0550 UMessagePatternApostropheMode getApostropheMode() const { 0551 return aposMode; 0552 } 0553 0554 // Java has package-private jdkAposMode() here. 0555 // In C++, this is declared in the MessageImpl class. 0556 0557 /** 0558 * @return the parsed pattern string (null if none was parsed). 0559 * @stable ICU 4.8 0560 */ 0561 const UnicodeString &getPatternString() const { 0562 return msg; 0563 } 0564 0565 /** 0566 * Does the parsed pattern have named arguments like {first_name}? 0567 * @return true if the parsed pattern has at least one named argument. 0568 * @stable ICU 4.8 0569 */ 0570 UBool hasNamedArguments() const { 0571 return hasArgNames; 0572 } 0573 0574 /** 0575 * Does the parsed pattern have numbered arguments like {2}? 0576 * @return true if the parsed pattern has at least one numbered argument. 0577 * @stable ICU 4.8 0578 */ 0579 UBool hasNumberedArguments() const { 0580 return hasArgNumbers; 0581 } 0582 0583 /** 0584 * Validates and parses an argument name or argument number string. 0585 * An argument name must be a "pattern identifier", that is, it must contain 0586 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 0587 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 0588 * @param name Input string. 0589 * @return >=0 if the name is a valid number, 0590 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 0591 * ARG_NAME_NOT_VALID (-2) if it is neither. 0592 * @stable ICU 4.8 0593 */ 0594 static int32_t validateArgumentName(const UnicodeString &name); 0595 0596 /** 0597 * Returns a version of the parsed pattern string where each ASCII apostrophe 0598 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 0599 * <p> 0600 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 0601 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 0602 * @return the deep-auto-quoted version of the parsed pattern string. 0603 * @see MessageFormat.autoQuoteApostrophe() 0604 * @stable ICU 4.8 0605 */ 0606 UnicodeString autoQuoteApostropheDeep() const; 0607 0608 class Part; 0609 0610 /** 0611 * Returns the number of "parts" created by parsing the pattern string. 0612 * Returns 0 if no pattern has been parsed or clear() was called. 0613 * @return the number of pattern parts. 0614 * @stable ICU 4.8 0615 */ 0616 int32_t countParts() const { 0617 return partsLength; 0618 } 0619 0620 /** 0621 * Gets the i-th pattern "part". 0622 * @param i The index of the Part data. (0..countParts()-1) 0623 * @return the i-th pattern "part". 0624 * @stable ICU 4.8 0625 */ 0626 const Part &getPart(int32_t i) const { 0627 return parts[i]; 0628 } 0629 0630 /** 0631 * Returns the UMessagePatternPartType of the i-th pattern "part". 0632 * Convenience method for getPart(i).getType(). 0633 * @param i The index of the Part data. (0..countParts()-1) 0634 * @return The UMessagePatternPartType of the i-th Part. 0635 * @stable ICU 4.8 0636 */ 0637 UMessagePatternPartType getPartType(int32_t i) const { 0638 return getPart(i).type; 0639 } 0640 0641 /** 0642 * Returns the pattern index of the specified pattern "part". 0643 * Convenience method for getPart(partIndex).getIndex(). 0644 * @param partIndex The index of the Part data. (0..countParts()-1) 0645 * @return The pattern index of this Part. 0646 * @stable ICU 4.8 0647 */ 0648 int32_t getPatternIndex(int32_t partIndex) const { 0649 return getPart(partIndex).index; 0650 } 0651 0652 /** 0653 * Returns the substring of the pattern string indicated by the Part. 0654 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 0655 * @param part a part of this MessagePattern. 0656 * @return the substring associated with part. 0657 * @stable ICU 4.8 0658 */ 0659 UnicodeString getSubstring(const Part &part) const { 0660 return msg.tempSubString(part.index, part.length); 0661 } 0662 0663 /** 0664 * Compares the part's substring with the input string s. 0665 * @param part a part of this MessagePattern. 0666 * @param s a string. 0667 * @return true if getSubstring(part).equals(s). 0668 * @stable ICU 4.8 0669 */ 0670 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 0671 return 0==msg.compare(part.index, part.length, s); 0672 } 0673 0674 /** 0675 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 0676 * @param part a part of this MessagePattern. 0677 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 0678 * @stable ICU 4.8 0679 */ 0680 double getNumericValue(const Part &part) const; 0681 0682 /** 0683 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 0684 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 0685 * @return the "offset:" value. 0686 * @stable ICU 4.8 0687 */ 0688 double getPluralOffset(int32_t pluralStart) const; 0689 0690 /** 0691 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 0692 * @param start The index of some Part data (0..countParts()-1); 0693 * this Part should be of Type ARG_START or MSG_START. 0694 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 0695 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 0696 * @stable ICU 4.8 0697 */ 0698 int32_t getLimitPartIndex(int32_t start) const { 0699 int32_t limit=getPart(start).limitPartIndex; 0700 if(limit<start) { 0701 return start; 0702 } 0703 return limit; 0704 } 0705 0706 /** 0707 * A message pattern "part", representing a pattern parsing event. 0708 * There is a part for the start and end of a message or argument, 0709 * for quoting and escaping of and with ASCII apostrophes, 0710 * and for syntax elements of "complex" arguments. 0711 * @stable ICU 4.8 0712 */ 0713 class Part : public UMemory { 0714 public: 0715 /** 0716 * Default constructor, do not use. 0717 * @internal 0718 */ 0719 Part() {} 0720 0721 /** 0722 * Returns the type of this part. 0723 * @return the part type. 0724 * @stable ICU 4.8 0725 */ 0726 UMessagePatternPartType getType() const { 0727 return type; 0728 } 0729 0730 /** 0731 * Returns the pattern string index associated with this Part. 0732 * @return this part's pattern string index. 0733 * @stable ICU 4.8 0734 */ 0735 int32_t getIndex() const { 0736 return index; 0737 } 0738 0739 /** 0740 * Returns the length of the pattern substring associated with this Part. 0741 * This is 0 for some parts. 0742 * @return this part's pattern substring length. 0743 * @stable ICU 4.8 0744 */ 0745 int32_t getLength() const { 0746 return length; 0747 } 0748 0749 /** 0750 * Returns the pattern string limit (exclusive-end) index associated with this Part. 0751 * Convenience method for getIndex()+getLength(). 0752 * @return this part's pattern string limit index, same as getIndex()+getLength(). 0753 * @stable ICU 4.8 0754 */ 0755 int32_t getLimit() const { 0756 return index+length; 0757 } 0758 0759 /** 0760 * Returns a value associated with this part. 0761 * See the documentation of each part type for details. 0762 * @return the part value. 0763 * @stable ICU 4.8 0764 */ 0765 int32_t getValue() const { 0766 return value; 0767 } 0768 0769 /** 0770 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 0771 * otherwise UMSGPAT_ARG_TYPE_NONE. 0772 * @return the argument type for this part. 0773 * @stable ICU 4.8 0774 */ 0775 UMessagePatternArgType getArgType() const { 0776 UMessagePatternPartType msgType=getType(); 0777 if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) { 0778 return (UMessagePatternArgType)value; 0779 } else { 0780 return UMSGPAT_ARG_TYPE_NONE; 0781 } 0782 } 0783 0784 /** 0785 * Indicates whether the Part type has a numeric value. 0786 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 0787 * @param type The Part type to be tested. 0788 * @return true if the Part type has a numeric value. 0789 * @stable ICU 4.8 0790 */ 0791 static UBool hasNumericValue(UMessagePatternPartType type) { 0792 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 0793 } 0794 0795 /** 0796 * @param other another object to compare with. 0797 * @return true if this object is equivalent to the other one. 0798 * @stable ICU 4.8 0799 */ 0800 bool operator==(const Part &other) const; 0801 0802 /** 0803 * @param other another object to compare with. 0804 * @return false if this object is equivalent to the other one. 0805 * @stable ICU 4.8 0806 */ 0807 inline bool operator!=(const Part &other) const { 0808 return !operator==(other); 0809 } 0810 0811 /** 0812 * @return A hash code for this object. 0813 * @stable ICU 4.8 0814 */ 0815 int32_t hashCode() const { 0816 return ((type*37+index)*37+length)*37+value; 0817 } 0818 0819 private: 0820 friend class MessagePattern; 0821 0822 static const int32_t MAX_LENGTH=0xffff; 0823 static const int32_t MAX_VALUE=0x7fff; 0824 0825 // Some fields are not final because they are modified during pattern parsing. 0826 // After pattern parsing, the parts are effectively immutable. 0827 UMessagePatternPartType type; 0828 int32_t index; 0829 uint16_t length; 0830 int16_t value; 0831 int32_t limitPartIndex; 0832 }; 0833 0834 private: 0835 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 0836 0837 void postParse(); 0838 0839 int32_t parseMessage(int32_t index, int32_t msgStartLength, 0840 int32_t nestingLevel, UMessagePatternArgType parentType, 0841 UParseError *parseError, UErrorCode &errorCode); 0842 0843 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 0844 UParseError *parseError, UErrorCode &errorCode); 0845 0846 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 0847 0848 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 0849 UParseError *parseError, UErrorCode &errorCode); 0850 0851 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 0852 UParseError *parseError, UErrorCode &errorCode); 0853 0854 /** 0855 * Validates and parses an argument name or argument number string. 0856 * This internal method assumes that the input substring is a "pattern identifier". 0857 * @return >=0 if the name is a valid number, 0858 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 0859 * ARG_NAME_NOT_VALID (-2) if it is neither. 0860 * @see #validateArgumentName(String) 0861 */ 0862 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 0863 0864 int32_t parseArgNumber(int32_t start, int32_t limit) { 0865 return parseArgNumber(msg, start, limit); 0866 } 0867 0868 /** 0869 * Parses a number from the specified message substring. 0870 * @param start start index into the message string 0871 * @param limit limit index into the message string, must be start<limit 0872 * @param allowInfinity true if U+221E is allowed (for ChoiceFormat) 0873 * @param parseError 0874 * @param errorCode 0875 */ 0876 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 0877 UParseError *parseError, UErrorCode &errorCode); 0878 0879 // Java has package-private appendReducedApostrophes() here. 0880 // In C++, this is declared in the MessageImpl class. 0881 0882 int32_t skipWhiteSpace(int32_t index); 0883 0884 int32_t skipIdentifier(int32_t index); 0885 0886 /** 0887 * Skips a sequence of characters that could occur in a double value. 0888 * Does not fully parse or validate the value. 0889 */ 0890 int32_t skipDouble(int32_t index); 0891 0892 static UBool isArgTypeChar(UChar32 c); 0893 0894 UBool isChoice(int32_t index); 0895 0896 UBool isPlural(int32_t index); 0897 0898 UBool isSelect(int32_t index); 0899 0900 UBool isOrdinal(int32_t index); 0901 0902 /** 0903 * @return true if we are inside a MessageFormat (sub-)pattern, 0904 * as opposed to inside a top-level choice/plural/select pattern. 0905 */ 0906 UBool inMessageFormatPattern(int32_t nestingLevel); 0907 0908 /** 0909 * @return true if we are in a MessageFormat sub-pattern 0910 * of a top-level ChoiceFormat pattern. 0911 */ 0912 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 0913 0914 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 0915 int32_t value, UErrorCode &errorCode); 0916 0917 void addLimitPart(int32_t start, 0918 UMessagePatternPartType type, int32_t index, int32_t length, 0919 int32_t value, UErrorCode &errorCode); 0920 0921 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 0922 0923 void setParseError(UParseError *parseError, int32_t index); 0924 0925 UBool init(UErrorCode &errorCode); 0926 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 0927 0928 UMessagePatternApostropheMode aposMode; 0929 UnicodeString msg; 0930 // ArrayList<Part> parts=new ArrayList<Part>(); 0931 MessagePatternPartsList *partsList; 0932 Part *parts; 0933 int32_t partsLength; 0934 // ArrayList<Double> numericValues; 0935 MessagePatternDoubleList *numericValuesList; 0936 double *numericValues; 0937 int32_t numericValuesLength; 0938 UBool hasArgNames; 0939 UBool hasArgNumbers; 0940 UBool needsAutoQuoting; 0941 }; 0942 0943 U_NAMESPACE_END 0944 0945 #endif // !UCONFIG_NO_FORMATTING 0946 0947 #endif /* U_SHOW_CPLUSPLUS_API */ 0948 0949 #endif // __MESSAGEPATTERN_H__
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |