Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/unicode/selfmt.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /********************************************************************
0004  * COPYRIGHT:
0005  * Copyright (c) 1997-2011, International Business Machines Corporation and
0006  * others. All Rights Reserved.
0007  * Copyright (C) 2010 , Yahoo! Inc.
0008  ********************************************************************
0009  *
0010  * File SELFMT.H
0011  *
0012  * Modification History:
0013  *
0014  *   Date        Name        Description
0015  *   11/11/09    kirtig      Finished first cut of implementation.
0016  ********************************************************************/
0017 
0018 #ifndef SELFMT
0019 #define SELFMT
0020 
0021 #include "unicode/utypes.h"
0022 
0023 #if U_SHOW_CPLUSPLUS_API
0024 
0025 #include "unicode/messagepattern.h"
0026 #include "unicode/numfmt.h"
0027 
0028 /**
0029  * \file
0030  * \brief C++ API: SelectFormat object
0031  */
0032 
0033 #if !UCONFIG_NO_FORMATTING
0034 
0035 U_NAMESPACE_BEGIN
0036 
0037 class MessageFormat;
0038 
0039 /**
0040   * <p><code>SelectFormat</code> supports the creation of  internationalized
0041   * messages by selecting phrases based on keywords. The pattern  specifies
0042   * how to map keywords to phrases and provides a default phrase. The
0043   * object provided to the format method is a string that's matched
0044   * against the keywords. If there is a match, the corresponding phrase
0045   * is selected; otherwise, the default phrase is used.</p>
0046   *
0047   * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
0048   *
0049   * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
0050   * with a <code>select</code> argument type,
0051   * rather than using a stand-alone <code>SelectFormat</code>.</p>
0052   *
0053   * <p>The main use case for the select format is gender based  inflection.
0054   * When names or nouns are inserted into sentences, their gender can  affect pronouns,
0055   * verb forms, articles, and adjectives. Special care needs to be
0056   * taken for the case where the gender cannot be determined.
0057   * The impact varies between languages:</p>
0058   * \htmlonly
0059   * <ul>
0060   * <li>English has three genders, and unknown gender is handled as a  special
0061   * case. Names use the gender of the named person (if known), nouns  referring
0062   * to people use natural gender, and inanimate objects are usually  neutral.
0063   * The gender only affects pronouns: "he", "she", "it", "they".
0064   *
0065   * <li>German differs from English in that the gender of nouns is  rather
0066   * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
0067   * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
0068   * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
0069   *
0070   * <li>French has only two genders; as in German the gender of nouns
0071   * is rather arbitrary - for sun and moon, the genders
0072   * are the opposite of those in German. The gender affects
0073   * pronouns ("il", "elle"), articles ("le", "la"),
0074   * adjective forms ("bon", "bonne"), and sometimes
0075   * verb forms ("all&#x00E9;", "all&#x00E9;e").
0076   *
0077   * <li>Polish distinguishes five genders (or noun classes),
0078   * human masculine, animate non-human masculine, inanimate masculine,
0079   * feminine, and neuter.
0080   * </ul>
0081   * \endhtmlonly
0082   * <p>Some other languages have noun classes that are not related to  gender,
0083   * but similar in grammatical use.
0084   * Some African languages have around 20 noun classes.</p>
0085   *
0086   * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
0087   * we usually need to distinguish only between female, male and other/unknown.</p>
0088   *
0089   * <p>To enable localizers to create sentence patterns that take their
0090   * language's gender dependencies into consideration, software has to  provide
0091   * information about the gender associated with a noun or name to
0092   * <code>MessageFormat</code>.
0093   * Two main cases can be distinguished:</p>
0094   *
0095   * <ul>
0096   * <li>For people, natural gender information should be maintained  for each person.
0097   * Keywords like "male", "female", "mixed" (for groups of people)
0098   * and "unknown" could be used.
0099   *
0100   * <li>For nouns, grammatical gender information should be maintained  for
0101   * each noun and per language, e.g., in resource bundles.
0102   * The keywords "masculine", "feminine", and "neuter" are commonly  used,
0103   * but some languages may require other keywords.
0104   * </ul>
0105   *
0106   * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
0107   * parameter separate from the name or noun it's associated with. For  example,
0108   * to generate a message such as "Jean went to Paris", three separate  arguments
0109   * would be provided: The name of the person as argument 0, the  gender of
0110   * the person as argument 1, and the name of the city as argument 2.
0111   * The sentence pattern for English, where the gender of the person has
0112   * no impact on this simple sentence, would not refer to argument 1  at all:</p>
0113   *
0114   * <pre>{0} went to {2}.</pre>
0115   *
0116   * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
0117   * inside each phrase. Otherwise translators would have to be trained on how to
0118   * move bits of the sentence in and out of the select argument of a message.
0119   * (The examples below do not follow this recommendation!)</p>
0120   *
0121   * <p>The sentence pattern for French, where the gender of the person affects
0122   * the form of the participle, uses a select format based on argument 1:</p>
0123   *
0124   * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
0125   *
0126   * <p>Patterns can be nested, so that it's possible to handle  interactions of
0127   * number and gender where necessary. For example, if the above  sentence should
0128   * allow for the names of several people to be inserted, the  following sentence
0129   * pattern can be used (with argument 0 the list of people's names,
0130   * argument 1 the number of people, argument 2 their combined gender, and
0131   * argument 3 the city name):</p>
0132   *
0133   * \htmlonly
0134   * <pre>{0} {1, plural,
0135   *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
0136   *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
0137   *          }&#x00E0; {3}.</pre>
0138   * \endhtmlonly
0139   *
0140   * <h4>Patterns and Their Interpretation</h4>
0141   *
0142   * <p>The <code>SelectFormat</code> pattern string defines the phrase output
0143   * for each user-defined keyword.
0144   * The pattern is a sequence of (keyword, message) pairs.
0145   * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
0146   *
0147   * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
0148   *
0149   * <p>You always have to define a phrase for the default keyword
0150   * <code>other</code>; this phrase is returned when the keyword
0151   * provided to
0152   * the <code>format</code> method matches no other keyword.
0153   * If a pattern does not provide a phrase for <code>other</code>, the  method
0154   * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
0155   * <br>
0156   * Pattern_White_Space between keywords and messages is ignored.
0157   * Pattern_White_Space within a message is preserved and output.</p>
0158   *
0159   * <p><pre>Example:
0160   * \htmlonly
0161   *
0162   * UErrorCode status = U_ZERO_ERROR;
0163   * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
0164   * if (U_FAILURE(status)) {
0165   *       return;
0166   * }
0167   * FieldPosition ignore(FieldPosition::DONT_CARE);
0168   * UnicodeString result;
0169   *
0170   * const char* str1 = "Kirti,female";
0171   * Formattable args1[] = {"Kirti","female"};
0172   * msgFmt->format(args1, 2, result, ignore, status);
0173   * cout << "Input is " << str1 << " and result is: " << result << endl;
0174   * delete msgFmt;
0175   *
0176   * \endhtmlonly
0177   * </pre>
0178   * </p>
0179   *
0180   * Produces the output:<br>
0181   * \htmlonly
0182   * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
0183   * \endhtmlonly
0184   *
0185   * @stable ICU 4.4
0186   */
0187 
0188 class U_I18N_API SelectFormat : public Format {
0189 public:
0190 
0191     /**
0192      * Creates a new <code>SelectFormat</code> for a given pattern string.
0193      * @param  pattern the pattern for this <code>SelectFormat</code>.
0194      *                 errors are returned to status if the pattern is invalid.
0195      * @param status   output param set to success/failure code on exit, which
0196      *                 must not indicate a failure before the function call.
0197      * @stable ICU 4.4
0198      */
0199     SelectFormat(const UnicodeString& pattern, UErrorCode& status);
0200 
0201     /**
0202      * Copy constructor.
0203      * @stable ICU 4.4
0204      */
0205     SelectFormat(const SelectFormat& other);
0206 
0207     /**
0208      * Destructor.
0209      * @stable ICU 4.4
0210      */
0211     virtual ~SelectFormat();
0212 
0213     /**
0214      * Sets the pattern used by this select format
0215      * for the keyword rules.
0216      * Patterns and their interpretation are specified in the class description.
0217      *
0218      * @param pattern the pattern for this select format
0219      *                errors are returned to status if the pattern is invalid.
0220      * @param status  output param set to success/failure code on exit, which
0221      *                must not indicate a failure before the function call.
0222      * @stable ICU 4.4
0223      */
0224     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
0225 
0226 
0227     using Format::format;
0228 
0229     /**
0230      * Selects the phrase for the given keyword.
0231      *
0232      * @param keyword  The keyword that is used to select an alternative.
0233      * @param appendTo output parameter to receive result.
0234      *                 result is appended to existing contents.
0235      * @param pos      On input: an alignment field, if desired.
0236      *                 On output: the offsets of the alignment field.
0237      * @param status   output param set to success/failure code on exit, which
0238      *                 must not indicate a failure before the function call.
0239      * @return         Reference to 'appendTo' parameter.
0240      * @stable ICU 4.4
0241      */
0242     UnicodeString& format(const UnicodeString& keyword,
0243                             UnicodeString& appendTo,
0244                             FieldPosition& pos,
0245                             UErrorCode& status) const;
0246 
0247     /**
0248      * Assignment operator
0249      *
0250      * @param other    the SelectFormat object to copy from.
0251      * @stable ICU 4.4
0252      */
0253     SelectFormat& operator=(const SelectFormat& other);
0254 
0255     /**
0256      * Return true if another object is semantically equal to this one.
0257      *
0258      * @param other    the Format object to be compared with.
0259      * @return         true if other is semantically equal to this.
0260      * @stable ICU 4.4
0261      */
0262     virtual bool operator==(const Format& other) const override;
0263 
0264     /**
0265      * Return true if another object is semantically unequal to this one.
0266      *
0267      * @param other    the Format object to be compared with.
0268      * @return         true if other is semantically unequal to this.
0269      * @stable ICU 4.4
0270      */
0271     virtual bool operator!=(const Format& other) const;
0272 
0273     /**
0274      * Clones this Format object polymorphically.  The caller owns the
0275      * result and should delete it when done.
0276      * @stable ICU 4.4
0277      */
0278     virtual SelectFormat* clone() const override;
0279 
0280     /**
0281      * Format an object to produce a string.
0282      * This method handles keyword strings.
0283      * If the Formattable object is not a <code>UnicodeString</code>,
0284      * then it returns a failing UErrorCode.
0285      *
0286      * @param obj       A keyword string that is used to select an alternative.
0287      * @param appendTo  output parameter to receive result.
0288      *                  Result is appended to existing contents.
0289      * @param pos       On input: an alignment field, if desired.
0290      *                  On output: the offsets of the alignment field.
0291      * @param status    output param filled with success/failure status.
0292      * @return          Reference to 'appendTo' parameter.
0293      * @stable ICU 4.4
0294      */
0295     UnicodeString& format(const Formattable& obj,
0296                          UnicodeString& appendTo,
0297                          FieldPosition& pos,
0298                          UErrorCode& status) const override;
0299 
0300     /**
0301      * Returns the pattern from applyPattern() or constructor.
0302      *
0303      * @param  appendTo  output parameter to receive result.
0304      *                  Result is appended to existing contents.
0305      * @return the UnicodeString with inserted pattern.
0306      * @stable ICU 4.4
0307      */
0308     UnicodeString& toPattern(UnicodeString& appendTo);
0309 
0310     /**
0311      * This method is not yet supported by <code>SelectFormat</code>.
0312      * <P>
0313      * Before calling, set parse_pos.index to the offset you want to start
0314      * parsing at in the source. After calling, parse_pos.index is the end of
0315      * the text you parsed. If error occurs, index is unchanged.
0316      * <P>
0317      * When parsing, leading whitespace is discarded (with a successful parse),
0318      * while trailing whitespace is left as is.
0319      * <P>
0320      * See Format::parseObject() for more.
0321      *
0322      * @param source     The string to be parsed into an object.
0323      * @param result     Formattable to be set to the parse result.
0324      *     If parse fails, return contents are undefined.
0325      * @param parse_pos The position to start parsing at. Upon return
0326      *     this param is set to the position after the
0327      *     last character successfully parsed. If the
0328      *     source is not parsed successfully, this param
0329      *     will remain unchanged.
0330      * @stable ICU 4.4
0331      */
0332     virtual void parseObject(const UnicodeString& source,
0333                             Formattable& result,
0334                             ParsePosition& parse_pos) const override;
0335 
0336     /**
0337      * ICU "poor man's RTTI", returns a UClassID for this class.
0338      * @stable ICU 4.4
0339      */
0340     static UClassID U_EXPORT2 getStaticClassID(void);
0341 
0342     /**
0343      * ICU "poor man's RTTI", returns a UClassID for the actual class.
0344      * @stable ICU 4.4
0345      */
0346     virtual UClassID getDynamicClassID() const override;
0347 
0348 private:
0349     friend class MessageFormat;
0350 
0351     SelectFormat() = delete;   // default constructor is explicitly deleted.
0352 
0353     /**
0354      * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
0355      * @param pattern A MessagePattern.
0356      * @param partIndex the index of the first SelectFormat argument style part.
0357      * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
0358      * @param ec Error code.
0359      * @return the sub-message start part index.
0360      */
0361     static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
0362                                   const UnicodeString& keyword, UErrorCode& ec);
0363 
0364     MessagePattern msgPattern;
0365 };
0366 
0367 U_NAMESPACE_END
0368 
0369 #endif /* #if !UCONFIG_NO_FORMATTING */
0370 
0371 #endif /* U_SHOW_CPLUSPLUS_API */
0372 
0373 #endif // SELFMT
0374 //eof