Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/unicode/symtable.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 **********************************************************************
0005 *   Copyright (c) 2000-2005, International Business Machines
0006 *   Corporation and others.  All Rights Reserved.
0007 **********************************************************************
0008 *   Date        Name        Description
0009 *   02/04/00    aliu        Creation.
0010 **********************************************************************
0011 */
0012 #ifndef SYMTABLE_H
0013 #define SYMTABLE_H
0014 
0015 #include "unicode/utypes.h"
0016 
0017 #if U_SHOW_CPLUSPLUS_API
0018 
0019 #include "unicode/uobject.h"
0020 
0021 /**
0022  * \file 
0023  * \brief C++ API: An interface that defines both lookup protocol and parsing of
0024  * symbolic names.
0025  */
0026  
0027 U_NAMESPACE_BEGIN
0028 
0029 class ParsePosition;
0030 class UnicodeFunctor;
0031 class UnicodeSet;
0032 class UnicodeString;
0033 
0034 /**
0035  * An interface that defines both the lookup protocol and the parsing of
0036  * symbolic names.
0037  *
0038  * <p>A symbol table maintains two kinds of mappings.  The first is
0039  * between symbolic names and their values.  For example, if the
0040  * variable with the name "start" is set to the value "alpha"
0041  * (perhaps, though not necessarily, through an expression such as
0042  * "$start=alpha"), then the call lookup("start") will return a
0043  * pointer to a UnicodeString containing "alpha".
0044  *
0045  * <p>The second kind of mapping is between character values and
0046  * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
0047  * which uses characters in the private use area to represent objects
0048  * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
0049  * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
0050  *
0051  * <p>Finally, a symbol table defines parsing behavior for symbolic
0052  * names.  All symbolic names start with the SYMBOL_REF character.
0053  * When a parser encounters this character, it calls parseReference()
0054  * with the position immediately following the SYMBOL_REF.  The symbol
0055  * table parses the name, if there is one, and returns it.
0056  *
0057  * @stable ICU 2.8
0058  */
0059 class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
0060 public:
0061 
0062     /**
0063      * The character that introduces a symbol reference name: '$' (U+0024).
0064      * @stable ICU 2.8
0065      */
0066     enum { SYMBOL_REF = 0x0024 /*$*/ };
0067 
0068     /**
0069      * Virtual destructor.
0070      * @stable ICU 2.8
0071      */
0072     virtual ~SymbolTable();
0073 
0074     /**
0075      * Look up the string value associated with the given symbolic name and
0076      * return it.  Return <tt>nullptr</tt> if no such name exists.  The
0077      * resultant string may have length zero.
0078      * @param s the symbolic name to look up
0079      * @return a pointer to the string containing the name's value, or
0080      * <tt>nullptr</tt> if there is no mapping for s.
0081      * @stable ICU 2.8
0082      */
0083     virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
0084 
0085     /**
0086      * Look up the UnicodeMatcher associated with the given character, and
0087      * return it as a const UnicodeFunctor*.  Return <tt>nullptr</tt> if not found.
0088      * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
0089      * @return the UnicodeMatcher object represented by the given
0090      * character, or <tt>nullptr</tt> if there is no mapping for ch.
0091      * @stable ICU 2.8
0092      */
0093     virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
0094 
0095     /**
0096      * Parse a symbol reference name from the given string, starting
0097      * at the given position.  If no valid symbol reference name is
0098      * found, return an empty string and leave pos unchanged.  That is, if the
0099      * character at pos cannot start a name, or if pos is at or after
0100      * text.length(), then return an empty string.  This indicates an
0101      * isolated SYMBOL_REF character.
0102      * @param text the text to parse for the name
0103      * @param pos on entry, the index of the first character to parse.
0104      * This is the character following the SYMBOL_REF character.  On
0105      * exit, the index after the last parsed character.  If the parse
0106      * failed, pos is unchanged on exit.
0107      * @param limit the index after the last character to be parsed.
0108      * @return the parsed name, or an empty string if there is no
0109      * valid symbolic name at the given position.
0110      * @stable ICU 2.8
0111      */
0112     virtual UnicodeString parseReference(const UnicodeString& text,
0113                                          ParsePosition& pos, int32_t limit) const = 0;
0114 };
0115 U_NAMESPACE_END
0116 
0117 #endif /* U_SHOW_CPLUSPLUS_API */
0118 
0119 #endif