|
||||
Warning, file /include/unicode/usetiter.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ********************************************************************** 0005 * Copyright (c) 2002-2014, International Business Machines 0006 * Corporation and others. All Rights Reserved. 0007 ********************************************************************** 0008 */ 0009 #ifndef USETITER_H 0010 #define USETITER_H 0011 0012 #include "unicode/utypes.h" 0013 0014 #if U_SHOW_CPLUSPLUS_API 0015 0016 #include "unicode/uobject.h" 0017 #include "unicode/unistr.h" 0018 0019 /** 0020 * \file 0021 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. 0022 */ 0023 0024 U_NAMESPACE_BEGIN 0025 0026 class UnicodeSet; 0027 class UnicodeString; 0028 0029 /** 0030 * 0031 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It 0032 * iterates over either code points or code point ranges. After all 0033 * code points or ranges have been returned, it returns the 0034 * multicharacter strings of the UnicodeSet, if any. 0035 * 0036 * This class is not intended for public subclassing. 0037 * 0038 * <p>To iterate over code points and strings, use a loop like this: 0039 * <pre> 0040 * UnicodeSetIterator it(set); 0041 * while (it.next()) { 0042 * processItem(it.getString()); 0043 * } 0044 * </pre> 0045 * <p>Each item in the set is accessed as a string. Set elements 0046 * consisting of single code points are returned as strings containing 0047 * just the one code point. 0048 * 0049 * <p>To iterate over code point ranges, instead of individual code points, 0050 * use a loop like this: 0051 * <pre> 0052 * UnicodeSetIterator it(set); 0053 * while (it.nextRange()) { 0054 * if (it.isString()) { 0055 * processString(it.getString()); 0056 * } else { 0057 * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); 0058 * } 0059 * } 0060 * </pre> 0061 * 0062 * To iterate over only the strings, start with <code>skipToStrings()</code>. 0063 * 0064 * @author M. Davis 0065 * @stable ICU 2.4 0066 */ 0067 class U_COMMON_API UnicodeSetIterator final : public UObject { 0068 /** 0069 * Value of <tt>codepoint</tt> if the iterator points to a string. 0070 * If <tt>codepoint == IS_STRING</tt>, then examine 0071 * <tt>string</tt> for the current iteration result. 0072 */ 0073 enum { IS_STRING = -1 }; 0074 0075 /** 0076 * Current code point, or the special value <tt>IS_STRING</tt>, if 0077 * the iterator points to a string. 0078 */ 0079 UChar32 codepoint; 0080 0081 /** 0082 * When iterating over ranges using <tt>nextRange()</tt>, 0083 * <tt>codepointEnd</tt> contains the inclusive end of the 0084 * iteration range, if <tt>codepoint != IS_STRING</tt>. If 0085 * iterating over code points using <tt>next()</tt>, or if 0086 * <tt>codepoint == IS_STRING</tt>, then the value of 0087 * <tt>codepointEnd</tt> is undefined. 0088 */ 0089 UChar32 codepointEnd; 0090 0091 /** 0092 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points 0093 * to the current string. If <tt>codepoint != IS_STRING</tt>, the 0094 * value of <tt>string</tt> is undefined. 0095 */ 0096 const UnicodeString* string; 0097 0098 public: 0099 0100 /** 0101 * Create an iterator over the given set. The iterator is valid 0102 * only so long as <tt>set</tt> is valid. 0103 * @param set set to iterate over 0104 * @stable ICU 2.4 0105 */ 0106 UnicodeSetIterator(const UnicodeSet& set); 0107 0108 /** 0109 * Create an iterator over nothing. <tt>next()</tt> and 0110 * <tt>nextRange()</tt> return false. This is a convenience 0111 * constructor allowing the target to be set later. 0112 * @stable ICU 2.4 0113 */ 0114 UnicodeSetIterator(); 0115 0116 /** 0117 * Destructor. 0118 * @stable ICU 2.4 0119 */ 0120 virtual ~UnicodeSetIterator(); 0121 0122 /** 0123 * Returns true if the current element is a string. If so, the 0124 * caller can retrieve it with <tt>getString()</tt>. If this 0125 * method returns false, the current element is a code point or 0126 * code point range, depending on whether <tt>next()</tt> or 0127 * <tt>nextRange()</tt> was called. 0128 * Elements of types string and codepoint can both be retrieved 0129 * with the function <tt>getString()</tt>. 0130 * Elements of type codepoint can also be retrieved with 0131 * <tt>getCodepoint()</tt>. 0132 * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint 0133 * of the range, and <tt>getCodepointEnd()</tt> returns the end 0134 * of the range. 0135 * @stable ICU 2.4 0136 */ 0137 inline UBool isString() const; 0138 0139 /** 0140 * Returns the current code point, if <tt>isString()</tt> returned 0141 * false. Otherwise returns an undefined result. 0142 * @stable ICU 2.4 0143 */ 0144 inline UChar32 getCodepoint() const; 0145 0146 /** 0147 * Returns the end of the current code point range, if 0148 * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was 0149 * called. Otherwise returns an undefined result. 0150 * @stable ICU 2.4 0151 */ 0152 inline UChar32 getCodepointEnd() const; 0153 0154 /** 0155 * Returns the current string, if <tt>isString()</tt> returned 0156 * true. If the current iteration item is a code point, a UnicodeString 0157 * containing that single code point is returned. 0158 * 0159 * Ownership of the returned string remains with the iterator. 0160 * The string is guaranteed to remain valid only until the iterator is 0161 * advanced to the next item, or until the iterator is deleted. 0162 * 0163 * @stable ICU 2.4 0164 */ 0165 const UnicodeString& getString(); 0166 0167 /** 0168 * Skips over the remaining code points/ranges, if any. 0169 * A following call to next() or nextRange() will yield a string, if there is one. 0170 * No-op if next() would return false, or if it would yield a string anyway. 0171 * 0172 * @return *this 0173 * @stable ICU 70 0174 * @see UnicodeSet#strings() 0175 */ 0176 inline UnicodeSetIterator &skipToStrings() { 0177 // Finish code point/range iteration. 0178 range = endRange; 0179 endElement = -1; 0180 nextElement = 0; 0181 return *this; 0182 } 0183 0184 /** 0185 * Advances the iteration position to the next element in the set, 0186 * which can be either a single code point or a string. 0187 * If there are no more elements in the set, return false. 0188 * 0189 * <p> 0190 * If <tt>isString() == true</tt>, the value is a 0191 * string, otherwise the value is a 0192 * single code point. Elements of either type can be retrieved 0193 * with the function <tt>getString()</tt>, while elements of 0194 * consisting of a single code point can be retrieved with 0195 * <tt>getCodepoint()</tt> 0196 * 0197 * <p>The order of iteration is all code points in sorted order, 0198 * followed by all strings sorted order. Do not mix 0199 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without 0200 * calling <tt>reset()</tt> between them. The results of doing so 0201 * are undefined. 0202 * 0203 * @return true if there was another element in the set. 0204 * @stable ICU 2.4 0205 */ 0206 UBool next(); 0207 0208 /** 0209 * Returns the next element in the set, either a code point range 0210 * or a string. If there are no more elements in the set, return 0211 * false. If <tt>isString() == true</tt>, the value is a 0212 * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a 0213 * range of one or more code points from <tt>getCodepoint()</tt> to 0214 * <tt>getCodepointeEnd()</tt> inclusive. 0215 * 0216 * <p>The order of iteration is all code points ranges in sorted 0217 * order, followed by all strings sorted order. Ranges are 0218 * disjoint and non-contiguous. The value returned from <tt>getString()</tt> 0219 * is undefined unless <tt>isString() == true</tt>. Do not mix calls to 0220 * <tt>next()</tt> and <tt>nextRange()</tt> without calling 0221 * <tt>reset()</tt> between them. The results of doing so are 0222 * undefined. 0223 * 0224 * @return true if there was another element in the set. 0225 * @stable ICU 2.4 0226 */ 0227 UBool nextRange(); 0228 0229 /** 0230 * Sets this iterator to visit the elements of the given set and 0231 * resets it to the start of that set. The iterator is valid only 0232 * so long as <tt>set</tt> is valid. 0233 * @param set the set to iterate over. 0234 * @stable ICU 2.4 0235 */ 0236 void reset(const UnicodeSet& set); 0237 0238 /** 0239 * Resets this iterator to the start of the set. 0240 * @stable ICU 2.4 0241 */ 0242 void reset(); 0243 0244 /** 0245 * ICU "poor man's RTTI", returns a UClassID for this class. 0246 * 0247 * @stable ICU 2.4 0248 */ 0249 static UClassID U_EXPORT2 getStaticClassID(); 0250 0251 /** 0252 * ICU "poor man's RTTI", returns a UClassID for the actual class. 0253 * 0254 * @stable ICU 2.4 0255 */ 0256 virtual UClassID getDynamicClassID() const override; 0257 0258 // ======================= PRIVATES =========================== 0259 0260 private: 0261 0262 // endElement and nextElements are really UChar32's, but we keep 0263 // them as signed int32_t's so we can do comparisons with 0264 // endElement set to -1. Leave them as int32_t's. 0265 /** The set 0266 */ 0267 const UnicodeSet* set; 0268 /** End range 0269 */ 0270 int32_t endRange; 0271 /** Range 0272 */ 0273 int32_t range; 0274 /** End element 0275 */ 0276 int32_t endElement; 0277 /** Next element 0278 */ 0279 int32_t nextElement; 0280 /** Next string 0281 */ 0282 int32_t nextString; 0283 /** String count 0284 */ 0285 int32_t stringCount; 0286 0287 /** 0288 * Points to the string to use when the caller asks for a 0289 * string and the current iteration item is a code point, not a string. 0290 */ 0291 UnicodeString *cpString; 0292 0293 /** Copy constructor. Disallowed. 0294 */ 0295 UnicodeSetIterator(const UnicodeSetIterator&) = delete; 0296 0297 /** Assignment operator. Disallowed. 0298 */ 0299 UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete; 0300 0301 /** Load range 0302 */ 0303 void loadRange(int32_t range); 0304 }; 0305 0306 inline UBool UnicodeSetIterator::isString() const { 0307 return codepoint < 0; 0308 } 0309 0310 inline UChar32 UnicodeSetIterator::getCodepoint() const { 0311 return codepoint; 0312 } 0313 0314 inline UChar32 UnicodeSetIterator::getCodepointEnd() const { 0315 return codepointEnd; 0316 } 0317 0318 0319 U_NAMESPACE_END 0320 0321 #endif /* U_SHOW_CPLUSPLUS_API */ 0322 0323 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |