Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/unicode/usetiter.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 **********************************************************************
0005 * Copyright (c) 2002-2014, International Business Machines
0006 * Corporation and others.  All Rights Reserved.
0007 **********************************************************************
0008 */
0009 #ifndef USETITER_H
0010 #define USETITER_H
0011 
0012 #include "unicode/utypes.h"
0013 
0014 #if U_SHOW_CPLUSPLUS_API
0015 
0016 #include "unicode/uobject.h"
0017 #include "unicode/unistr.h"
0018 
0019 /**
0020  * \file 
0021  * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
0022  */
0023 
0024 U_NAMESPACE_BEGIN
0025 
0026 class UnicodeSet;
0027 class UnicodeString;
0028 
0029 /**
0030  *
0031  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
0032  * iterates over either code points or code point ranges.  After all
0033  * code points or ranges have been returned, it returns the
0034  * multicharacter strings of the UnicodeSet, if any.
0035  *
0036  * This class is not intended for public subclassing.
0037  *
0038  * <p>To iterate over code points and strings, use a loop like this:
0039  * <pre>
0040  * UnicodeSetIterator it(set);
0041  * while (it.next()) {
0042  *     processItem(it.getString());
0043  * }
0044  * </pre>
0045  * <p>Each item in the set is accessed as a string.  Set elements
0046  *    consisting of single code points are returned as strings containing
0047  *    just the one code point.
0048  *
0049  * <p>To iterate over code point ranges, instead of individual code points,
0050  *    use a loop like this:
0051  * <pre>
0052  * UnicodeSetIterator it(set);
0053  * while (it.nextRange()) {
0054  *   if (it.isString()) {
0055  *     processString(it.getString());
0056  *   } else {
0057  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
0058  *   }
0059  * }
0060  * </pre>
0061  *
0062  * To iterate over only the strings, start with <code>skipToStrings()</code>.
0063  *
0064  * @author M. Davis
0065  * @stable ICU 2.4
0066  */
0067 class U_COMMON_API UnicodeSetIterator final : public UObject {
0068     /**
0069      * Value of <tt>codepoint</tt> if the iterator points to a string.
0070      * If <tt>codepoint == IS_STRING</tt>, then examine
0071      * <tt>string</tt> for the current iteration result.
0072      */
0073     enum { IS_STRING = -1 };
0074
0075     /**
0076      * Current code point, or the special value <tt>IS_STRING</tt>, if
0077      * the iterator points to a string.
0078      */
0079     UChar32 codepoint;
0080
0081     /**
0082      * When iterating over ranges using <tt>nextRange()</tt>,
0083      * <tt>codepointEnd</tt> contains the inclusive end of the
0084      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
0085      * iterating over code points using <tt>next()</tt>, or if
0086      * <tt>codepoint == IS_STRING</tt>, then the value of
0087      * <tt>codepointEnd</tt> is undefined.
0088      */
0089     UChar32 codepointEnd;
0090
0091     /**
0092      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
0093      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
0094      * value of <tt>string</tt> is undefined.
0095      */
0096     const UnicodeString* string;
0097
0098  public:
0099
0100     /**
0101      * Create an iterator over the given set.  The iterator is valid
0102      * only so long as <tt>set</tt> is valid.
0103      * @param set set to iterate over
0104      * @stable ICU 2.4
0105      */
0106     UnicodeSetIterator(const UnicodeSet& set);
0107
0108     /**
0109      * Create an iterator over nothing.  <tt>next()</tt> and
0110      * <tt>nextRange()</tt> return false. This is a convenience
0111      * constructor allowing the target to be set later.
0112      * @stable ICU 2.4
0113      */
0114     UnicodeSetIterator();
0115
0116     /**
0117      * Destructor.
0118      * @stable ICU 2.4
0119      */
0120     virtual ~UnicodeSetIterator();
0121
0122     /**
0123      * Returns true if the current element is a string.  If so, the
0124      * caller can retrieve it with <tt>getString()</tt>.  If this
0125      * method returns false, the current element is a code point or
0126      * code point range, depending on whether <tt>next()</tt> or
0127      * <tt>nextRange()</tt> was called.
0128      * Elements of types string and codepoint can both be retrieved
0129      * with the function <tt>getString()</tt>.
0130      * Elements of type codepoint can also be retrieved with
0131      * <tt>getCodepoint()</tt>.
0132      * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
0133      * of the range, and <tt>getCodepointEnd()</tt> returns the end
0134      * of the range.
0135      * @stable ICU 2.4
0136      */
0137     inline UBool isString() const;
0138
0139     /**
0140      * Returns the current code point, if <tt>isString()</tt> returned
0141      * false.  Otherwise returns an undefined result.
0142      * @stable ICU 2.4
0143      */
0144     inline UChar32 getCodepoint() const;
0145
0146     /**
0147      * Returns the end of the current code point range, if
0148      * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
0149      * called.  Otherwise returns an undefined result.
0150      * @stable ICU 2.4
0151      */
0152     inline UChar32 getCodepointEnd() const;
0153
0154     /**
0155      * Returns the current string, if <tt>isString()</tt> returned
0156      * true.  If the current iteration item is a code point, a UnicodeString
0157      * containing that single code point is returned.
0158      *
0159      * Ownership of the returned string remains with the iterator.
0160      * The string is guaranteed to remain valid only until the iterator is
0161      *   advanced to the next item, or until the iterator is deleted.
0162      *
0163      * @stable ICU 2.4
0164      */
0165     const UnicodeString& getString();
0166
0167     /**
0168      * Skips over the remaining code points/ranges, if any.
0169      * A following call to next() or nextRange() will yield a string, if there is one.
0170      * No-op if next() would return false, or if it would yield a string anyway.
0171      *
0172      * @return *this
0173      * @stable ICU 70
0174      * @see UnicodeSet#strings()
0175      */
0176     inline UnicodeSetIterator &skipToStrings() {
0177         // Finish code point/range iteration so that only strings (if any) remain.
0178         range = endRange;
0179         endElement = -1;  // -1 is the sentinel described at the endElement/nextElement declarations
0180         nextElement = 0;  // NOTE(review): presumably nextElement > endElement means "no code points left" -- confirm in next()
0181         return *this;
0182     }
0183
0184     /**
0185      * Advances the iteration position to the next element in the set,
0186      * which can be either a single code point or a string.
0187      * If there are no more elements in the set, return false.
0188      *
0189      * <p>
0190      * If <tt>isString() == true</tt>, the value is a
0191      * string, otherwise the value is a
0192      * single code point.  Elements of either type can be retrieved
0193      * with the function <tt>getString()</tt>, while elements
0194      * consisting of a single code point can also be retrieved with
0195      * <tt>getCodepoint()</tt>.
0196      *
0197      * <p>The order of iteration is all code points in sorted order,
0198      * followed by all strings in sorted order.  Do not mix
0199      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
0200      * calling <tt>reset()</tt> between them.  The results of doing so
0201      * are undefined.
0202      *
0203      * @return true if there was another element in the set.
0204      * @stable ICU 2.4
0205      */
0206     UBool next();
0207
0208     /**
0209      * Returns the next element in the set, either a code point range
0210      * or a string.  If there are no more elements in the set, return
0211      * false.  If <tt>isString() == true</tt>, the value is a
0212      * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
0213      * range of one or more code points from <tt>getCodepoint()</tt> to
0214      * <tt>getCodepointEnd()</tt> inclusive.
0215      *
0216      * <p>The order of iteration is all code point ranges in sorted
0217      * order, followed by all strings in sorted order.  Ranges are
0218      * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
0219      * is undefined unless <tt>isString() == true</tt>.  Do not mix calls to
0220      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
0221      * <tt>reset()</tt> between them.  The results of doing so are
0222      * undefined.
0223      *
0224      * @return true if there was another element in the set.
0225      * @stable ICU 2.4
0226      */
0227     UBool nextRange();
0228
0229     /**
0230      * Sets this iterator to visit the elements of the given set and
0231      * resets it to the start of that set.  The iterator is valid only
0232      * so long as <tt>set</tt> is valid.
0233      * @param set the set to iterate over.
0234      * @stable ICU 2.4
0235      */
0236     void reset(const UnicodeSet& set);
0237
0238     /**
0239      * Resets this iterator to the start of the set.
0240      * @stable ICU 2.4
0241      */
0242     void reset();
0243
0244     /**
0245      * ICU "poor man's RTTI", returns a UClassID for this class.
0246      *
0247      * @stable ICU 2.4
0248      */
0249     static UClassID U_EXPORT2 getStaticClassID();
0250
0251     /**
0252      * ICU "poor man's RTTI", returns a UClassID for the actual class.
0253      *
0254      * @stable ICU 2.4
0255      */
0256     virtual UClassID getDynamicClassID() const override;
0257
0258     // ======================= PRIVATES ===========================
0259
0260 private:
0261
0262     // endElement and nextElement are really UChar32's, but we keep
0263     // them as signed int32_t's so we can do comparisons with
0264     // endElement set to -1.  Leave them as int32_t's.
0265     /** The set.  NOTE(review): presumably the set given to the constructor or reset(const UnicodeSet&) -- confirm.
0266      */
0267     const UnicodeSet* set;
0268     /** End range.  skipToStrings() assigns this value to <tt>range</tt> to finish range iteration.
0269      */
0270     int32_t endRange;
0271     /** Range.  NOTE(review): presumably the position of the current code point range -- confirm in the implementation.
0272      */
0273     int32_t range;
0274     /** End element.  May hold -1 (see the note above); skipToStrings() sets it to -1.
0275      */
0276     int32_t endElement;
0277     /** Next element.  skipToStrings() sets it to 0.
0278      */
0279     int32_t nextElement;
0280     /** Next string.  NOTE(review): presumably the position of the next string to return -- confirm.
0281      */
0282     int32_t nextString;
0283     /** String count.  NOTE(review): presumably the number of strings in the set -- confirm.
0284      */
0285     int32_t stringCount;
0286
0287     /**
0288      *  Points to the string to use when the caller asks for a
0289      *  string and the current iteration item is a code point, not a string.
0290      */
0291     UnicodeString *cpString;
0292
0293     /** Copy constructor. Disallowed (declared as deleted).
0294      */
0295     UnicodeSetIterator(const UnicodeSetIterator&) = delete;
0296
0297     /** Assignment operator. Disallowed (declared as deleted).
0298      */
0299     UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
0300
0301     /** Load range.  Defined out of line; NOTE(review): presumably loads the bounds of the given range -- confirm.
0302      */
0303     void loadRange(int32_t range);
0304 };
0305 
0306 inline UBool UnicodeSetIterator::isString() const {
0307     return !(codepoint >= 0);  // a negative value (IS_STRING is -1) marks a string element
0308 }
0309 
0310 inline UChar32 UnicodeSetIterator::getCodepoint() const {
0311     return this->codepoint;  // for ranges this is the first code point of the range
0312 }
0313 
0314 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
0315     return this->codepointEnd;  // inclusive end of the range; defined only when iterating with nextRange()
0316 }
0317 
0318 
0319 U_NAMESPACE_END
0320 
0321 #endif /* U_SHOW_CPLUSPLUS_API */
0322 
0323 #endif