|
||||
Warning, file /include/unicode/utext.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * 0006 * Copyright (C) 2004-2012, International Business Machines 0007 * Corporation and others. All Rights Reserved. 0008 * 0009 ******************************************************************************* 0010 * file name: utext.h 0011 * encoding: UTF-8 0012 * tab size: 8 (not used) 0013 * indentation:4 0014 * 0015 * created on: 2004oct06 0016 * created by: Markus W. Scherer 0017 */ 0018 0019 #ifndef __UTEXT_H__ 0020 #define __UTEXT_H__ 0021 0022 /** 0023 * \file 0024 * \brief C API: Abstract Unicode Text API 0025 * 0026 * The Text Access API provides a means to allow text that is stored in alternative 0027 * formats to work with ICU services. ICU normally operates on text that is 0028 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type 0029 * UnicodeString for C++ APIs. 0030 * 0031 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous 0032 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. 0033 * 0034 * There are three general classes of usage for UText: 0035 * 0036 * Application Level Use. This is the simplest usage - applications would 0037 * use one of the utext_open() functions on their input text, and pass 0038 * the resulting UText to the desired ICU service. 0039 * 0040 * Second is usage in ICU Services, such as break iteration, that will need to 0041 * operate on input presented to them as a UText. These implementations 0042 * will need to use the iteration and related UText functions to gain 0043 * access to the actual text. 0044 * 0045 * The third class of UText users are "text providers." These are the 0046 * UText implementations for the various text storage formats. 
An application 0047 * or system with a unique text storage format can implement a set of 0048 * UText provider functions for that format, which will then allow 0049 * ICU services to operate on that format. 0050 * 0051 * 0052 * <em>Iterating over text</em> 0053 * 0054 * Here is sample code for a forward iteration over the contents of a UText 0055 * 0056 * \code 0057 * UChar32 c; 0058 * UText *ut = whatever(); 0059 * 0060 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { 0061 * // do whatever with the codepoint c here. 0062 * } 0063 * \endcode 0064 * 0065 * And here is similar code to iterate in the reverse direction, from the end 0066 * of the text towards the beginning. 0067 * 0068 * \code 0069 * UChar32 c; 0070 * UText *ut = whatever(); 0071 * int64_t textLength = utext_nativeLength(ut); 0072 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { 0073 * // do whatever with the codepoint c here. 0074 * } 0075 * \endcode 0076 * 0077 * <em>Characters and Indexing</em> 0078 * 0079 * Indexing into text by UText functions is nearly always in terms of the native 0080 * indexing of the underlying text storage. The storage format could be UTF-8 0081 * or UTF-32, for example. When coding to the UText access API, no assumptions 0082 * can be made regarding the size of characters, or how far an index 0083 * may move when iterating between characters. 0084 * 0085 * All indices supplied to UText functions are pinned to the length of the 0086 * text. An out-of-bounds index is not considered to be an error, but is 0087 * adjusted to be in the range 0 <= index <= length of input text. 0088 * 0089 * 0090 * When an index position is returned from a UText function, it will be 0091 * a native index to the underlying text. In the case of multi-unit characters, 0092 * it will always refer to the first position of the character, 0093 * never to the interior. 
This is essentially the same thing as saying that 0094 * a returned index will always point to a boundary between characters. 0095 * 0096 * When a native index is supplied to a UText function, all indices that 0097 * refer to any part of a multi-unit character representation are considered 0098 * to be equivalent. In the case of multi-unit characters, an incoming index 0099 * will be logically normalized to refer to the start of the character. 0100 * 0101 * It is possible to test whether a native index is on a code point boundary 0102 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). 0103 * If the index is returned unchanged, it was on a code point boundary. If 0104 * an adjusted index is returned, the original index referred to the 0105 * interior of a character. 0106 * 0107 * <em>Conventions for calling UText functions</em> 0108 * 0109 * Most UText access functions have as their first parameter a (UText *) pointer, 0110 * which specifies the UText to be used. Unless otherwise noted, the 0111 * pointer must refer to a valid, open UText. Attempting to 0112 * use a closed UText or passing a NULL pointer is a programming error and 0113 * will produce undefined results or NULL pointer exceptions. 0114 * 0115 * The utext_open family of functions can either open an existing (closed) 0116 * UText, or heap allocate a new UText. Here is sample code for creating 0117 * a stack-allocated UText. 0118 * 0119 * \code 0120 * char *s = whatever(); // A utf-8 string 0121 * UErrorCode status = U_ZERO_ERROR; 0122 * UText ut = UTEXT_INITIALIZER; 0123 * utext_openUTF8(&ut, s, -1, &status); 0124 * if (U_FAILURE(status)) { 0125 * // error handling 0126 * } else { 0127 * // work with the UText 0128 * } 0129 * \endcode 0130 * 0131 * Any existing UText passed to an open function _must_ have been initialized, 0132 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated 0133 * by an open function. 
Passing NULL will cause the open function to 0134 * heap-allocate and fully initialize a new UText. 0135 * 0136 */ 0137 0138 0139 0140 #include "unicode/utypes.h" 0141 #include "unicode/uchar.h" 0142 #if U_SHOW_CPLUSPLUS_API 0143 #include "unicode/localpointer.h" 0144 #include "unicode/rep.h" 0145 #include "unicode/unistr.h" 0146 #include "unicode/chariter.h" 0147 #endif 0148 0149 0150 U_CDECL_BEGIN 0151 0152 struct UText; 0153 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ 0154 0155 0156 /*************************************************************************************** 0157 * 0158 * C Functions for creating UText wrappers around various kinds of text strings. 0159 * 0160 ****************************************************************************************/ 0161 0162 0163 /** 0164 * Close function for UText instances. 0165 * Cleans up, releases any resources being held by an open UText. 0166 * <p> 0167 * If the UText was originally allocated by one of the utext_open functions, 0168 * the storage associated with the utext will also be freed. 0169 * If the UText storage originated with the application, as it would with 0170 * a local or static instance, the storage will not be deleted. 0171 * 0172 * An open UText can be reset to refer to new string by using one of the utext_open() 0173 * functions without first closing the UText. 0174 * 0175 * @param ut The UText to be closed. 0176 * @return NULL if the UText struct was deleted by the close. If the UText struct 0177 * was originally provided by the caller to the open function, it is 0178 * returned by this function, and may be safely used again in 0179 * a subsequent utext_open. 0180 * 0181 * @stable ICU 3.4 0182 */ 0183 U_CAPI UText * U_EXPORT2 0184 utext_close(UText *ut); 0185 0186 /** 0187 * Open a read-only UText implementation for UTF-8 strings. 
0188 * 0189 * \htmlonly 0190 * Any invalid UTF-8 in the input will be handled in this way: 0191 * a sequence of bytes that has the form of a truncated, but otherwise valid, 0192 * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 0193 * Any other illegal bytes will each be replaced by a \uFFFD. 0194 * \endhtmlonly 0195 * 0196 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 0197 * If non-NULL, must refer to an initialized UText struct, which will then 0198 * be reset to reference the specified UTF-8 string. 0199 * @param s A UTF-8 string. Must not be NULL. 0200 * @param length The length of the UTF-8 string in bytes, or -1 if the string is 0201 * zero terminated. 0202 * @param status Errors are returned here. 0203 * @return A pointer to the UText. If a pre-allocated UText was provided, it 0204 * will always be used and returned. 0205 * @stable ICU 3.4 0206 */ 0207 U_CAPI UText * U_EXPORT2 0208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); 0209 0210 0211 /** 0212 * Open a read-only UText for UChar * string. 0213 * 0214 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 0215 * If non-NULL, must refer to an initialized UText struct, which will then 0216 * be reset to reference the specified UChar string. 0217 * @param s A UChar (UTF-16) string 0218 * @param length The number of UChars in the input string, or -1 if the string is 0219 * zero terminated. 0220 * @param status Errors are returned here. 0221 * @return A pointer to the UText. If a pre-allocated UText was provided, it 0222 * will always be used and returned. 0223 * @stable ICU 3.4 0224 */ 0225 U_CAPI UText * U_EXPORT2 0226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); 0227 0228 0229 #if U_SHOW_CPLUSPLUS_API 0230 /** 0231 * Open a writable UText for a non-const UnicodeString. 0232 * 0233 * @param ut Pointer to a UText struct. 
If nullptr, a new UText will be created. 0234 * If non-nullptr, must refer to an initialized UText struct, which will then 0235 * be reset to reference the specified input string. 0236 * @param s A UnicodeString. 0237 * @param status Errors are returned here. 0238 * @return Pointer to the UText. If a UText was supplied as input, this 0239 * will always be used and returned. 0240 * @stable ICU 3.4 0241 */ 0242 U_CAPI UText * U_EXPORT2 0243 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); 0244 0245 0246 /** 0247 * Open a UText for a const UnicodeString. The resulting UText will not be writable. 0248 * 0249 * @param ut Pointer to a UText struct. If nullptr, a new UText will be created. 0250 * If non-nullptr, must refer to an initialized UText struct, which will then 0251 * be reset to reference the specified input string. 0252 * @param s A const UnicodeString to be wrapped. 0253 * @param status Errors are returned here. 0254 * @return Pointer to the UText. If a UText was supplied as input, this 0255 * will always be used and returned. 0256 * @stable ICU 3.4 0257 */ 0258 U_CAPI UText * U_EXPORT2 0259 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); 0260 0261 0262 /** 0263 * Open a writable UText implementation for an ICU Replaceable object. 0264 * @param ut Pointer to a UText struct. If nullptr, a new UText will be created. 0265 * If non-nullptr, must refer to an already existing UText, which will then 0266 * be reset to reference the specified replaceable text. 0267 * @param rep A Replaceable text object. 0268 * @param status Errors are returned here. 0269 * @return Pointer to the UText. If a UText was supplied as input, this 0270 * will always be used and returned. 
0271 * @see Replaceable 0272 * @stable ICU 3.4 0273 */ 0274 U_CAPI UText * U_EXPORT2 0275 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); 0276 0277 /** 0278 * Open a UText implementation over an ICU CharacterIterator. 0279 * @param ut Pointer to a UText struct. If nullptr, a new UText will be created. 0280 * If non-nullptr, must refer to an already existing UText, which will then 0281 * be reset to reference the specified character iterator. 0282 * @param ci A Character Iterator. 0283 * @param status Errors are returned here. 0284 * @return Pointer to the UText. If a UText was supplied as input, this 0285 * will always be used and returned. 0286 * @see CharacterIterator 0287 * @stable ICU 3.4 0288 */ 0289 U_CAPI UText * U_EXPORT2 0290 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); 0291 0292 #endif 0293 0294 0295 /** 0296 * Clone a UText. This is much like opening a UText where the source text is itself 0297 * another UText. 0298 * 0299 * A deep clone will copy both the UText data structures and the underlying text. 0300 * The original and cloned UText will operate completely independently; modifications 0301 * made to the text in one will not affect the other. Text providers are not 0302 * required to support deep clones. The user of clone() must check the status return 0303 * and be prepared to handle failures. 0304 * 0305 * The standard UText implementations for UTF8, UChar *, UnicodeString and 0306 * Replaceable all support deep cloning. 0307 * 0308 * The UText returned from a deep clone will be writable, assuming that the text 0309 * provider is able to support writing, even if the source UText had been made 0310 * non-writable by means of utext_freeze(). 0311 * 0312 * A shallow clone replicates only the UText data structures; it does not make 0313 * a copy of the underlying text. 
Shallow clones can be used as an efficient way to 0314 * have multiple iterators active in a single text string that is not being 0315 * modified. 0316 * 0317 * A shallow clone operation will not fail, barring truly exceptional conditions such 0318 * as memory allocation failures. 0319 * 0320 * Shallow UText clones should be avoided if the UText functions that modify the 0321 * text are expected to be used, either on the original or the cloned UText. 0322 * Any such modifications can cause unpredictable behavior. Read Only 0323 * shallow clones provide some protection against errors of this type by 0324 * disabling text modification via the cloned UText. 0325 * 0326 * A shallow clone made with the readOnly parameter == false will preserve the 0327 * utext_isWritable() state of the source object. Note, however, that 0328 * write operations must be avoided while more than one UText exists that refer 0329 * to the same underlying text. 0330 * 0331 * A UText and its clone may be safely concurrently accessed by separate threads. 0332 * This is true for read access only with shallow clones, and for both read and 0333 * write access with deep clones. 0334 * It is the responsibility of the Text Provider to ensure that this thread safety 0335 * constraint is met. 0336 * 0337 * @param dest A UText struct to be filled in with the result of the clone operation, 0338 * or NULL if the clone function should heap-allocate a new UText struct. 0339 * If non-NULL, must refer to an already existing UText, which will then 0340 * be reset to become the clone. 0341 * @param src The UText to be cloned. 0342 * @param deep true to request a deep clone, false for a shallow clone. 0343 * @param readOnly true to request that the cloned UText have read only access to the 0344 * underlying text. 0345 0346 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 0347 * will be returned if the text provider is unable to clone the 0348 * original text. 
0349 * @return The newly created clone, or NULL if the clone operation failed. 0350 * @stable ICU 3.4 0351 */ 0352 U_CAPI UText * U_EXPORT2 0353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); 0354 0355 0356 /** 0357 * Compare two UText objects for equality. 0358 * UTexts are equal if they are iterating over the same text, and 0359 * have the same iteration position within the text. 0360 * If either or both of the parameters are NULL, the comparison is false. 0361 * 0362 * @param a The first of the two UTexts to compare. 0363 * @param b The other UText to be compared. 0364 * @return true if the two UTexts are equal. 0365 * @stable ICU 3.6 0366 */ 0367 U_CAPI UBool U_EXPORT2 0368 utext_equals(const UText *a, const UText *b); 0369 0370 0371 /***************************************************************************** 0372 * 0373 * Functions to work with the text represented by a UText wrapper 0374 * 0375 *****************************************************************************/ 0376 0377 /** 0378 * Get the length of the text. Depending on the characteristics 0379 * of the underlying text representation, this may be expensive. 0380 * @see utext_isLengthExpensive() 0381 * 0382 * 0383 * @param ut the text to be accessed. 0384 * @return the length of the text, expressed in native units. 0385 * 0386 * @stable ICU 3.4 0387 */ 0388 U_CAPI int64_t U_EXPORT2 0389 utext_nativeLength(UText *ut); 0390 0391 /** 0392 * Return true if calculating the length of the text could be expensive. 0393 * Finding the length of NUL terminated strings is considered to be expensive. 0394 * 0395 * Note that the value of this function may change 0396 * as the result of other operations on a UText. 0397 * Once the length of a string has been discovered, it will no longer 0398 * be expensive to report it. 0399 * 0400 * @param ut the text to be accessed. 0401 * @return true if determining the length of the text could be time consuming. 
0402 * @stable ICU 3.4 0403 */ 0404 U_CAPI UBool U_EXPORT2 0405 utext_isLengthExpensive(const UText *ut); 0406 0407 /** 0408 * Returns the code point at the requested index, 0409 * or U_SENTINEL (-1) if it is out of bounds. 0410 * 0411 * If the specified index points to the interior of a multi-unit 0412 * character - one of the trail bytes of a UTF-8 sequence, for example - 0413 * the complete code point will be returned. 0414 * 0415 * The iteration position will be set to the start of the returned code point. 0416 * 0417 * This function is roughly equivalent to the sequence 0418 * utext_setNativeIndex(ut, index); 0419 * utext_current32(ut); 0420 * (There is a subtle difference if the index is out of bounds by being less than zero - 0421 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() 0422 * will return the char at zero. utext_char32At(negative index), on the other hand, will 0423 * return the U_SENTINEL value of -1.) 0424 * 0425 * @param ut the text to be accessed 0426 * @param nativeIndex the native index of the character to be accessed. If the index points 0427 * to other than the first unit of a multi-unit character, it will be adjusted 0428 * to the start of the character. 0429 * @return the code point at the specified index. 0430 * @stable ICU 3.4 0431 */ 0432 U_CAPI UChar32 U_EXPORT2 0433 utext_char32At(UText *ut, int64_t nativeIndex); 0434 0435 0436 /** 0437 * 0438 * Get the code point at the current iteration position, 0439 * or U_SENTINEL (-1) if the iteration has reached the end of 0440 * the input text. 0441 * 0442 * @param ut the text to be accessed. 0443 * @return the Unicode code point at the current iterator position. 0444 * @stable ICU 3.4 0445 */ 0446 U_CAPI UChar32 U_EXPORT2 0447 utext_current32(UText *ut); 0448 0449 0450 /** 0451 * Get the code point at the current iteration position of the UText, and 0452 * advance the position to the first index following the character. 
0453 * 0454 * If the position is at the end of the text (the index following 0455 * the last character, which is also the length of the text), 0456 * return U_SENTINEL (-1) and do not advance the index. 0457 * 0458 * This is a post-increment operation. 0459 * 0460 * An inline macro version of this function, UTEXT_NEXT32(), 0461 * is available for performance critical use. 0462 * 0463 * @param ut the text to be accessed. 0464 * @return the Unicode code point at the iteration position. 0465 * @see UTEXT_NEXT32 0466 * @stable ICU 3.4 0467 */ 0468 U_CAPI UChar32 U_EXPORT2 0469 utext_next32(UText *ut); 0470 0471 0472 /** 0473 * Move the iterator position to the character (code point) whose 0474 * index precedes the current position, and return that character. 0475 * This is a pre-decrement operation. 0476 * 0477 * If the initial position is at the start of the text (index of 0) 0478 * return U_SENTINEL (-1), and leave the position unchanged. 0479 * 0480 * An inline macro version of this function, UTEXT_PREVIOUS32(), 0481 * is available for performance critical use. 0482 * 0483 * @param ut the text to be accessed. 0484 * @return the previous UChar32 code point, or U_SENTINEL (-1) 0485 * if the iteration has reached the start of the text. 0486 * @see UTEXT_PREVIOUS32 0487 * @stable ICU 3.4 0488 */ 0489 U_CAPI UChar32 U_EXPORT2 0490 utext_previous32(UText *ut); 0491 0492 0493 /** 0494 * Set the iteration index and return the code point at that index. 0495 * Leave the iteration index at the start of the following code point. 0496 * 0497 * This function is the most efficient and convenient way to 0498 * begin a forward iteration. The results are identical to those 0499 * from the sequence 0500 * \code 0501 * utext_setNativeIndex(ut, nativeIndex); 0502 * utext_next32(ut); 0503 * \endcode 0504 * 0505 * @param ut the text to be accessed. 0506 * @param nativeIndex Iteration index, in the native units of the text provider. 
0507 * @return Code point which starts at or before index, 0508 * or U_SENTINEL (-1) if it is out of bounds. 0509 * @stable ICU 3.4 0510 */ 0511 U_CAPI UChar32 U_EXPORT2 0512 utext_next32From(UText *ut, int64_t nativeIndex); 0513 0514 0515 0516 /** 0517 * Set the iteration index, and return the code point preceding the 0518 * one specified by the initial index. Leave the iteration position 0519 * at the start of the returned code point. 0520 * 0521 * This function is the most efficient and convenient way to 0522 * begin a backwards iteration. 0523 * 0524 * @param ut the text to be accessed. 0525 * @param nativeIndex Iteration index in the native units of the text provider. 0526 * @return Code point preceding the one at the initial index, 0527 * or U_SENTINEL (-1) if it is out of bounds. 0528 * 0529 * @stable ICU 3.4 0530 */ 0531 U_CAPI UChar32 U_EXPORT2 0532 utext_previous32From(UText *ut, int64_t nativeIndex); 0533 0534 /** 0535 * Get the current iterator position, which can range from 0 to 0536 * the length of the text. 0537 * The position is a native index into the input text, in whatever format it 0538 * may have (possibly UTF-8 for example), and may not always be the same as 0539 * the corresponding UChar (UTF-16) index. 0540 * The returned position will always be aligned to a code point boundary. 0541 * 0542 * @param ut the text to be accessed. 0543 * @return the current index position, in the native units of the text provider. 0544 * @stable ICU 3.4 0545 */ 0546 U_CAPI int64_t U_EXPORT2 0547 utext_getNativeIndex(const UText *ut); 0548 0549 /** 0550 * Set the current iteration position to the nearest code point 0551 * boundary at or preceding the specified index. 0552 * The index is in the native units of the original input text. 0553 * If the index is out of range, it will be pinned to be within 0554 * the range of the input text. 
0555 * <p> 0556 * It will usually be more efficient to begin an iteration 0557 * using the functions utext_next32From() or utext_previous32From() 0558 * rather than utext_setNativeIndex(). 0559 * <p> 0560 * Moving the index position to an adjacent character is best done 0561 * with utext_next32(), utext_previous32() or utext_moveIndex32(). 0562 * Attempting to do direct arithmetic on the index position is 0563 * complicated by the fact that the size (in native units) of a 0564 * character depends on the underlying representation of the character 0565 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not 0566 * easily knowable. 0567 * 0568 * @param ut the text to be accessed. 0569 * @param nativeIndex the native unit index of the new iteration position. 0570 * @stable ICU 3.4 0571 */ 0572 U_CAPI void U_EXPORT2 0573 utext_setNativeIndex(UText *ut, int64_t nativeIndex); 0574 0575 /** 0576 * Move the iterator position by delta code points. The number of code points 0577 * is a signed number; a negative delta will move the iterator backwards, 0578 * towards the start of the text. 0579 * <p> 0580 * The index is moved by <code>delta</code> code points 0581 * forward or backward, but no further backward than to 0 and 0582 * no further forward than to utext_nativeLength(). 0583 * The resulting index value will be in between 0 and length, inclusive. 0584 * 0585 * @param ut the text to be accessed. 0586 * @param delta the signed number of code points to move the iteration position. 0587 * @return true if the position could be moved the requested number of positions while 0588 * staying within the range [0 - text length]. 0589 * @stable ICU 3.4 0590 */ 0591 U_CAPI UBool U_EXPORT2 0592 utext_moveIndex32(UText *ut, int32_t delta); 0593 0594 /** 0595 * Get the native index of the character preceding the current position. 0596 * If the iteration position is already at the start of the text, zero 0597 * is returned. 
0598 * The value returned is the same as that obtained from the following sequence, 0599 * but without the side effect of changing the iteration position. 0600 * 0601 * \code 0602 * UText *ut = whatever(); 0603 * ... 0604 * utext_previous32(ut); 0605 * utext_getNativeIndex(ut); 0606 * \endcode 0607 * 0608 * This function is most useful during forwards iteration, where it will get the 0609 * native index of the character most recently returned from utext_next32(). 0610 * 0611 * @param ut the text to be accessed 0612 * @return the native index of the character preceding the current index position, 0613 * or zero if the current position is at the start of the text. 0614 * @stable ICU 3.6 0615 */ 0616 U_CAPI int64_t U_EXPORT2 0617 utext_getPreviousNativeIndex(UText *ut); 0618 0619 0620 /** 0621 * 0622 * Extract text from a UText into a UChar buffer. The range of text to be extracted 0623 * is specified in the native indices of the UText provider. These may not necessarily 0624 * be UTF-16 indices. 0625 * <p> 0626 * The size (number of 16 bit UChars) of the data to be extracted is returned. The 0627 * full number of UChars is returned, even when the extracted text is truncated 0628 * because the specified buffer size is too small. 0629 * <p> 0630 * The extracted string will (if you are a user) / must (if you are a text provider) 0631 * be NUL-terminated if there is sufficient space in the destination buffer. This 0632 * terminating NUL is not included in the returned length. 0633 * <p> 0634 * The iteration index is left at the position following the last extracted character. 0635 * 0636 * @param ut the UText from which to extract data. 0637 * @param nativeStart the native index of the first character to extract. 0638 * If the specified index is out of range, 0639 * it will be pinned to be within 0 <= index <= textLength. 0640 * @param nativeLimit the native string index of the position following the last 0641 * character to extract. 
If the specified index is out of range, 0642 * it will be pinned to be within 0 <= index <= textLength. 0643 * nativeLimit must be >= nativeStart. 0644 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 0645 * @param destCapacity The size, in UChars, of the destination buffer. May be zero 0646 * for precomputing the required size. 0647 * @param status receives any error status. 0648 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 0649 * buffer was too small. Returns number of UChars for preflighting. 0650 * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. 0651 * 0652 * @stable ICU 3.4 0653 */ 0654 U_CAPI int32_t U_EXPORT2 0655 utext_extract(UText *ut, 0656 int64_t nativeStart, int64_t nativeLimit, 0657 UChar *dest, int32_t destCapacity, 0658 UErrorCode *status); 0659 0660 0661 0662 /************************************************************************************ 0663 * 0664 * #define inline versions of selected performance-critical text access functions 0665 * Caution: do not use auto increment++ or decrement-- expressions 0666 * as parameters to these macros. 0667 * 0668 * For most use, where there is no extreme performance constraint, the 0669 * normal, non-inline functions are a better choice. The resulting code 0670 * will be smaller, and, if the need ever arises, easier to debug. 0671 * 0672 * These are implemented as #defines rather than real functions 0673 * because there is no fully portable way to do inline functions in plain C. 0674 * 0675 ************************************************************************************/ 0676 0677 #ifndef U_HIDE_INTERNAL_API 0678 /** 0679 * inline version of utext_current32(), for performance-critical situations. 0680 * 0681 * Get the code point at the current iteration position of the UText. 0682 * Returns U_SENTINEL (-1) if the position is at the end of the 0683 * text. 
0684 * 0685 * @internal ICU 4.4 technology preview 0686 */ 0687 #define UTEXT_CURRENT32(ut) \ 0688 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 0689 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) 0690 #endif /* U_HIDE_INTERNAL_API */ 0691 0692 /** 0693 * inline version of utext_next32(), for performance-critical situations. 0694 * 0695 * Get the code point at the current iteration position of the UText, and 0696 * advance the position to the first index following the character. 0697 * This is a post-increment operation. 0698 * Returns U_SENTINEL (-1) if the position is at the end of the 0699 * text. 0700 * 0701 * @stable ICU 3.4 0702 */ 0703 #define UTEXT_NEXT32(ut) \ 0704 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 0705 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) 0706 0707 /** 0708 * inline version of utext_previous32(), for performance-critical situations. 0709 * 0710 * Move the iterator position to the character (code point) whose 0711 * index precedes the current position, and return that character. 0712 * This is a pre-decrement operation. 0713 * Returns U_SENTINEL (-1) if the position is at the start of the text. 0714 * 0715 * @stable ICU 3.4 0716 */ 0717 #define UTEXT_PREVIOUS32(ut) \ 0718 ((ut)->chunkOffset > 0 && \ 0719 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ 0720 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) 0721 0722 /** 0723 * inline version of utext_getNativeIndex(), for performance-critical situations. 0724 * 0725 * Get the current iterator position, which can range from 0 to 0726 * the length of the text. 0727 * The position is a native index into the input text, in whatever format it 0728 * may have (possibly UTF-8 for example), and may not always be the same as 0729 * the corresponding UChar (UTF-16) index. 
0730 * The returned position will always be aligned to a code point boundary. 0731 * 0732 * @stable ICU 3.6 0733 */ 0734 #define UTEXT_GETNATIVEINDEX(ut) \ 0735 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ 0736 (ut)->chunkNativeStart+(ut)->chunkOffset : \ 0737 (ut)->pFuncs->mapOffsetToNative(ut)) 0738 0739 /** 0740 * inline version of utext_setNativeIndex(), for performance-critical situations. 0741 * 0742 * Set the current iteration position to the nearest code point 0743 * boundary at or preceding the specified index. 0744 * The index is in the native units of the original input text. 0745 * If the index is out of range, it will be pinned to be within 0746 * the range of the input text. 0747 * 0748 * @stable ICU 3.8 0749 */ 0750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ 0751 int64_t __offset = (ix) - (ut)->chunkNativeStart; \ 0752 if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ 0753 (ut)->chunkOffset=(int32_t)__offset; \ 0754 } else { \ 0755 utext_setNativeIndex((ut), (ix)); \ 0756 } \ 0757 } UPRV_BLOCK_MACRO_END 0758 0759 0760 0761 /************************************************************************************ 0762 * 0763 * Functions related to writing or modifying the text. 0764 * These will work only with modifiable UTexts. Attempting to 0765 * modify a read-only UText will return an error status. 0766 * 0767 ************************************************************************************/ 0768 0769 0770 /** 0771 * Return true if the text can be written (modified) with utext_replace() or 0772 * utext_copy(). For the text to be writable, the text provider must 0773 * be of a type that supports writing and the UText must not be frozen. 0774 * 0775 * Attempting to modify text when utext_isWritable() is false will fail - 0776 * the text will not be modified, and an error will be returned from the function 0777 * that attempted the modification. 
0778 * 0779 * @param ut the UText to be tested. 0780 * @return true if the text is modifiable. 0781 * 0782 * @see utext_freeze() 0783 * @see utext_replace() 0784 * @see utext_copy() 0785 * @stable ICU 3.4 0786 * 0787 */ 0788 U_CAPI UBool U_EXPORT2 0789 utext_isWritable(const UText *ut); 0790 0791 0792 /** 0793 * Test whether there is meta data associated with the text. 0794 * @see Replaceable::hasMetaData() 0795 * 0796 * @param ut The UText to be tested 0797 * @return true if the underlying text includes meta data. 0798 * @stable ICU 3.4 0799 */ 0800 U_CAPI UBool U_EXPORT2 0801 utext_hasMetaData(const UText *ut); 0802 0803 0804 /** 0805 * Replace a range of the original text with a replacement text. 0806 * 0807 * Leaves the current iteration position at the position following the 0808 * newly inserted replacement text. 0809 * 0810 * This function is only available on UText types that support writing, 0811 * that is, ones where utext_isWritable() returns true. 0812 * 0813 * When using this function, there should be only a single UText opened onto the 0814 * underlying native text string. Behavior after a replace operation 0815 * on a UText is undefined for any other additional UTexts that refer to the 0816 * modified string. 0817 * 0818 * @param ut the UText representing the text to be operated on. 0819 * @param nativeStart the native index of the start of the region to be replaced 0820 * @param nativeLimit the native index of the character following the region to be replaced. 0821 * @param replacementText pointer to the replacement text 0822 * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. 0823 * @param status receives any error status. Possible errors include 0824 * U_NO_WRITE_PERMISSION 0825 * 0826 * @return The signed number of (native) storage units by which 0827 * the length of the text expanded or contracted. 
0828 * 0829 * @stable ICU 3.4 0830 */ 0831 U_CAPI int32_t U_EXPORT2 0832 utext_replace(UText *ut, 0833 int64_t nativeStart, int64_t nativeLimit, 0834 const UChar *replacementText, int32_t replacementLength, 0835 UErrorCode *status); 0836 0837 0838 0839 /** 0840 * 0841 * Copy or move a substring from one position to another within the text, 0842 * while retaining any metadata associated with the text. 0843 * This function is used to duplicate or reorder substrings. 0844 * The destination index must not overlap the source range. 0845 * 0846 * The text to be copied or moved is inserted at destIndex; 0847 * it does not replace or overwrite any existing text. 0848 * 0849 * The iteration position is left following the newly inserted text 0850 * at the destination position. 0851 * 0852 * This function is only available on UText types that support writing, 0853 * that is, ones where utext_isWritable() returns true. 0854 * 0855 * When using this function, there should be only a single UText opened onto the 0856 * underlying native text string. Behavior after a copy operation 0857 * on a UText is undefined in any other additional UTexts that refer to the 0858 * modified string. 0859 * 0860 * @param ut The UText representing the text to be operated on. 0861 * @param nativeStart The native index of the start of the region to be copied or moved 0862 * @param nativeLimit The native index of the character position following the region 0863 * to be copied. 0864 * @param destIndex The native destination index to which the source substring is 0865 * copied or moved. 0866 * @param move If true, then the substring is moved, not copied/duplicated. 0867 * @param status receives any error status. 
Possible errors include U_NO_WRITE_PERMISSION 0868 * 0869 * @stable ICU 3.4 0870 */ 0871 U_CAPI void U_EXPORT2 0872 utext_copy(UText *ut, 0873 int64_t nativeStart, int64_t nativeLimit, 0874 int64_t destIndex, 0875 UBool move, 0876 UErrorCode *status); 0877 0878 0879 /** 0880 * <p> 0881 * Freeze a UText. This prevents any modification to the underlying text itself 0882 * by means of functions operating on this UText. 0883 * </p> 0884 * <p> 0885 * Once frozen, a UText can not be unfrozen. The intent is to ensure 0886 * that the text underlying a frozen UText wrapper cannot be modified via that UText. 0887 * </p> 0888 * <p> 0889 * Caution: freezing a UText will disable changes made via the specific 0890 * frozen UText wrapper only; it will not have any effect on the ability to 0891 * directly modify the text by bypassing the UText. Any such backdoor modifications 0892 * are always an error while UText access is occurring because the underlying 0893 * text can get out of sync with UText's buffering. 0894 * </p> 0895 * 0896 * @param ut The UText to be frozen. 0897 * @see utext_isWritable() 0898 * @stable ICU 3.6 0899 */ 0900 U_CAPI void U_EXPORT2 0901 utext_freeze(UText *ut); 0902 0903 0904 /** 0905 * UText provider properties (bit field indexes). 0906 * 0907 * @see UText 0908 * @stable ICU 3.4 0909 */ 0910 enum { 0911 /** 0912 * It is potentially time consuming for the provider to determine the length of the text. 0913 * @stable ICU 3.4 0914 */ 0915 UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, 0916 /** 0917 * Text chunks remain valid and usable until the text object is modified or 0918 * deleted, not just until the next time the access() function is called 0919 * (which is the default). 0920 * @stable ICU 3.4 0921 */ 0922 UTEXT_PROVIDER_STABLE_CHUNKS = 2, 0923 /** 0924 * The provider supports modifying the text via the replace() and copy() 0925 * functions. 
0926 * @see Replaceable 0927 * @stable ICU 3.4 0928 */ 0929 UTEXT_PROVIDER_WRITABLE = 3, 0930 /** 0931 * There is meta data associated with the text. 0932 * @see Replaceable::hasMetaData() 0933 * @stable ICU 3.4 0934 */ 0935 UTEXT_PROVIDER_HAS_META_DATA = 4, 0936 /** 0937 * Text provider owns the text storage. 0938 * Generally occurs as the result of a deep clone of the UText. 0939 * When closing the UText, the associated text must 0940 * also be closed/deleted/freed/ whatever is appropriate. 0941 * @stable ICU 3.6 0942 */ 0943 UTEXT_PROVIDER_OWNS_TEXT = 5 0944 }; 0945 0946 /** 0947 * Function type declaration for UText.clone(). 0948 * 0949 * clone a UText. Much like opening a UText where the source text is itself 0950 * another UText. 0951 * 0952 * A deep clone will copy both the UText data structures and the underlying text. 0953 * The original and cloned UText will operate completely independently; modifications 0954 * made to the text in one will not affect the other. Text providers are not 0955 * required to support deep clones. The user of clone() must check the status return 0956 * and be prepared to handle failures. 0957 * 0958 * A shallow clone replicates only the UText data structures; it does not make 0959 * a copy of the underlying text. Shallow clones can be used as an efficient way to 0960 * have multiple iterators active in a single text string that is not being 0961 * modified. 0962 * 0963 * A shallow clone operation must not fail except for truly exceptional conditions such 0964 * as memory allocation failures. 0965 * 0966 * A UText and its clone may be safely concurrently accessed by separate threads. 0967 * This is true for both shallow and deep clones. 0968 * It is the responsibility of the Text Provider to ensure that this thread safety 0969 * constraint is met. 0970 0971 * 0972 * @param dest A UText struct to be filled in with the result of the clone operation, 0973 * or NULL if the clone function should heap-allocate a new UText struct. 
0974 * @param src The UText to be cloned. 0975 * @param deep true to request a deep clone, false for a shallow clone. 0976 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 0977 * should be returned if the text provider is unable to clone the 0978 * original text. 0979 * @return The newly created clone, or NULL if the clone operation failed. 0980 * 0981 * @stable ICU 3.4 0982 */ 0983 typedef UText * U_CALLCONV 0984 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); 0985 0986 0987 /** 0988 * Function type declaration for UText.nativeLength(). 0989 * 0990 * @param ut the UText to get the length of. 0991 * @return the length, in the native units of the original text string. 0992 * @see UText 0993 * @stable ICU 3.4 0994 */ 0995 typedef int64_t U_CALLCONV 0996 UTextNativeLength(UText *ut); 0997 0998 /** 0999 * Function type declaration for UText.access(). Get the description of the text chunk 1000 * containing the text at a requested native index. The UText's iteration 1001 * position will be left at the requested index. If the index is out 1002 * of bounds, the iteration position will be left at the start or end 1003 * of the string, as appropriate. 1004 * 1005 * Chunks must begin and end on code point boundaries. A single code point 1006 * comprised of multiple storage units must never span a chunk boundary. 1007 * 1008 * 1009 * @param ut the UText being accessed. 1010 * @param nativeIndex Requested index of the text to be accessed. 1011 * @param forward If true, then the returned chunk must contain text 1012 * starting from the index, so that start<=index<limit. 1013 * If false, then the returned chunk must contain text 1014 * before the index, so that start<index<=limit. 1015 * @return True if the requested index could be accessed. The chunk 1016 * will contain the requested text. 1017 * False value if a chunk cannot be accessed 1018 * (the requested index is out of bounds). 
1019 * 1020 * @see UText 1021 * @stable ICU 3.4 1022 */ 1023 typedef UBool U_CALLCONV 1024 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); 1025 1026 /** 1027 * Function type declaration for UText.extract(). 1028 * 1029 * Extract text from a UText into a UChar buffer. The range of text to be extracted 1030 * is specified in the native indices of the UText provider. These may not necessarily 1031 * be UTF-16 indices. 1032 * <p> 1033 * The size (number of 16 bit UChars) in the data to be extracted is returned. The 1034 * full amount is returned, even when the specified buffer size is smaller. 1035 * <p> 1036 * The extracted string will (if you are a user) / must (if you are a text provider) 1037 * be NUL-terminated if there is sufficient space in the destination buffer. 1038 * 1039 * @param ut the UText from which to extract data. 1040 * @param nativeStart the native index of the first character to extract. 1041 * @param nativeLimit the native string index of the position following the last 1042 * character to extract. 1043 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 1044 * @param destCapacity The size, in UChars, of the destination buffer. May be zero 1045 * for precomputing the required size. 1046 * @param status receives any error status. 1047 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for 1048 * preflighting. 1049 * @return Number of UChars in the data. Does not include a trailing NUL. 1050 * 1051 * @stable ICU 3.4 1052 */ 1053 typedef int32_t U_CALLCONV 1054 UTextExtract(UText *ut, 1055 int64_t nativeStart, int64_t nativeLimit, 1056 UChar *dest, int32_t destCapacity, 1057 UErrorCode *status); 1058 1059 /** 1060 * Function type declaration for UText.replace(). 1061 * 1062 * Replace a range of the original text with a replacement text. 1063 * 1064 * Leaves the current iteration position at the position following the 1065 * newly inserted replacement text. 
1066 * 1067 * This function need only be implemented on UText types that support writing. 1068 * 1069 * When using this function, there should be only a single UText opened onto the 1070 * underlying native text string. The function is responsible for updating the 1071 * text chunk within the UText to reflect the updated iteration position, 1072 * taking into account any changes to the underlying string's structure caused 1073 * by the replace operation. 1074 * 1075 * @param ut the UText representing the text to be operated on. 1076 * @param nativeStart the index of the start of the region to be replaced 1077 * @param nativeLimit the index of the character following the region to be replaced. 1078 * @param replacementText pointer to the replacement text 1079 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. 1080 * @param status receives any error status. Possible errors include 1081 * U_NO_WRITE_PERMISSION 1082 * 1083 * @return The signed number of (native) storage units by which 1084 * the length of the text expanded or contracted. 1085 * 1086 * @stable ICU 3.4 1087 */ 1088 typedef int32_t U_CALLCONV 1089 UTextReplace(UText *ut, 1090 int64_t nativeStart, int64_t nativeLimit, 1091 const UChar *replacementText, int32_t replacmentLength, 1092 UErrorCode *status); 1093 1094 /** 1095 * Function type declaration for UText.copy(). 1096 * 1097 * Copy or move a substring from one position to another within the text, 1098 * while retaining any metadata associated with the text. 1099 * This function is used to duplicate or reorder substrings. 1100 * The destination index must not overlap the source range. 1101 * 1102 * The text to be copied or moved is inserted at nativeDest; 1103 * it does not replace or overwrite any existing text. 1104 * 1105 * This function need only be implemented for UText types that support writing. 
1106 * 1107 * When using this function, there should be only a single UText opened onto the 1108 * underlying native text string. The function is responsible for updating the 1109 * text chunk within the UText to reflect the updated iteration position, 1110 * taking into account any changes to the underlying string's structure caused 1111 * by the copy or move operation. 1112 * 1113 * @param ut The UText representing the text to be operated on. 1114 * @param nativeStart The index of the start of the region to be copied or moved 1115 * @param nativeLimit The index of the character following the region to be copied or moved. 1116 * @param nativeDest The destination index to which the source substring is copied or moved. 1117 * @param move If true, then the substring is moved, not copied/duplicated. 1118 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION 1119 * 1120 * @stable ICU 3.4 1121 */ 1122 typedef void U_CALLCONV 1123 UTextCopy(UText *ut, 1124 int64_t nativeStart, int64_t nativeLimit, 1125 int64_t nativeDest, 1126 UBool move, 1127 UErrorCode *status); 1128 1129 /** 1130 * Function type declaration for UText.mapOffsetToNative(). 1131 * Map from the current UChar offset within the current text chunk to 1132 * the corresponding native index in the original source text. 1133 * 1134 * This is required only for text providers that do not use native UTF-16 indexes. 1135 * 1136 * @param ut the UText. 1137 * @return Absolute (native) index corresponding to chunkOffset in the current chunk. 1138 * The returned native index should always be to a code point boundary. 1139 * 1140 * @stable ICU 3.4 1141 */ 1142 typedef int64_t U_CALLCONV 1143 UTextMapOffsetToNative(const UText *ut); 1144 1145 /** 1146 * Function type declaration for UText.mapNativeIndexToUTF16(). 1147 * Map from a native index to a UChar offset within a text chunk. 1148 * Behavior is undefined if the native index does not fall within the 1149 * current chunk. 
1150 * 1151 * This function is required only for text providers that do not use native UTF-16 indexes. 1152 * 1153 * @param ut The UText containing the text chunk. 1154 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. 1155 * @return Chunk-relative UTF-16 offset corresponding to the specified native 1156 * index. 1157 * 1158 * @stable ICU 3.4 1159 */ 1160 typedef int32_t U_CALLCONV 1161 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); 1162 1163 1164 /** 1165 * Function type declaration for UText.close(). 1166 * 1167 * A Text Provider close function is only required for provider types that make 1168 * allocations in their open function (or other functions) that must be 1169 * cleaned when the UText is closed. 1170 * 1171 * The allocation of the UText struct itself and any "extra" storage 1172 * associated with the UText is handled by the common UText implementation 1173 * and does not require provider specific cleanup in a close function. 1174 * 1175 * Most UText provider implementations do not need to implement this function. 1176 * 1177 * @param ut A UText object to be closed. 1178 * 1179 * @stable ICU 3.4 1180 */ 1181 typedef void U_CALLCONV 1182 UTextClose(UText *ut); 1183 1184 1185 /** 1186 * (public) Function dispatch table for UText. 1187 * Conceptually very much like a C++ Virtual Function Table. 1188 * This struct defines the organization of the table. 1189 * Each text provider implementation must provide an 1190 * actual table that is initialized with the appropriate functions 1191 * for the type of text being handled. 1192 * @stable ICU 3.6 1193 */ 1194 struct UTextFuncs { 1195 /** 1196 * (public) Function table size, sizeof(UTextFuncs) 1197 * Intended for use should the table grow to accommodate added 1198 * functions in the future, to allow tests for older format 1199 * function tables that do not contain the extensions. 
1200 * 1201 * Fields are placed for optimal alignment on 1202 * 32/64/128-bit-pointer machines, by normally grouping together 1203 * 4 32-bit fields, 1204 * 4 pointers, 1205 * 2 64-bit fields 1206 * in sequence. 1207 * @stable ICU 3.6 1208 */ 1209 int32_t tableSize; 1210 1211 /** 1212 * (private) Alignment padding. 1213 * Do not use, reserved for use by the UText framework only. 1214 * @internal 1215 */ 1216 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; 1217 1218 1219 /** 1220 * (public) Function pointer for UTextClone 1221 * 1222 * @see UTextClone 1223 * @stable ICU 3.6 1224 */ 1225 UTextClone *clone; 1226 1227 /** 1228 * (public) Function pointer for UTextNativeLength 1229 * May be expensive to compute! 1230 * 1231 * @see UTextNativeLength 1232 * @stable ICU 3.6 1233 */ 1234 UTextNativeLength *nativeLength; 1235 1236 /** 1237 * (public) Function pointer for UTextAccess. 1238 * 1239 * @see UTextAccess 1240 * @stable ICU 3.6 1241 */ 1242 UTextAccess *access; 1243 1244 /** 1245 * (public) Function pointer for UTextExtract. 1246 * 1247 * @see UTextExtract 1248 * @stable ICU 3.6 1249 */ 1250 UTextExtract *extract; 1251 1252 /** 1253 * (public) Function pointer for UTextReplace. 1254 * 1255 * @see UTextReplace 1256 * @stable ICU 3.6 1257 */ 1258 UTextReplace *replace; 1259 1260 /** 1261 * (public) Function pointer for UTextCopy. 1262 * 1263 * @see UTextCopy 1264 * @stable ICU 3.6 1265 */ 1266 UTextCopy *copy; 1267 1268 /** 1269 * (public) Function pointer for UTextMapOffsetToNative. 1270 * 1271 * @see UTextMapOffsetToNative 1272 * @stable ICU 3.6 1273 */ 1274 UTextMapOffsetToNative *mapOffsetToNative; 1275 1276 /** 1277 * (public) Function pointer for UTextMapNativeIndexToUTF16. 1278 * 1279 * @see UTextMapNativeIndexToUTF16 1280 * @stable ICU 3.6 1281 */ 1282 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; 1283 1284 /** 1285 * (public) Function pointer for UTextClose. 
1286 * 1287 * @see UTextClose 1288 * @stable ICU 3.6 1289 */ 1290 UTextClose *close; 1291 1292 /** 1293 * (private) Spare function pointer 1294 * @internal 1295 */ 1296 UTextClose *spare1; 1297 1298 /** 1299 * (private) Spare function pointer 1300 * @internal 1301 */ 1302 UTextClose *spare2; 1303 1304 /** 1305 * (private) Spare function pointer 1306 * @internal 1307 */ 1308 UTextClose *spare3; 1309 1310 }; 1311 /** 1312 * Function dispatch table for UText 1313 * @see UTextFuncs 1314 */ 1315 typedef struct UTextFuncs UTextFuncs; 1316 1317 /** 1318 * UText struct. Provides the interface between the generic UText access code 1319 * and the UText provider code that works on specific kinds of 1320 * text (UTF-8, noncontiguous UTF-16, whatever.) 1321 * 1322 * Applications that are using predefined types of text providers 1323 * to pass text data to ICU services will have no need to view the 1324 * internals of the UText structs that they open. 1325 * 1326 * @stable ICU 3.6 1327 */ 1328 struct UText { 1329 /** 1330 * (private) Magic. Used to help detect when UText functions are handed 1331 * invalid or uninitialized UText structs. 1332 * utext_openXYZ() functions take an initialized, 1333 * but not necessarily open, UText struct as an 1334 * optional fill-in parameter. This magic field 1335 * is used to check for that initialization. 1336 * Text provider close functions must NOT clear 1337 * the magic field because that would prevent 1338 * reuse of the UText struct. 1339 * @internal 1340 */ 1341 uint32_t magic; 1342 1343 1344 /** 1345 * (private) Flags for managing the allocation and freeing of 1346 * memory associated with this UText. 1347 * @internal 1348 */ 1349 int32_t flags; 1350 1351 1352 /** 1353 * Text provider properties. This set of flags is maintained by the 1354 * text provider implementation. 
1355 * @stable ICU 3.4 1356 */ 1357 int32_t providerProperties; 1358 1359 /** 1360 * (public) sizeOfStruct=sizeof(UText) 1361 * Allows possible backward compatible extension. 1362 * 1363 * @stable ICU 3.4 1364 */ 1365 int32_t sizeOfStruct; 1366 1367 /* ------ 16 byte alignment boundary ----------- */ 1368 1369 1370 /** 1371 * (protected) Native index of the first character position following 1372 * the current chunk. 1373 * @stable ICU 3.6 1374 */ 1375 int64_t chunkNativeLimit; 1376 1377 /** 1378 * (protected) Size in bytes of the extra space (pExtra). 1379 * @stable ICU 3.4 1380 */ 1381 int32_t extraSize; 1382 1383 /** 1384 * (protected) The highest chunk offset where native indexing and 1385 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value 1386 * will be equal to chunkLength. 1387 * 1388 * @stable ICU 3.6 1389 */ 1390 int32_t nativeIndexingLimit; 1391 1392 /* ---- 16 byte alignment boundary------ */ 1393 1394 /** 1395 * (protected) Native index of the first character in the text chunk. 1396 * @stable ICU 3.6 1397 */ 1398 int64_t chunkNativeStart; 1399 1400 /** 1401 * (protected) Current iteration position within the text chunk (UTF-16 buffer). 1402 * This is the index to the character that will be returned by utext_next32(). 1403 * @stable ICU 3.6 1404 */ 1405 int32_t chunkOffset; 1406 1407 /** 1408 * (protected) Length of the text chunk (UTF-16 buffer), in UChars. 1409 * @stable ICU 3.6 1410 */ 1411 int32_t chunkLength; 1412 1413 /* ---- 16 byte alignment boundary-- */ 1414 1415 1416 /** 1417 * (protected) pointer to a chunk of text in UTF-16 format. 1418 * May refer either to original storage of the source of the text, or 1419 * if conversion was required, to a buffer owned by the UText. 1420 * @stable ICU 3.6 1421 */ 1422 const UChar *chunkContents; 1423 1424 /** 1425 * (public) Pointer to Dispatch table for accessing functions for this UText. 
1426 * @stable ICU 3.6 1427 */ 1428 const UTextFuncs *pFuncs; 1429 1430 /** 1431 * (protected) Pointer to additional space requested by the 1432 * text provider during the utext_open operation. 1433 * @stable ICU 3.4 1434 */ 1435 void *pExtra; 1436 1437 /** 1438 * (protected) Pointer to string or text-containing object or similar. 1439 * This is the source of the text that this UText is wrapping, in a format 1440 * that is known to the text provider functions. 1441 * @stable ICU 3.4 1442 */ 1443 const void *context; 1444 1445 /* --- 16 byte alignment boundary--- */ 1446 1447 /** 1448 * (protected) Pointer fields available for use by the text provider. 1449 * Not used by UText common code. 1450 * @stable ICU 3.6 1451 */ 1452 const void *p; 1453 /** 1454 * (protected) Pointer fields available for use by the text provider. 1455 * Not used by UText common code. 1456 * @stable ICU 3.6 1457 */ 1458 const void *q; 1459 /** 1460 * (protected) Pointer fields available for use by the text provider. 1461 * Not used by UText common code. 1462 * @stable ICU 3.6 1463 */ 1464 const void *r; 1465 1466 /** 1467 * Private field reserved for future use by the UText framework 1468 * itself. This is not to be touched by the text providers. 1469 * @internal ICU 3.4 1470 */ 1471 void *privP; 1472 1473 1474 /* --- 16 byte alignment boundary--- */ 1475 1476 1477 /** 1478 * (protected) Integer field reserved for use by the text provider. 1479 * Not used by the UText framework, or by the client (user) of the UText. 1480 * @stable ICU 3.4 1481 */ 1482 int64_t a; 1483 1484 /** 1485 * (protected) Integer field reserved for use by the text provider. 1486 * Not used by the UText framework, or by the client (user) of the UText. 1487 * @stable ICU 3.4 1488 */ 1489 int32_t b; 1490 1491 /** 1492 * (protected) Integer field reserved for use by the text provider. 1493 * Not used by the UText framework, or by the client (user) of the UText. 
1494 * @stable ICU 3.4 1495 */ 1496 int32_t c; 1497 1498 /* ---- 16 byte alignment boundary---- */ 1499 1500 1501 /** 1502 * Private field reserved for future use by the UText framework 1503 * itself. This is not to be touched by the text providers. 1504 * @internal ICU 3.4 1505 */ 1506 int64_t privA; 1507 /** 1508 * Private field reserved for future use by the UText framework 1509 * itself. This is not to be touched by the text providers. 1510 * @internal ICU 3.4 1511 */ 1512 int32_t privB; 1513 /** 1514 * Private field reserved for future use by the UText framework 1515 * itself. This is not to be touched by the text providers. 1516 * @internal ICU 3.4 1517 */ 1518 int32_t privC; 1519 }; 1520 1521 1522 /** 1523 * Common function for use by Text Provider implementations to allocate and/or initialize 1524 * a new UText struct. To be called in the implementation of utext_open() functions. 1525 * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. 1526 * If the supplied UText is already open, the provider's close function will be called 1527 * so that the struct can be reused by the open that is in progress. 1528 * 1529 * @param ut pointer to a UText struct to be re-used, or null if a new UText 1530 * should be allocated. 1531 * @param extraSpace The amount of additional space to be allocated as part 1532 * of this UText, for use by types of providers that require 1533 * additional storage. 1534 * @param status Errors are returned here. 1535 * @return pointer to the UText, allocated if necessary, with extra space set up if requested. 1536 * @stable ICU 3.4 1537 */ 1538 U_CAPI UText * U_EXPORT2 1539 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); 1540 1541 // do not use #ifndef U_HIDE_INTERNAL_API around the following! 1542 /** 1543 * @internal 1544 * Value used to help identify correctly initialized UText structs. 1545 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. 
1546 */ 1547 enum { 1548 UTEXT_MAGIC = 0x345ad82c 1549 }; 1550 1551 /** 1552 * initializer to be used with local (stack) instances of a UText 1553 * struct. UText structs must be initialized before passing 1554 * them to one of the utext_open functions. 1555 * 1556 * @stable ICU 3.6 1557 */ 1558 #define UTEXT_INITIALIZER { \ 1559 UTEXT_MAGIC, /* magic */ \ 1560 0, /* flags */ \ 1561 0, /* providerProps */ \ 1562 sizeof(UText), /* sizeOfStruct */ \ 1563 0, /* chunkNativeLimit */ \ 1564 0, /* extraSize */ \ 1565 0, /* nativeIndexingLimit */ \ 1566 0, /* chunkNativeStart */ \ 1567 0, /* chunkOffset */ \ 1568 0, /* chunkLength */ \ 1569 NULL, /* chunkContents */ \ 1570 NULL, /* pFuncs */ \ 1571 NULL, /* pExtra */ \ 1572 NULL, /* context */ \ 1573 NULL, NULL, NULL, /* p, q, r */ \ 1574 NULL, /* privP */ \ 1575 0, 0, 0, /* a, b, c */ \ 1576 0, 0, 0 /* privA,B,C, */ \ 1577 } 1578 1579 1580 U_CDECL_END 1581 1582 1583 #if U_SHOW_CPLUSPLUS_API 1584 1585 U_NAMESPACE_BEGIN 1586 1587 /** 1588 * \class LocalUTextPointer 1589 * "Smart pointer" class, closes a UText via utext_close(). 1590 * For most methods see the LocalPointerBase base class. 1591 * 1592 * @see LocalPointerBase 1593 * @see LocalPointer 1594 * @stable ICU 4.4 1595 */ 1596 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); 1597 1598 U_NAMESPACE_END 1599 1600 #endif 1601 1602 1603 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |