Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:04:20

0001 // Created on: 2013-01-28
0002 // Created by: Kirill GAVRILOV
0003 // Copyright (c) 2013-2014 OPEN CASCADE SAS
0004 //
0005 // This file is part of Open CASCADE Technology software library.
0006 //
0007 // This library is free software; you can redistribute it and/or modify it under
0008 // the terms of the GNU Lesser General Public License version 2.1 as published
0009 // by the Free Software Foundation, with special exception defined in the file
0010 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
0011 // distribution for complete text of the license and disclaimer of any warranty.
0012 //
0013 // Alternatively, this file may be used under the terms of Open CASCADE
0014 // commercial license or contractual agreement.
0015 
0016 #ifndef NCollection_UtfString_HeaderFile
0017 #define NCollection_UtfString_HeaderFile
0018 
0019 #include <NCollection_UtfIterator.hxx>
0020 
0021 #include <cstring>
0022 #include <cstdlib>
0023 
0024 //! This template class represents a constant UTF-* string.
0025 //! The string is stored in memory contiguously, is always NULL-terminated
0026 //! and can be used as a standard C-string via the ToCString() method.
0027 //!
0028 //! Notice that changing the string is not allowed
0029 //! and any modification should produce a new string.
0030 //!
0031 //! In comments to this class, the term "Unicode symbol" is used as a
0032 //! synonym of "Unicode code point".
0033 template<typename Type>
0034 class NCollection_UtfString
0035 {
0036 
0037 public:
0038   //! @return iterator constructed over the internal string buffer
0039   NCollection_UtfIterator<Type> Iterator() const
0040   {
0041     return NCollection_UtfIterator<Type> (myString);
0042   }
0043 
0044   //! @return the size of the buffer in bytes, excluding NULL-termination symbol
0045   Standard_Integer Size() const
0046   {
0047     return mySize;
0048   }
0049 
0050   //! @return the length of the string in Unicode symbols
0051   Standard_Integer Length() const
0052   {
0053     return myLength;
0054   }
0055 
0056   //! Retrieve Unicode symbol at specified position.
0057   //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
0058   //! @param theCharIndex the index of the symbol, should be less than Length()
0059   //! @return the Unicode symbol value
0060   Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const;
0061 
0062   //! Retrieve string buffer at specified position.
0063   //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
0064   //! @param theCharIndex the index of the symbol, should be less than Length()
0065   //!        (first symbol of the string has index 0)
0066   //! @return the pointer to the symbol
0067   const Type* GetCharBuffer (const Standard_Integer theCharIndex) const;
0068 
0069   //! Retrieve Unicode symbol at specified position (shorthand for GetChar()).
0070   //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
0071   Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const
0072   {
0073     return GetChar (theCharIndex);
0074   }
0075 
0076   //! Initialize empty string.
0077   NCollection_UtfString();
0078 
0079   //! Copy constructor.
0080   //! @param theCopy string to copy.
0081   NCollection_UtfString (const NCollection_UtfString& theCopy);
0082 
0083   //! Move constructor.
0084   NCollection_UtfString (NCollection_UtfString&& theOther);
0085 
0086   //! Copy constructor from UTF-8 string.
0087   //! @param theCopyUtf8 UTF-8 string to copy
0088   //! @param theLength   optional length limit in Unicode symbols (NOT bytes!)
0089   //! The string is copied till NULL symbol or, if theLength > 0,
0090   //! till either NULL or theLength-th symbol (whichever comes first).
0091   NCollection_UtfString (const char*            theCopyUtf8,
0092                          const Standard_Integer theLength = -1);
0093 
0094   //! Copy constructor from UTF-16 string.
0095   //! @param theCopyUtf16 UTF-16 string to copy
0096   //! @param theLength    optional length limit in Unicode symbols (NOT bytes!)
0097   //! The string is copied till NULL symbol or, if theLength > 0,
0098   //! till either NULL or theLength-th symbol (whichever comes first).
0099   NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16,
0100                          const Standard_Integer    theLength = -1);
0101 
0102   //! Copy constructor from UTF-32 string.
0103   //! @param theCopyUtf32 UTF-32 string to copy
0104   //! @param theLength    optional length limit in Unicode symbols (NOT bytes!)
0105   //! The string is copied till NULL symbol or, if theLength > 0,
0106   //! till either NULL or theLength-th symbol (whichever comes first).
0107   NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32,
0108                          const Standard_Integer    theLength = -1);
0109 
0110 #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900)
0111   //! Copy constructor from wide UTF string.
0112   //! @param theCopyUtfWide wide UTF string to copy
0113   //! @param theLength      optional length limit in Unicode symbols (NOT bytes!)
0114   //! The string is copied till NULL symbol or, if theLength > 0,
0115   //! till either NULL or theLength-th symbol (whichever comes first).
0116   //!
0117   //! This constructor is undefined if Standard_WideChar is the same type as Standard_Utf16Char.
0118   NCollection_UtfString (const Standard_WideChar* theCopyUtfWide,
0119                          const Standard_Integer   theLength = -1);
0120 #endif
0121 
0122   //! Copy from Unicode string in UTF-8, UTF-16, or UTF-32 encoding,
0123   //! determined by size of TypeFrom character type.
0124   //! @param theStringUtf Unicode string
0125   //! @param theLength    optional length limit in Unicode symbols
0126   //! The string is copied till NULL symbol or, if theLength > 0,
0127   //! till either NULL or theLength-th symbol (whichever comes first).
0128   template <typename TypeFrom>
0129   inline void FromUnicode (const TypeFrom*        theStringUtf,
0130                            const Standard_Integer theLength = -1)
0131   {
0132     NCollection_UtfIterator<TypeFrom> anIterRead (theStringUtf);
0133     if (*anIterRead == 0)
0134     {
0135       // special case: the first symbol is already NULL, so just Clear() this string
0136       Clear();
0137       return;
0138     }
0139     fromUnicodeImpl (theStringUtf, theLength, anIterRead);
0140   }
0141 
0142   //! Copy from multibyte string in current system locale.
0143   //! @param theString multibyte string
0144   //! @param theLength optional length limit in Unicode symbols
0145   //! The string is copied till NULL symbol or, if theLength > 0,
0146   //! till either NULL or theLength-th symbol (whichever comes first).
0147   void FromLocale (const char*            theString,
0148                    const Standard_Integer theLength = -1);
0149 
0150   //! Destructor.
0151   ~NCollection_UtfString();
0152 
0153   //! Compares this string with another one.
0154   bool IsEqual (const NCollection_UtfString& theCompare) const;
0155 
0156   //! Returns the substring.
0157   //! @param theStart start index (inclusive) of subString
0158   //! @param theEnd   end index   (exclusive) of subString
0159   //! @return the substring
0160   NCollection_UtfString SubString (const Standard_Integer theStart,
0161                                    const Standard_Integer theEnd) const;
0162 
0163   //! Returns NULL-terminated Unicode string.
0164   //! Should not be modified or deleted!
0165   //! @return (const Type* ) pointer to string
0166   const Type* ToCString() const
0167   {
0168     return myString;
0169   }
0170 
0171   //! @return copy in UTF-8 format
0172   const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const;
0173 
0174   //! @return copy in UTF-16 format
0175   const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const;
0176 
0177   //! @return copy in UTF-32 format
0178   const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const;
0179 
0180   //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux)
0181   const NCollection_UtfString<Standard_WideChar> ToUtfWide() const;
0182 
0183   //! Converts the string into string in the current system locale.
0184   //! @param theBuffer    output buffer
0185   //! @param theSizeBytes buffer size in bytes
0186   //! @return true on success
0187   bool ToLocale (char*                  theBuffer,
0188                  const Standard_Integer theSizeBytes) const;
0189 
0190   //! @return true if string is empty (the first element of the buffer is the NULL terminator)
0191   bool IsEmpty() const
0192   {
0193     return myString[0] == Type(0);
0194   }
0195 
0196   //! Zero string.
0197   void Clear();
0198 
0199 public: //! @name assign operators
0200 
0201   //! Copy from another string.
0202   const NCollection_UtfString& Assign (const NCollection_UtfString& theOther);
0203 
0204   //! Exchange the data of two strings (without reallocating memory).
0205   void Swap (NCollection_UtfString& theOther);
0206 
0207   //! Copy from another string (forwards to Assign()).
0208   const NCollection_UtfString& operator= (const NCollection_UtfString& theOther) { return Assign (theOther); }
0209 
0210   //! Move assignment operator (implemented as Swap() with the other string).
0211   NCollection_UtfString& operator= (NCollection_UtfString&& theOther) { Swap (theOther); return *this; }
0212 
0213   //! Copy from UTF-8 NULL-terminated string.
0214   const NCollection_UtfString& operator= (const char* theStringUtf8);
0215 
0216   //! Copy from wchar_t UTF NULL-terminated string.
0217   const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide);
0218 
0219   //! Join strings.
0220   NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend);
0221 
0222   //! Join two strings into a new one: byte sizes and symbol lengths are summed and both buffers are copied back to back.
0223   friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft,
0224                                           const NCollection_UtfString& theRight)
0225   {
0226     NCollection_UtfString aSumm;
0227     strFree (aSumm.myString); // release the buffer held by the default-constructed string, if any
0228     aSumm.mySize   = theLeft.mySize   + theRight.mySize;
0229     aSumm.myLength = theLeft.myLength + theRight.myLength;
0230     aSumm.myString = strAlloc (aSumm.mySize);
0231 
0232     // copy raw bytes: left operand first, right operand directly after it
0233     strCopy ((Standard_Byte* )aSumm.myString,                  (const Standard_Byte* )theLeft.myString,  theLeft.mySize);
0234     strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize);
0235     return aSumm;
0236   }
0237 
0238 public: //! @name compare operators
0239   //! Equality test; forwards to IsEqual().
0240   bool operator== (const NCollection_UtfString& theCompare) const
0241   {
0242     return IsEqual (theCompare);
0243   }
0244   bool operator!= (const NCollection_UtfString& theCompare) const;
0245 
0246 private: //! @name low-level methods
0247 
0248   //! Implementation of copy routine for a source of the same character type: the source is measured with the iterator and its bytes are copied as is.
0249   void fromUnicodeImpl (const Type* theStringUtf, const Standard_Integer theLength, NCollection_UtfIterator<Type>& theIterator)
0250   {
0251     Type* anOldBuffer = myString; // necessary in case of self-copying
0252 
0253     // advance to the end: stop at NULL symbol or after aLengthMax symbols (no limit when theLength <= 0)
0254     const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
0255     for(; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) {}
0256     // byte size is the distance from the source start to the iterator position
0257     mySize   = Standard_Integer((Standard_Byte* )theIterator.BufferHere() - (Standard_Byte* )theStringUtf);
0258     myLength = theIterator.Index();
0259     myString = strAlloc (mySize);
0260     strCopy ((Standard_Byte* )myString, (const Standard_Byte* )theStringUtf, mySize);
0261     // free the previous buffer last, since the source may point into it
0262     strFree (anOldBuffer);
0263   }
0264 
0265   //! Implementation of copy routine for a source of another character type: the first pass measures, the second pass writes the symbols.
0266   template<typename TypeFrom>
0267   void fromUnicodeImpl (typename opencascade::std::enable_if<! opencascade::std::is_same<Type, TypeFrom>::value, const TypeFrom*>::type theStringUtf, 
0268                         const Standard_Integer theLength, NCollection_UtfIterator<TypeFrom>& theIterator)
0269   {
0270     Type* anOldBuffer = myString; // necessary in case of self-copying
0271     // first pass: count the symbols and accumulate the buffer size (no length limit when theLength <= 0)
0272     mySize = 0;
0273     const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
0274     for (; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator)
0275     {
0276       mySize += theIterator.template AdvanceBytesUtf<Type>();
0277     }
0278     myLength = theIterator.Index();
0279 
0280     myString = strAlloc (mySize);
0281 
0282     // second pass: restart the iterator at the source start and write each symbol into the new buffer
0283     theIterator.Init (theStringUtf);
0284     Type* anIterWrite = myString;
0285     for (; *theIterator != 0 && theIterator.Index() < myLength; ++theIterator)
0286     {
0287       anIterWrite = theIterator.GetUtf (anIterWrite);
0288     }
0289     // free the previous buffer last (it was saved in anOldBuffer before myString was replaced)
0290     strFree (anOldBuffer);
0291   }
0292 
0293   //! Allocate NULL-terminated string buffer of theSizeBytes bytes plus one extra Type element for the terminator.
0294   static Type* strAlloc (const Standard_Size theSizeBytes)
0295   {
0296     Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type));
0297     if (aPtr != NULL)
0298     {
0299       // always NULL-terminate the string
0300       aPtr[theSizeBytes / sizeof(Type)] = Type(0);
0301     }
0302     return aPtr;
0303   }
0304 
0305   //! Release string buffer with Standard::Free() and nullify the pointer (NOTE(review): nullification relies on Standard::Free() resetting the referenced pointer - confirm).
0306   static void strFree (Type*& thePtr)
0307   {
0308     Standard::Free (thePtr);
0309   }
0310 
0311   //! Copy theSizeBytes bytes with std::memcpy(); the byte-typed interface avoids incorrect pointer arithmetic.
0312   static void strCopy (Standard_Byte*         theStrDst,
0313                        const Standard_Byte*   theStrSrc,
0314                        const Standard_Integer theSizeBytes)
0315   {
0316     std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes);
0317   }
0318 
0319   //! Compare two Unicode strings per-byte: equal only if the byte sizes match and std::memcmp() finds no difference.
0320   static bool strAreEqual (const Type*            theString1,
0321                            const Standard_Integer theSizeBytes1,
0322                            const Type*            theString2,
0323                            const Standard_Integer theSizeBytes2)
0324   {
0325     return (theSizeBytes1 == theSizeBytes2)
0326         && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0);
0327   }
0328 
0329 private: //! @name private fields
0330 
0331   Type*            myString; //!< string buffer
0332   Standard_Integer mySize;   //!< buffer size in bytes, excluding NULL-termination symbol
0333   Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol)
0334 
0335 };
0336 
0337 using NCollection_Utf8String    = NCollection_UtfString<Standard_Utf8Char>;  //!< string of Standard_Utf8Char units
0338 using NCollection_Utf16String   = NCollection_UtfString<Standard_Utf16Char>; //!< string of Standard_Utf16Char units
0339 using NCollection_Utf32String   = NCollection_UtfString<Standard_Utf32Char>; //!< string of Standard_Utf32Char units
0340 using NCollection_UtfWideString = NCollection_UtfString<Standard_WideChar>;  //!< string of Standard_WideChar units
0341 
0342 // template implementation (inline methods)
0343 #include "NCollection_UtfString.lxx"
0344 
0345 #endif // NCollection_UtfString_HeaderFile