Warning, /include/SFML/System/Utf.inl is written in an unsupported language. File is not indexed.
0001 ////////////////////////////////////////////////////////////
0002 //
0003 // SFML - Simple and Fast Multimedia Library
0004 // Copyright (C) 2007-2023 Laurent Gomila (laurent@sfml-dev.org)
0005 //
0006 // This software is provided 'as-is', without any express or implied warranty.
0007 // In no event will the authors be held liable for any damages arising from the use of this software.
0008 //
0009 // Permission is granted to anyone to use this software for any purpose,
0010 // including commercial applications, and to alter it and redistribute it freely,
0011 // subject to the following restrictions:
0012 //
0013 // 1. The origin of this software must not be misrepresented;
0014 // you must not claim that you wrote the original software.
0015 // If you use this software in a product, an acknowledgment
0016 // in the product documentation would be appreciated but is not required.
0017 //
0018 // 2. Altered source versions must be plainly marked as such,
0019 // and must not be misrepresented as being the original software.
0020 //
0021 // 3. This notice may not be removed or altered from any source distribution.
0022 //
0023 ////////////////////////////////////////////////////////////
0024
0025
0026 ////////////////////////////////////////////////////////////
0027 // References:
0028 //
0029 // https://www.unicode.org/
0030 // https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
0031 // https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
0032 // https://people.w3.org/rishida/scripts/uniview/conversion
0033 //
0034 ////////////////////////////////////////////////////////////
0035
0036
0037 ////////////////////////////////////////////////////////////
0038 template <typename In>
0039 In Utf<8>::decode(In begin, In end, Uint32& output, Uint32 replacement)
0040 {
0041 // Some useful precomputed data
0042 static const int trailing[256] =
0043 {
0044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0047 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0050 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0051 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
0052 };
0053 static const Uint32 offsets[6] =
0054 {
0055 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
0056 };
0057
0058 // decode the character
0059 int trailingBytes = trailing[static_cast<Uint8>(*begin)];
0060 if (begin + trailingBytes < end)
0061 {
0062 output = 0;
0063 switch (trailingBytes)
0064 {
0065 case 5: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0066 case 4: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0067 case 3: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0068 case 2: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0069 case 1: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0070 case 0: output += static_cast<Uint8>(*begin++);
0071 }
0072 output -= offsets[trailingBytes];
0073 }
0074 else
0075 {
0076 // Incomplete character
0077 begin = end;
0078 output = replacement;
0079 }
0080
0081 return begin;
0082 }
0083
0084
0085 ////////////////////////////////////////////////////////////
0086 template <typename Out>
0087 Out Utf<8>::encode(Uint32 input, Out output, Uint8 replacement)
0088 {
0089 // Some useful precomputed data
0090 static const Uint8 firstBytes[7] =
0091 {
0092 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
0093 };
0094
0095 // encode the character
0096 if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
0097 {
0098 // Invalid character
0099 if (replacement)
0100 output = std::copy(&replacement, &replacement + 1, output);
0101 }
0102 else
0103 {
0104 // Valid character
0105
0106 // Get the number of bytes to write
0107 std::size_t bytestoWrite = 1;
0108 if (input < 0x80) bytestoWrite = 1;
0109 else if (input < 0x800) bytestoWrite = 2;
0110 else if (input < 0x10000) bytestoWrite = 3;
0111 else if (input <= 0x0010FFFF) bytestoWrite = 4;
0112
0113 // Extract the bytes to write
0114 Uint8 bytes[4];
0115 switch (bytestoWrite)
0116 {
0117 case 4: bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0118 case 3: bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0119 case 2: bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0120 case 1: bytes[0] = static_cast<Uint8> (input | firstBytes[bytestoWrite]);
0121 }
0122
0123 // Add them to the output
0124 output = std::copy(bytes, bytes + bytestoWrite, output);
0125 }
0126
0127 return output;
0128 }
0129
0130
0131 ////////////////////////////////////////////////////////////
0132 template <typename In>
0133 In Utf<8>::next(In begin, In end)
0134 {
0135 Uint32 codepoint;
0136 return decode(begin, end, codepoint);
0137 }
0138
0139
0140 ////////////////////////////////////////////////////////////
0141 template <typename In>
0142 std::size_t Utf<8>::count(In begin, In end)
0143 {
0144 std::size_t length = 0;
0145 while (begin < end)
0146 {
0147 begin = next(begin, end);
0148 ++length;
0149 }
0150
0151 return length;
0152 }
0153
0154
0155 ////////////////////////////////////////////////////////////
0156 template <typename In, typename Out>
0157 Out Utf<8>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0158 {
0159 while (begin < end)
0160 {
0161 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
0162 output = encode(codepoint, output);
0163 }
0164
0165 return output;
0166 }
0167
0168
0169 ////////////////////////////////////////////////////////////
0170 template <typename In, typename Out>
0171 Out Utf<8>::fromWide(In begin, In end, Out output)
0172 {
0173 while (begin < end)
0174 {
0175 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
0176 output = encode(codepoint, output);
0177 }
0178
0179 return output;
0180 }
0181
0182
0183 ////////////////////////////////////////////////////////////
0184 template <typename In, typename Out>
0185 Out Utf<8>::fromLatin1(In begin, In end, Out output)
0186 {
0187 // Latin-1 is directly compatible with Unicode encodings,
0188 // and can thus be treated as (a sub-range of) UTF-32
0189 while (begin < end)
0190 output = encode(*begin++, output);
0191
0192 return output;
0193 }
0194
0195
0196 ////////////////////////////////////////////////////////////
0197 template <typename In, typename Out>
0198 Out Utf<8>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0199 {
0200 while (begin < end)
0201 {
0202 Uint32 codepoint;
0203 begin = decode(begin, end, codepoint);
0204 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
0205 }
0206
0207 return output;
0208 }
0209
0210
0211 ////////////////////////////////////////////////////////////
0212 template <typename In, typename Out>
0213 Out Utf<8>::toWide(In begin, In end, Out output, wchar_t replacement)
0214 {
0215 while (begin < end)
0216 {
0217 Uint32 codepoint;
0218 begin = decode(begin, end, codepoint);
0219 output = Utf<32>::encodeWide(codepoint, output, replacement);
0220 }
0221
0222 return output;
0223 }
0224
0225
0226 ////////////////////////////////////////////////////////////
0227 template <typename In, typename Out>
0228 Out Utf<8>::toLatin1(In begin, In end, Out output, char replacement)
0229 {
0230 // Latin-1 is directly compatible with Unicode encodings,
0231 // and can thus be treated as (a sub-range of) UTF-32
0232 while (begin < end)
0233 {
0234 Uint32 codepoint;
0235 begin = decode(begin, end, codepoint);
0236 *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
0237 }
0238
0239 return output;
0240 }
0241
0242
0243 ////////////////////////////////////////////////////////////
0244 template <typename In, typename Out>
0245 Out Utf<8>::toUtf8(In begin, In end, Out output)
0246 {
0247 return std::copy(begin, end, output);
0248 }
0249
0250
0251 ////////////////////////////////////////////////////////////
0252 template <typename In, typename Out>
0253 Out Utf<8>::toUtf16(In begin, In end, Out output)
0254 {
0255 while (begin < end)
0256 {
0257 Uint32 codepoint;
0258 begin = decode(begin, end, codepoint);
0259 output = Utf<16>::encode(codepoint, output);
0260 }
0261
0262 return output;
0263 }
0264
0265
0266 ////////////////////////////////////////////////////////////
0267 template <typename In, typename Out>
0268 Out Utf<8>::toUtf32(In begin, In end, Out output)
0269 {
0270 while (begin < end)
0271 {
0272 Uint32 codepoint;
0273 begin = decode(begin, end, codepoint);
0274 *output++ = codepoint;
0275 }
0276
0277 return output;
0278 }
0279
0280
0281 ////////////////////////////////////////////////////////////
0282 template <typename In>
0283 In Utf<16>::decode(In begin, In end, Uint32& output, Uint32 replacement)
0284 {
0285 Uint16 first = *begin++;
0286
0287 // If it's a surrogate pair, first convert to a single UTF-32 character
0288 if ((first >= 0xD800) && (first <= 0xDBFF))
0289 {
0290 if (begin < end)
0291 {
0292 Uint32 second = *begin++;
0293 if ((second >= 0xDC00) && (second <= 0xDFFF))
0294 {
0295 // The second element is valid: convert the two elements to a UTF-32 character
0296 output = ((first - 0xD800u) << 10) + (second - 0xDC00) + 0x0010000;
0297 }
0298 else
0299 {
0300 // Invalid character
0301 output = replacement;
0302 }
0303 }
0304 else
0305 {
0306 // Invalid character
0307 begin = end;
0308 output = replacement;
0309 }
0310 }
0311 else
0312 {
0313 // We can make a direct copy
0314 output = first;
0315 }
0316
0317 return begin;
0318 }
0319
0320
0321 ////////////////////////////////////////////////////////////
0322 template <typename Out>
0323 Out Utf<16>::encode(Uint32 input, Out output, Uint16 replacement)
0324 {
0325 if (input <= 0xFFFF)
0326 {
0327 // The character can be copied directly, we just need to check if it's in the valid range
0328 if ((input >= 0xD800) && (input <= 0xDFFF))
0329 {
0330 // Invalid character (this range is reserved)
0331 if (replacement)
0332 *output++ = replacement;
0333 }
0334 else
0335 {
0336 // Valid character directly convertible to a single UTF-16 character
0337 *output++ = static_cast<Uint16>(input);
0338 }
0339 }
0340 else if (input > 0x0010FFFF)
0341 {
0342 // Invalid character (greater than the maximum Unicode value)
0343 if (replacement)
0344 *output++ = replacement;
0345 }
0346 else
0347 {
0348 // The input character will be converted to two UTF-16 elements
0349 input -= 0x0010000;
0350 *output++ = static_cast<Uint16>((input >> 10) + 0xD800);
0351 *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
0352 }
0353
0354 return output;
0355 }
0356
0357
0358 ////////////////////////////////////////////////////////////
0359 template <typename In>
0360 In Utf<16>::next(In begin, In end)
0361 {
0362 Uint32 codepoint;
0363 return decode(begin, end, codepoint);
0364 }
0365
0366
0367 ////////////////////////////////////////////////////////////
0368 template <typename In>
0369 std::size_t Utf<16>::count(In begin, In end)
0370 {
0371 std::size_t length = 0;
0372 while (begin < end)
0373 {
0374 begin = next(begin, end);
0375 ++length;
0376 }
0377
0378 return length;
0379 }
0380
0381
0382 ////////////////////////////////////////////////////////////
0383 template <typename In, typename Out>
0384 Out Utf<16>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0385 {
0386 while (begin < end)
0387 {
0388 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
0389 output = encode(codepoint, output);
0390 }
0391
0392 return output;
0393 }
0394
0395
0396 ////////////////////////////////////////////////////////////
0397 template <typename In, typename Out>
0398 Out Utf<16>::fromWide(In begin, In end, Out output)
0399 {
0400 while (begin < end)
0401 {
0402 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
0403 output = encode(codepoint, output);
0404 }
0405
0406 return output;
0407 }
0408
0409
0410 ////////////////////////////////////////////////////////////
0411 template <typename In, typename Out>
0412 Out Utf<16>::fromLatin1(In begin, In end, Out output)
0413 {
0414 // Latin-1 is directly compatible with Unicode encodings,
0415 // and can thus be treated as (a sub-range of) UTF-32
0416 return std::copy(begin, end, output);
0417 }
0418
0419
0420 ////////////////////////////////////////////////////////////
0421 template <typename In, typename Out>
0422 Out Utf<16>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0423 {
0424 while (begin < end)
0425 {
0426 Uint32 codepoint;
0427 begin = decode(begin, end, codepoint);
0428 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
0429 }
0430
0431 return output;
0432 }
0433
0434
0435 ////////////////////////////////////////////////////////////
0436 template <typename In, typename Out>
0437 Out Utf<16>::toWide(In begin, In end, Out output, wchar_t replacement)
0438 {
0439 while (begin < end)
0440 {
0441 Uint32 codepoint;
0442 begin = decode(begin, end, codepoint);
0443 output = Utf<32>::encodeWide(codepoint, output, replacement);
0444 }
0445
0446 return output;
0447 }
0448
0449
0450 ////////////////////////////////////////////////////////////
0451 template <typename In, typename Out>
0452 Out Utf<16>::toLatin1(In begin, In end, Out output, char replacement)
0453 {
0454 // Latin-1 is directly compatible with Unicode encodings,
0455 // and can thus be treated as (a sub-range of) UTF-32
0456 while (begin < end)
0457 {
0458 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
0459 begin++;
0460 }
0461
0462 return output;
0463 }
0464
0465
0466 ////////////////////////////////////////////////////////////
0467 template <typename In, typename Out>
0468 Out Utf<16>::toUtf8(In begin, In end, Out output)
0469 {
0470 while (begin < end)
0471 {
0472 Uint32 codepoint;
0473 begin = decode(begin, end, codepoint);
0474 output = Utf<8>::encode(codepoint, output);
0475 }
0476
0477 return output;
0478 }
0479
0480
0481 ////////////////////////////////////////////////////////////
0482 template <typename In, typename Out>
0483 Out Utf<16>::toUtf16(In begin, In end, Out output)
0484 {
0485 return std::copy(begin, end, output);
0486 }
0487
0488
0489 ////////////////////////////////////////////////////////////
0490 template <typename In, typename Out>
0491 Out Utf<16>::toUtf32(In begin, In end, Out output)
0492 {
0493 while (begin < end)
0494 {
0495 Uint32 codepoint;
0496 begin = decode(begin, end, codepoint);
0497 *output++ = codepoint;
0498 }
0499
0500 return output;
0501 }
0502
0503
0504 ////////////////////////////////////////////////////////////
0505 template <typename In>
0506 In Utf<32>::decode(In begin, In /*end*/, Uint32& output, Uint32 /*replacement*/)
0507 {
0508 output = *begin++;
0509 return begin;
0510 }
0511
0512
0513 ////////////////////////////////////////////////////////////
0514 template <typename Out>
0515 Out Utf<32>::encode(Uint32 input, Out output, Uint32 /*replacement*/)
0516 {
0517 *output++ = input;
0518 return output;
0519 }
0520
0521
0522 ////////////////////////////////////////////////////////////
0523 template <typename In>
0524 In Utf<32>::next(In begin, In /*end*/)
0525 {
0526 return ++begin;
0527 }
0528
0529
0530 ////////////////////////////////////////////////////////////
0531 template <typename In>
0532 std::size_t Utf<32>::count(In begin, In end)
0533 {
0534 return begin - end;
0535 }
0536
0537
0538 ////////////////////////////////////////////////////////////
0539 template <typename In, typename Out>
0540 Out Utf<32>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0541 {
0542 while (begin < end)
0543 *output++ = decodeAnsi(*begin++, locale);
0544
0545 return output;
0546 }
0547
0548
0549 ////////////////////////////////////////////////////////////
0550 template <typename In, typename Out>
0551 Out Utf<32>::fromWide(In begin, In end, Out output)
0552 {
0553 while (begin < end)
0554 *output++ = decodeWide(*begin++);
0555
0556 return output;
0557 }
0558
0559
0560 ////////////////////////////////////////////////////////////
0561 template <typename In, typename Out>
0562 Out Utf<32>::fromLatin1(In begin, In end, Out output)
0563 {
0564 // Latin-1 is directly compatible with Unicode encodings,
0565 // and can thus be treated as (a sub-range of) UTF-32
0566 return std::copy(begin, end, output);
0567 }
0568
0569
0570 ////////////////////////////////////////////////////////////
0571 template <typename In, typename Out>
0572 Out Utf<32>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0573 {
0574 while (begin < end)
0575 output = encodeAnsi(*begin++, output, replacement, locale);
0576
0577 return output;
0578 }
0579
0580
0581 ////////////////////////////////////////////////////////////
0582 template <typename In, typename Out>
0583 Out Utf<32>::toWide(In begin, In end, Out output, wchar_t replacement)
0584 {
0585 while (begin < end)
0586 output = encodeWide(*begin++, output, replacement);
0587
0588 return output;
0589 }
0590
0591
0592 ////////////////////////////////////////////////////////////
0593 template <typename In, typename Out>
0594 Out Utf<32>::toLatin1(In begin, In end, Out output, char replacement)
0595 {
0596 // Latin-1 is directly compatible with Unicode encodings,
0597 // and can thus be treated as (a sub-range of) UTF-32
0598 while (begin < end)
0599 {
0600 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
0601 begin++;
0602 }
0603
0604 return output;
0605 }
0606
0607
0608 ////////////////////////////////////////////////////////////
0609 template <typename In, typename Out>
0610 Out Utf<32>::toUtf8(In begin, In end, Out output)
0611 {
0612 while (begin < end)
0613 output = Utf<8>::encode(*begin++, output);
0614
0615 return output;
0616 }
0617
0618 ////////////////////////////////////////////////////////////
0619 template <typename In, typename Out>
0620 Out Utf<32>::toUtf16(In begin, In end, Out output)
0621 {
0622 while (begin < end)
0623 output = Utf<16>::encode(*begin++, output);
0624
0625 return output;
0626 }
0627
0628
0629 ////////////////////////////////////////////////////////////
0630 template <typename In, typename Out>
0631 Out Utf<32>::toUtf32(In begin, In end, Out output)
0632 {
0633 return std::copy(begin, end, output);
0634 }
0635
0636
0637 ////////////////////////////////////////////////////////////
0638 template <typename In>
0639 Uint32 Utf<32>::decodeAnsi(In input, const std::locale& locale)
0640 {
0641 // On Windows, GCC's standard library (glibc++) has almost
0642 // no support for Unicode stuff. As a consequence, in this
0643 // context we can only use the default locale and ignore
0644 // the one passed as parameter.
0645
0646 #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
0647 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
0648 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
0649
0650 (void)locale; // to avoid warnings
0651
0652 wchar_t character = 0;
0653 mbtowc(&character, &input, 1);
0654 return static_cast<Uint32>(character);
0655
0656 #else
0657
0658 // Get the facet of the locale which deals with character conversion
0659 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
0660
0661 // Use the facet to convert each character of the input string
0662 return static_cast<Uint32>(facet.widen(input));
0663
0664 #endif
0665 }
0666
0667
0668 ////////////////////////////////////////////////////////////
0669 template <typename In>
0670 Uint32 Utf<32>::decodeWide(In input)
0671 {
0672 // The encoding of wide characters is not well defined and is left to the system;
0673 // however we can safely assume that it is UCS-2 on Windows and
0674 // UCS-4 on Unix systems.
0675 // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
0676 // and UCS-4 *is* UTF-32).
0677
0678 return static_cast<Uint32>(input);
0679 }
0680
0681
0682 ////////////////////////////////////////////////////////////
0683 template <typename Out>
0684 Out Utf<32>::encodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
0685 {
0686 // On Windows, gcc's standard library (glibc++) has almost
0687 // no support for Unicode stuff. As a consequence, in this
0688 // context we can only use the default locale and ignore
0689 // the one passed as parameter.
0690
0691 #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
0692 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
0693 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
0694
0695 (void)locale; // to avoid warnings
0696
0697 char character = 0;
0698 if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
0699 *output++ = character;
0700 else if (replacement)
0701 *output++ = replacement;
0702
0703 return output;
0704
0705 #else
0706
0707 // Get the facet of the locale which deals with character conversion
0708 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
0709
0710 // Use the facet to convert each character of the input string
0711 *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
0712
0713 return output;
0714
0715 #endif
0716 }
0717
0718
0719 ////////////////////////////////////////////////////////////
0720 template <typename Out>
0721 Out Utf<32>::encodeWide(Uint32 codepoint, Out output, wchar_t replacement)
0722 {
0723 // The encoding of wide characters is not well defined and is left to the system;
0724 // however we can safely assume that it is UCS-2 on Windows and
0725 // UCS-4 on Unix systems.
0726 // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
0727 // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
0728
0729 switch (sizeof(wchar_t))
0730 {
0731 case 4:
0732 {
0733 *output++ = static_cast<wchar_t>(codepoint);
0734 break;
0735 }
0736
0737 default:
0738 {
0739 if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
0740 {
0741 *output++ = static_cast<wchar_t>(codepoint);
0742 }
0743 else if (replacement)
0744 {
0745 *output++ = replacement;
0746 }
0747 break;
0748 }
0749 }
0750
0751 return output;
0752 }