SFML/System/Utf.inl

0001 ////////////////////////////////////////////////////////////
0002 //
0003 // SFML - Simple and Fast Multimedia Library
0004 // Copyright (C) 2007-2023 Laurent Gomila (laurent@sfml-dev.org)
0005 //
0006 // This software is provided 'as-is', without any express or implied warranty.
0007 // In no event will the authors be held liable for any damages arising from the use of this software.
0008 //
0009 // Permission is granted to anyone to use this software for any purpose,
0010 // including commercial applications, and to alter it and redistribute it freely,
0011 // subject to the following restrictions:
0012 //
0013 // 1. The origin of this software must not be misrepresented;
0014 //    you must not claim that you wrote the original software.
0015 //    If you use this software in a product, an acknowledgment
0016 //    in the product documentation would be appreciated but is not required.
0017 //
0018 // 2. Altered source versions must be plainly marked as such,
0019 //    and must not be misrepresented as being the original software.
0020 //
0021 // 3. This notice may not be removed or altered from any source distribution.
0022 //
0023 ////////////////////////////////////////////////////////////
0024
0025
0026 ////////////////////////////////////////////////////////////
0027 // References:
0028 //
0029 // https://www.unicode.org/
0030 // https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
0031 // https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
0032 // https://people.w3.org/rishida/scripts/uniview/conversion
0033 //
0034 ////////////////////////////////////////////////////////////
0035
0036
0037 ////////////////////////////////////////////////////////////
0038 template <typename In>
0039 In Utf<8>::decode(In begin, In end, Uint32& output, Uint32 replacement)
0040 {
0041     // Some useful precomputed data
0042     static const int trailing[256] =
0043     {
0044         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0045         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0046         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0047         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0048         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0049         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0050         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0051         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
0052     };
0053     static const Uint32 offsets[6] =
0054     {
0055         0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
0056     };
0057
0058     // decode the character
0059     int trailingBytes = trailing[static_cast<Uint8>(*begin)];
0060     if (begin + trailingBytes < end)
0061     {
0062         output = 0;
0063         switch (trailingBytes)
0064         {
0065             case 5: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0066             case 4: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0067             case 3: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0068             case 2: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0069             case 1: output += static_cast<Uint8>(*begin++); output <<= 6; // fallthrough
0070             case 0: output += static_cast<Uint8>(*begin++);
0071         }
0072         output -= offsets[trailingBytes];
0073     }
0074     else
0075     {
0076         // Incomplete character
0077         begin = end;
0078         output = replacement;
0079     }
0080
0081     return begin;
0082 }
0083
0084
0085 ////////////////////////////////////////////////////////////
0086 template <typename Out>
0087 Out Utf<8>::encode(Uint32 input, Out output, Uint8 replacement)
0088 {
0089     // Some useful precomputed data
0090     static const Uint8 firstBytes[7] =
0091     {
0092         0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
0093     };
0094
0095     // encode the character
0096     if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
0097     {
0098         // Invalid character
0099         if (replacement)
0100             output = std::copy(&replacement, &replacement + 1, output);
0101     }
0102     else
0103     {
0104         // Valid character
0105
0106         // Get the number of bytes to write
0107         std::size_t bytestoWrite = 1;
0108         if      (input <  0x80)       bytestoWrite = 1;
0109         else if (input <  0x800)      bytestoWrite = 2;
0110         else if (input <  0x10000)    bytestoWrite = 3;
0111         else if (input <= 0x0010FFFF) bytestoWrite = 4;
0112
0113         // Extract the bytes to write
0114         Uint8 bytes[4];
0115         switch (bytestoWrite)
0116         {
0117             case 4: bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0118             case 3: bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0119             case 2: bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6; // fallthrough
0120             case 1: bytes[0] = static_cast<Uint8> (input | firstBytes[bytestoWrite]);
0121         }
0122
0123         // Add them to the output
0124         output = std::copy(bytes, bytes + bytestoWrite, output);
0125     }
0126
0127     return output;
0128 }
0129
0130
0131 ////////////////////////////////////////////////////////////
0132 template <typename In>
0133 In Utf<8>::next(In begin, In end)
0134 {
0135     Uint32 codepoint;
0136     return decode(begin, end, codepoint);
0137 }
0138
0139
0140 ////////////////////////////////////////////////////////////
0141 template <typename In>
0142 std::size_t Utf<8>::count(In begin, In end)
0143 {
0144     std::size_t length = 0;
0145     while (begin < end)
0146     {
0147         begin = next(begin, end);
0148         ++length;
0149     }
0150
0151     return length;
0152 }
0153
0154
0155 ////////////////////////////////////////////////////////////
0156 template <typename In, typename Out>
0157 Out Utf<8>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0158 {
0159     while (begin < end)
0160     {
0161         Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
0162         output = encode(codepoint, output);
0163     }
0164
0165     return output;
0166 }
0167
0168
0169 ////////////////////////////////////////////////////////////
0170 template <typename In, typename Out>
0171 Out Utf<8>::fromWide(In begin, In end, Out output)
0172 {
0173     while (begin < end)
0174     {
0175         Uint32 codepoint = Utf<32>::decodeWide(*begin++);
0176         output = encode(codepoint, output);
0177     }
0178
0179     return output;
0180 }
0181
0182
0183 ////////////////////////////////////////////////////////////
0184 template <typename In, typename Out>
0185 Out Utf<8>::fromLatin1(In begin, In end, Out output)
0186 {
0187     // Latin-1 is directly compatible with Unicode encodings,
0188     // and can thus be treated as (a sub-range of) UTF-32
0189     while (begin < end)
0190         output = encode(*begin++, output);
0191
0192     return output;
0193 }
0194
0195
0196 ////////////////////////////////////////////////////////////
0197 template <typename In, typename Out>
0198 Out Utf<8>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0199 {
0200     while (begin < end)
0201     {
0202         Uint32 codepoint;
0203         begin = decode(begin, end, codepoint);
0204         output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
0205     }
0206
0207     return output;
0208 }
0209
0210
0211 ////////////////////////////////////////////////////////////
0212 template <typename In, typename Out>
0213 Out Utf<8>::toWide(In begin, In end, Out output, wchar_t replacement)
0214 {
0215     while (begin < end)
0216     {
0217         Uint32 codepoint;
0218         begin = decode(begin, end, codepoint);
0219         output = Utf<32>::encodeWide(codepoint, output, replacement);
0220     }
0221
0222     return output;
0223 }
0224
0225
0226 ////////////////////////////////////////////////////////////
0227 template <typename In, typename Out>
0228 Out Utf<8>::toLatin1(In begin, In end, Out output, char replacement)
0229 {
0230     // Latin-1 is directly compatible with Unicode encodings,
0231     // and can thus be treated as (a sub-range of) UTF-32
0232     while (begin < end)
0233     {
0234         Uint32 codepoint;
0235         begin = decode(begin, end, codepoint);
0236         *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
0237     }
0238
0239     return output;
0240 }
0241
0242
0243 ////////////////////////////////////////////////////////////
0244 template <typename In, typename Out>
0245 Out Utf<8>::toUtf8(In begin, In end, Out output)
0246 {
0247     return std::copy(begin, end, output);
0248 }
0249
0250
0251 ////////////////////////////////////////////////////////////
0252 template <typename In, typename Out>
0253 Out Utf<8>::toUtf16(In begin, In end, Out output)
0254 {
0255     while (begin < end)
0256     {
0257         Uint32 codepoint;
0258         begin = decode(begin, end, codepoint);
0259         output = Utf<16>::encode(codepoint, output);
0260     }
0261
0262     return output;
0263 }
0264
0265
0266 ////////////////////////////////////////////////////////////
0267 template <typename In, typename Out>
0268 Out Utf<8>::toUtf32(In begin, In end, Out output)
0269 {
0270     while (begin < end)
0271     {
0272         Uint32 codepoint;
0273         begin = decode(begin, end, codepoint);
0274         *output++ = codepoint;
0275     }
0276
0277     return output;
0278 }
0279
0280
0281 ////////////////////////////////////////////////////////////
0282 template <typename In>
0283 In Utf<16>::decode(In begin, In end, Uint32& output, Uint32 replacement)
0284 {
0285     Uint16 first = *begin++;
0286
0287     // If it's a surrogate pair, first convert to a single UTF-32 character
0288     if ((first >= 0xD800) && (first <= 0xDBFF))
0289     {
0290         if (begin < end)
0291         {
0292             Uint32 second = *begin++;
0293             if ((second >= 0xDC00) && (second <= 0xDFFF))
0294             {
0295                 // The second element is valid: convert the two elements to a UTF-32 character
0296                 output = ((first - 0xD800u) << 10) + (second - 0xDC00) + 0x0010000;
0297             }
0298             else
0299             {
0300                 // Invalid character
0301                 output = replacement;
0302             }
0303         }
0304         else
0305         {
0306             // Invalid character
0307             begin = end;
0308             output = replacement;
0309         }
0310     }
0311     else
0312     {
0313         // We can make a direct copy
0314         output = first;
0315     }
0316
0317     return begin;
0318 }
0319
0320
0321 ////////////////////////////////////////////////////////////
0322 template <typename Out>
0323 Out Utf<16>::encode(Uint32 input, Out output, Uint16 replacement)
0324 {
0325     if (input <= 0xFFFF)
0326     {
0327         // The character can be copied directly, we just need to check if it's in the valid range
0328         if ((input >= 0xD800) && (input <= 0xDFFF))
0329         {
0330             // Invalid character (this range is reserved)
0331             if (replacement)
0332                 *output++ = replacement;
0333         }
0334         else
0335         {
0336             // Valid character directly convertible to a single UTF-16 character
0337             *output++ = static_cast<Uint16>(input);
0338         }
0339     }
0340     else if (input > 0x0010FFFF)
0341     {
0342         // Invalid character (greater than the maximum Unicode value)
0343         if (replacement)
0344             *output++ = replacement;
0345     }
0346     else
0347     {
0348         // The input character will be converted to two UTF-16 elements
0349         input -= 0x0010000;
0350         *output++ = static_cast<Uint16>((input >> 10)     + 0xD800);
0351         *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
0352     }
0353
0354     return output;
0355 }
0356
0357
0358 ////////////////////////////////////////////////////////////
0359 template <typename In>
0360 In Utf<16>::next(In begin, In end)
0361 {
0362     Uint32 codepoint;
0363     return decode(begin, end, codepoint);
0364 }
0365
0366
0367 ////////////////////////////////////////////////////////////
0368 template <typename In>
0369 std::size_t Utf<16>::count(In begin, In end)
0370 {
0371     std::size_t length = 0;
0372     while (begin < end)
0373     {
0374         begin = next(begin, end);
0375         ++length;
0376     }
0377
0378     return length;
0379 }
0380
0381
0382 ////////////////////////////////////////////////////////////
0383 template <typename In, typename Out>
0384 Out Utf<16>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0385 {
0386     while (begin < end)
0387     {
0388         Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
0389         output = encode(codepoint, output);
0390     }
0391
0392     return output;
0393 }
0394
0395
0396 ////////////////////////////////////////////////////////////
0397 template <typename In, typename Out>
0398 Out Utf<16>::fromWide(In begin, In end, Out output)
0399 {
0400     while (begin < end)
0401     {
0402         Uint32 codepoint = Utf<32>::decodeWide(*begin++);
0403         output = encode(codepoint, output);
0404     }
0405
0406     return output;
0407 }
0408
0409
0410 ////////////////////////////////////////////////////////////
0411 template <typename In, typename Out>
0412 Out Utf<16>::fromLatin1(In begin, In end, Out output)
0413 {
0414     // Latin-1 is directly compatible with Unicode encodings,
0415     // and can thus be treated as (a sub-range of) UTF-32
0416     return std::copy(begin, end, output);
0417 }
0418
0419
0420 ////////////////////////////////////////////////////////////
0421 template <typename In, typename Out>
0422 Out Utf<16>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0423 {
0424     while (begin < end)
0425     {
0426         Uint32 codepoint;
0427         begin = decode(begin, end, codepoint);
0428         output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
0429     }
0430
0431     return output;
0432 }
0433
0434
0435 ////////////////////////////////////////////////////////////
0436 template <typename In, typename Out>
0437 Out Utf<16>::toWide(In begin, In end, Out output, wchar_t replacement)
0438 {
0439     while (begin < end)
0440     {
0441         Uint32 codepoint;
0442         begin = decode(begin, end, codepoint);
0443         output = Utf<32>::encodeWide(codepoint, output, replacement);
0444     }
0445
0446     return output;
0447 }
0448
0449
0450 ////////////////////////////////////////////////////////////
0451 template <typename In, typename Out>
0452 Out Utf<16>::toLatin1(In begin, In end, Out output, char replacement)
0453 {
0454     // Latin-1 is directly compatible with Unicode encodings,
0455     // and can thus be treated as (a sub-range of) UTF-32
0456     while (begin < end)
0457     {
0458         *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
0459         begin++;
0460     }
0461
0462     return output;
0463 }
0464
0465
0466 ////////////////////////////////////////////////////////////
0467 template <typename In, typename Out>
0468 Out Utf<16>::toUtf8(In begin, In end, Out output)
0469 {
0470     while (begin < end)
0471     {
0472         Uint32 codepoint;
0473         begin = decode(begin, end, codepoint);
0474         output = Utf<8>::encode(codepoint, output);
0475     }
0476
0477     return output;
0478 }
0479
0480
0481 ////////////////////////////////////////////////////////////
0482 template <typename In, typename Out>
0483 Out Utf<16>::toUtf16(In begin, In end, Out output)
0484 {
0485     return std::copy(begin, end, output);
0486 }
0487
0488
0489 ////////////////////////////////////////////////////////////
0490 template <typename In, typename Out>
0491 Out Utf<16>::toUtf32(In begin, In end, Out output)
0492 {
0493     while (begin < end)
0494     {
0495         Uint32 codepoint;
0496         begin = decode(begin, end, codepoint);
0497         *output++ = codepoint;
0498     }
0499
0500     return output;
0501 }
0502
0503
0504 ////////////////////////////////////////////////////////////
0505 template <typename In>
0506 In Utf<32>::decode(In begin, In /*end*/, Uint32& output, Uint32 /*replacement*/)
0507 {
0508     output = *begin++;
0509     return begin;
0510 }
0511
0512
0513 ////////////////////////////////////////////////////////////
0514 template <typename Out>
0515 Out Utf<32>::encode(Uint32 input, Out output, Uint32 /*replacement*/)
0516 {
0517     *output++ = input;
0518     return output;
0519 }
0520
0521
0522 ////////////////////////////////////////////////////////////
0523 template <typename In>
0524 In Utf<32>::next(In begin, In /*end*/)
0525 {
0526     return ++begin;
0527 }
0528
0529
0530 ////////////////////////////////////////////////////////////
0531 template <typename In>
0532 std::size_t Utf<32>::count(In begin, In end)
0533 {
0534     return begin - end;
0535 }
0536
0537
0538 ////////////////////////////////////////////////////////////
0539 template <typename In, typename Out>
0540 Out Utf<32>::fromAnsi(In begin, In end, Out output, const std::locale& locale)
0541 {
0542     while (begin < end)
0543         *output++ = decodeAnsi(*begin++, locale);
0544
0545     return output;
0546 }
0547
0548
0549 ////////////////////////////////////////////////////////////
0550 template <typename In, typename Out>
0551 Out Utf<32>::fromWide(In begin, In end, Out output)
0552 {
0553     while (begin < end)
0554         *output++ = decodeWide(*begin++);
0555
0556     return output;
0557 }
0558
0559
0560 ////////////////////////////////////////////////////////////
0561 template <typename In, typename Out>
0562 Out Utf<32>::fromLatin1(In begin, In end, Out output)
0563 {
0564     // Latin-1 is directly compatible with Unicode encodings,
0565     // and can thus be treated as (a sub-range of) UTF-32
0566     return std::copy(begin, end, output);
0567 }
0568
0569
0570 ////////////////////////////////////////////////////////////
0571 template <typename In, typename Out>
0572 Out Utf<32>::toAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
0573 {
0574     while (begin < end)
0575         output = encodeAnsi(*begin++, output, replacement, locale);
0576
0577     return output;
0578 }
0579
0580
0581 ////////////////////////////////////////////////////////////
0582 template <typename In, typename Out>
0583 Out Utf<32>::toWide(In begin, In end, Out output, wchar_t replacement)
0584 {
0585     while (begin < end)
0586         output = encodeWide(*begin++, output, replacement);
0587
0588     return output;
0589 }
0590
0591
0592 ////////////////////////////////////////////////////////////
0593 template <typename In, typename Out>
0594 Out Utf<32>::toLatin1(In begin, In end, Out output, char replacement)
0595 {
0596     // Latin-1 is directly compatible with Unicode encodings,
0597     // and can thus be treated as (a sub-range of) UTF-32
0598     while (begin < end)
0599     {
0600         *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
0601         begin++;
0602     }
0603
0604     return output;
0605 }
0606
0607
0608 ////////////////////////////////////////////////////////////
0609 template <typename In, typename Out>
0610 Out Utf<32>::toUtf8(In begin, In end, Out output)
0611 {
0612     while (begin < end)
0613         output = Utf<8>::encode(*begin++, output);
0614
0615     return output;
0616 }
0617
0618 ////////////////////////////////////////////////////////////
0619 template <typename In, typename Out>
0620 Out Utf<32>::toUtf16(In begin, In end, Out output)
0621 {
0622     while (begin < end)
0623         output = Utf<16>::encode(*begin++, output);
0624
0625     return output;
0626 }
0627
0628
0629 ////////////////////////////////////////////////////////////
0630 template <typename In, typename Out>
0631 Out Utf<32>::toUtf32(In begin, In end, Out output)
0632 {
0633     return std::copy(begin, end, output);
0634 }
0635
0636
0637 ////////////////////////////////////////////////////////////
0638 template <typename In>
0639 Uint32 Utf<32>::decodeAnsi(In input, const std::locale& locale)
0640 {
0641     // On Windows, GCC's standard library (glibc++) has almost
0642     // no support for Unicode stuff. As a consequence, in this
0643     // context we can only use the default locale and ignore
0644     // the one passed as parameter.
0645
0646     #if defined(SFML_SYSTEM_WINDOWS) &&                       /* if Windows ... */                          \
0647        (defined(__GLIBCPP__) || defined (__GLIBCXX__)) &&     /* ... and standard library is glibc++ ... */ \
0648       !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
0649
0650         (void)locale; // to avoid warnings
0651
0652         wchar_t character = 0;
0653         mbtowc(&character, &input, 1);
0654         return static_cast<Uint32>(character);
0655
0656     #else
0657
0658         // Get the facet of the locale which deals with character conversion
0659         const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
0660
0661         // Use the facet to convert each character of the input string
0662         return static_cast<Uint32>(facet.widen(input));
0663
0664     #endif
0665 }
0666
0667
0668 ////////////////////////////////////////////////////////////
0669 template <typename In>
0670 Uint32 Utf<32>::decodeWide(In input)
0671 {
0672     // The encoding of wide characters is not well defined and is left to the system;
0673     // however we can safely assume that it is UCS-2 on Windows and
0674     // UCS-4 on Unix systems.
0675     // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
0676     // and UCS-4 *is* UTF-32).
0677
0678     return static_cast<Uint32>(input);
0679 }
0680
0681
0682 ////////////////////////////////////////////////////////////
0683 template <typename Out>
0684 Out Utf<32>::encodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
0685 {
0686     // On Windows, gcc's standard library (glibc++) has almost
0687     // no support for Unicode stuff. As a consequence, in this
0688     // context we can only use the default locale and ignore
0689     // the one passed as parameter.
0690
0691     #if defined(SFML_SYSTEM_WINDOWS) &&                       /* if Windows ... */                          \
0692        (defined(__GLIBCPP__) || defined (__GLIBCXX__)) &&     /* ... and standard library is glibc++ ... */ \
0693       !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
0694
0695         (void)locale; // to avoid warnings
0696
0697         char character = 0;
0698         if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
0699             *output++ = character;
0700         else if (replacement)
0701             *output++ = replacement;
0702
0703         return output;
0704
0705     #else
0706
0707         // Get the facet of the locale which deals with character conversion
0708         const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
0709
0710         // Use the facet to convert each character of the input string
0711         *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
0712
0713         return output;
0714
0715     #endif
0716 }
0717
0718
0719 ////////////////////////////////////////////////////////////
0720 template <typename Out>
0721 Out Utf<32>::encodeWide(Uint32 codepoint, Out output, wchar_t replacement)
0722 {
0723     // The encoding of wide characters is not well defined and is left to the system;
0724     // however we can safely assume that it is UCS-2 on Windows and
0725     // UCS-4 on Unix systems.
0726     // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
0727     // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
0728
0729     switch (sizeof(wchar_t))
0730     {
0731         case 4:
0732         {
0733             *output++ = static_cast<wchar_t>(codepoint);
0734             break;
0735         }
0736
0737         default:
0738         {
0739             if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
0740             {
0741                 *output++ = static_cast<wchar_t>(codepoint);
0742             }
0743             else if (replacement)
0744             {
0745                 *output++ = replacement;
0746             }
0747             break;
0748         }
0749     }
0750
0751     return output;
0752 }