Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:50:09

0001 // Copyright (c) 2022 Klemens D. Morgenstern
0002 //
0003 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0004 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0005 #ifndef BOOST_PROCESS_V2_DETAIL_IMPL_UTF8_HPP
0006 #define BOOST_PROCESS_V2_DETAIL_IMPL_UTF8_HPP
0007 
0008 #include <boost/process/v2/detail/utf8.hpp>
0009 #include <boost/process/v2/detail/config.hpp>
0010 #include <boost/process/v2/detail/last_error.hpp>
0011 #include <boost/process/v2/error.hpp>
0012 
0013 #if defined(BOOST_PROCESS_V2_WINDOWS)
0014 #include <Windows.h>
0015 #endif
0016 
0017 BOOST_PROCESS_V2_BEGIN_NAMESPACE
0018 
0019 namespace detail
0020 {
0021 
0022 #if defined(BOOST_PROCESS_V2_WINDOWS)
0023 
0024 inline void handle_error(error_code & ec)
0025 {
0026     const auto err = ::GetLastError();
0027     switch (err)
0028     {
0029     case ERROR_INSUFFICIENT_BUFFER:
0030         BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::utf8_category)
0031         break;
0032     case ERROR_NO_UNICODE_TRANSLATION:
0033         BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::utf8_category)
0034         break;
0035     default:
0036         BOOST_PROCESS_V2_ASSIGN_EC(ec, err, system_category())
0037     }
0038 }
0039 
0040 std::size_t size_as_utf8(const wchar_t * in, std::size_t size, error_code & ec)
0041 {
0042     auto res = WideCharToMultiByte(
0043                           CP_UTF8,                // CodePage,
0044                           0,                      // dwFlags,
0045                           in,                     // lpWideCharStr,
0046                           static_cast<int>(size), // cchWideChar,
0047                           nullptr,                // lpMultiByteStr,
0048                           0,                      // cbMultiByte,
0049                           nullptr,                // lpDefaultChar,
0050                           FALSE);                 // lpUsedDefaultChar
0051     
0052     if (res == 0u)
0053         handle_error(ec);
0054     return static_cast<std::size_t>(res);
0055 }
0056 
0057 std::size_t size_as_wide(const char * in, std::size_t size, error_code & ec)
0058 {
0059     auto res = ::MultiByteToWideChar(
0060                           CP_UTF8,                // CodePage
0061                           0,                      // dwFlags
0062                           in,                     // lpMultiByteStr
0063                           static_cast<int>(size), // cbMultiByte
0064                           nullptr,                // lpWideCharStr
0065                           0);                     // cchWideChar
0066     if (res == 0u)
0067         handle_error(ec);
0068 
0069     return static_cast<std::size_t>(res);
0070 }
0071 
0072 std::size_t convert_to_utf8(const wchar_t *in, std::size_t size,  char * out, 
0073                             std::size_t max_size, error_code & ec)
0074 {
0075     auto res = ::WideCharToMultiByte(
0076                     CP_UTF8,                    // CodePage
0077                     0,                          // dwFlags
0078                     in,                         // lpWideCharStr
0079                     static_cast<int>(size),     // cchWideChar
0080                     out,                        // lpMultiByteStr
0081                     static_cast<int>(max_size), // cbMultiByte
0082                     nullptr,                    // lpDefaultChar
0083                     FALSE);                     // lpUsedDefaultChar
0084     if (res == 0u)
0085         handle_error(ec);
0086 
0087     return static_cast<std::size_t>(res);
0088 }
0089 
0090 std::size_t convert_to_wide(const char *in, std::size_t size,  wchar_t * out, 
0091                             std::size_t max_size, error_code & ec)
0092 {
0093     auto res = ::MultiByteToWideChar(
0094                           CP_UTF8,                     // CodePage
0095                           0,                           // dwFlags
0096                           in,                          // lpMultiByteStr
0097                           static_cast<int>(size),      // cbMultiByte
0098                           out,                         // lpWideCharStr
0099                           static_cast<int>(max_size)); // cchWideChar
0100     if (res == 0u)
0101         handle_error(ec);
0102 
0103     return static_cast<std::size_t>(res);
0104 }
0105 
0106 #else
0107 
0108 
// Number of UTF-8 continuation octets (the octets after the lead octet) that
// encoding `word` requires. The template argument is sizeof(wchar_t).
//
// Primary template: used for every wchar_t width other than 4 and never
// reports more than two continuation octets.
template<std::size_t s>
inline int get_cont_octet_out_count_impl(wchar_t word) {
    return (word < 0x80)  ? 0
         : (word < 0x800) ? 1
         : 2;
}

// Specialization for a 4-byte wchar_t, which may need up to five
// continuation octets.
template<>
inline int get_cont_octet_out_count_impl<4>(wchar_t word) {
    // The comparisons against values above 0xFFFF can produce warnings on
    // platforms where wchar_t cannot hold them. To keep builds that treat
    // warnings as errors working, those comparisons are only compiled when
    // WCHAR_MAX says they are meaningful, so WCHAR_MAX must be available.
#if !defined(WCHAR_MAX)
#   error WCHAR_MAX not defined!
#endif

    if (word < 0x80)
        return 0;
    if (word < 0x800)
        return 1;

    // cope with VC++ 7.1 or earlier having invalid WCHAR_MAX
#if defined(_MSC_VER) && _MSC_VER <= 1310 // 7.1 or earlier
    return 2;
#elif WCHAR_MAX > 0x10000
    return (word < 0x10000)   ? 2
         : (word < 0x200000)  ? 3
         : (word < 0x4000000) ? 4
         : 5;
#else
    return 2;
#endif
}
0158 
0159 inline int get_cont_octet_out_count(wchar_t word)
0160 {
0161     return detail::get_cont_octet_out_count_impl<sizeof(wchar_t)>(word);
0162 }
0163 
0164 // copied from boost/detail/utf8_codecvt_facet.ipp
0165 // Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
0166 // Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
0167 
// Total length, in octets, of the UTF-8 sequence introduced by `lead_octet`.
//
// The length is the number of consecutive 1 bits starting at the most
// significant bit, or 1 when that bit is clear. Octets that are not valid
// lead octets (0x80..0xbf and 0xfc..0xff) are all reported as 6; callers
// that care must validate the lead octet separately.
inline unsigned int get_octet_count(unsigned char lead_octet)
{
    if (lead_octet < 0x80) return 1;   // 0xxxxxxx
    if (lead_octet < 0xc0) return 6;   // 10xxxxxx: not a lead octet
    if (lead_octet < 0xe0) return 2;   // 110xxxxx
    if (lead_octet < 0xf0) return 3;   // 1110xxxx
    if (lead_octet < 0xf8) return 4;   // 11110xxx
    if (lead_octet < 0xfc) return 5;   // 111110xx
    return 6;                          // 0xfc and above
}
0182 
// True when `octet_1` cannot be a UTF-8 continuation octet, i.e. when its two
// top bits are not "10" (the value lies outside 0x80..0xbf).
inline bool invalid_continuing_octet(unsigned char octet_1) {
    const unsigned int top_two_bits = octet_1 & 0xc0u;
    return top_two_bits != 0x80u;
}
0186 
0187 inline unsigned int get_cont_octet_count(unsigned char lead_octet)
0188 {
0189     return get_octet_count(lead_octet) - 1;
0190 }
0191 
// Returns a six-entry table indexed by the number of continuation octets
// (0..5). Each entry holds the marker bits of the lead octet for a sequence
// of that length; it is added when encoding and subtracted when decoding.
inline const wchar_t * get_octet1_modifier_table() noexcept
{
    static const wchar_t lead_octet_markers[6] = {
        0x00,   // 0 continuation octets: 0xxxxxxx
        0xc0,   // 1 continuation octet : 110xxxxx
        0xe0,   // 2 continuation octets: 1110xxxx
        0xf0,   // 3 continuation octets: 11110xxx
        0xf8,   // 4 continuation octets: 111110xx
        0xfc    // 5 continuation octets: 1111110x
    };
    return lead_octet_markers;
}
0199 
0200 
0201 std::size_t size_as_utf8(const wchar_t * in, std::size_t size, error_code & ec)
0202 {
0203     std::size_t res = 0u;
0204     const auto from_end = in + size;
0205     for (auto from = in; from != from_end; from++)
0206         res += get_cont_octet_out_count(*from) + 1;
0207     return res;
0208 }
0209 
0210 std::size_t size_as_wide(const  char   * in, std::size_t size, error_code & ec)
0211 {
0212     const auto from = in;
0213     const auto from_end = from + size;
0214     const char * from_next = from;
0215     for (std::size_t char_count = 0u; from_next < from_end; ++char_count) {
0216         unsigned int octet_count = get_octet_count(*from_next);
0217         // The buffer may represent incomplete characters, so terminate early if one is found
0218         if (octet_count > static_cast<std::size_t>(from_end - from_next))
0219             break;
0220         from_next += octet_count;
0221     }
0222 
0223     return from_next - from;
0224 }
0225 
0226 std::size_t convert_to_utf8(const wchar_t * in, std::size_t size,
0227                             char   * out, std::size_t max_size, error_code & ec)
0228 {
0229 
0230     const wchar_t * from = in;
0231     const wchar_t * from_end = from + size;
0232     const wchar_t * & from_next = from;
0233     char * to = out;
0234     char * to_end = out + max_size;
0235     char * & to_next = to;
0236 
0237     const wchar_t * const octet1_modifier_table = get_octet1_modifier_table();
0238     wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();
0239     while (from != from_end && to != to_end) {
0240 
0241         // Check for invalid UCS-4 character
0242         if (*from  > max_wchar) {
0243             from_next = from;
0244             to_next = to;
0245             BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0246             return 0u;
0247         }
0248 
0249         int cont_octet_count = get_cont_octet_out_count(*from);
0250 
0251         // RG - comment this formula better
0252         int shift_exponent = cont_octet_count * 6;
0253 
0254         // Process the first character
0255         *to++ = static_cast<char>(octet1_modifier_table[cont_octet_count] +
0256                                   (unsigned char)(*from / (1 << shift_exponent)));
0257 
0258         // Process the continuation characters
0259         // Invariants: At the start of the loop:
0260         //   1) 'i' continuing octets have been generated
0261         //   2) '*to' points to the next location to place an octet
0262         //   3) shift_exponent is 6 more than needed for the next octet
0263         int i = 0;
0264         while (i != cont_octet_count && to != to_end) {
0265             shift_exponent -= 6;
0266             *to++ = static_cast<char>(0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));
0267             ++i;
0268         }
0269         // If we filled up the out buffer before encoding the character
0270         if (to == to_end && i != cont_octet_count) {
0271             from_next = from;
0272             to_next = to - (i + 1);
0273             BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0274             return 0u;
0275         }
0276         ++from;
0277     }
0278     from_next = from;
0279     to_next = to;
0280 
0281     // Were we done or did we run out of destination space
0282     if (from != from_end)
0283         BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0284 
0285     return to_next - out;
0286 }
0287 
// True when `octet_1` cannot start a UTF-8 sequence: it is either a
// continuation octet (0x80..0xbf) or one of 0xfe / 0xff.
inline bool invalid_leading_octet(unsigned char octet_1) {
    const bool is_continuation = (octet_1 & 0xc0u) == 0x80u;
    const bool past_last_lead = octet_1 >= 0xfe;
    return is_continuation || past_last_lead;
}
0292 
0293 std::size_t convert_to_wide(const  char   * in, std::size_t size,
0294                             wchar_t * out, std::size_t max_size, error_code & ec)
0295 {
0296     const char * from = in;
0297     const char * from_end = from + size;
0298     const char * & from_next = from;
0299     wchar_t * to = out;
0300     wchar_t * to_end = out + max_size;
0301     wchar_t * & to_next = to;
0302 
0303     // Basic algorithm: The first octet determines how many
0304     // octets total make up the UCS-4 character. The remaining
0305     // "continuing octets" all begin with "10". To convert, subtract
0306     // the amount that specifies the number of octets from the first
0307     // octet. Subtract 0x80 (1000 0000) from each continuing octet,
0308     // then mash the whole lot together. Note that each continuing
0309     // octet only uses 6 bits as unique values, so only shift by
0310     // multiples of 6 to combine.
0311     const wchar_t * const octet1_modifier_table = detail::get_octet1_modifier_table();
0312     while (from != from_end && to != to_end) {
0313 
0314         // Error checking on the first octet
0315         if (invalid_leading_octet(*from)) {
0316             from_next = from;
0317             to_next = to;
0318             BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0319             return 0u;
0320         }
0321 
0322         // The first octet is adjusted by a value dependent upon
0323         // the number of "continuing octets" encoding the character
0324         const int cont_octet_count = get_cont_octet_count(*from);
0325 
0326         // The unsigned char conversion is necessary in case char is
0327         // signed (I learned this the hard way)
0328         wchar_t ucs_result =
0329                 (unsigned char)(*from++) - octet1_modifier_table[cont_octet_count];
0330 
0331         // Invariants:
0332         //   1) At the start of the loop, 'i' continuing characters have been
0333         //      processed
0334         //   2) *from points to the next continuing character to be processed.
0335         int i = 0;
0336         while (i != cont_octet_count && from != from_end) {
0337 
0338             // Error checking on continuing characters
0339             if (invalid_continuing_octet(*from)) {
0340                 from_next = from;
0341                 to_next = to;
0342                 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0343                 return 0u;
0344             }
0345 
0346             ucs_result *= (1 << 6);
0347 
0348             // each continuing character has an extra (10xxxxxx)b attached to
0349             // it that must be removed.
0350             ucs_result += (unsigned char)(*from++) - 0x80;
0351             ++i;
0352         }
0353 
0354         // If the buffer ends with an incomplete unicode character...
0355         if (from == from_end && i != cont_octet_count) {
0356             // rewind "from" to before the current character translation
0357             from_next = from - (i + 1);
0358             to_next = to;
0359             BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0360             return 0u;
0361         }
0362         *to++ = ucs_result;
0363     }
0364     from_next = from;
0365     to_next = to;
0366 
0367     if (from != from_end)
0368         BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0369 
0370     return to_next - out;
0371 }
0372 
0373 #endif
0374 
0375 }
0376 
0377 BOOST_PROCESS_V2_END_NAMESPACE
0378 
0379 #endif //BOOST_PROCESS_V2_DETAIL_IMPL_UTF8_HPP