File indexing completed on 2025-01-18 09:50:09
0001
0002
0003
0004
0005 #ifndef BOOST_PROCESS_V2_DETAIL_IMPL_UTF8_HPP
0006 #define BOOST_PROCESS_V2_DETAIL_IMPL_UTF8_HPP
0007
0008 #include <boost/process/v2/detail/utf8.hpp>
0009 #include <boost/process/v2/detail/config.hpp>
0010 #include <boost/process/v2/detail/last_error.hpp>
0011 #include <boost/process/v2/error.hpp>
0012
0013 #if defined(BOOST_PROCESS_V2_WINDOWS)
0014 #include <Windows.h>
0015 #endif
0016
0017 BOOST_PROCESS_V2_BEGIN_NAMESPACE
0018
0019 namespace detail
0020 {
0021
0022 #if defined(BOOST_PROCESS_V2_WINDOWS)
0023
0024 inline void handle_error(error_code & ec)
0025 {
0026 const auto err = ::GetLastError();
0027 switch (err)
0028 {
0029 case ERROR_INSUFFICIENT_BUFFER:
0030 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::utf8_category)
0031 break;
0032 case ERROR_NO_UNICODE_TRANSLATION:
0033 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::utf8_category)
0034 break;
0035 default:
0036 BOOST_PROCESS_V2_ASSIGN_EC(ec, err, system_category())
0037 }
0038 }
0039
0040 std::size_t size_as_utf8(const wchar_t * in, std::size_t size, error_code & ec)
0041 {
0042 auto res = WideCharToMultiByte(
0043 CP_UTF8,
0044 0,
0045 in,
0046 static_cast<int>(size),
0047 nullptr,
0048 0,
0049 nullptr,
0050 FALSE);
0051
0052 if (res == 0u)
0053 handle_error(ec);
0054 return static_cast<std::size_t>(res);
0055 }
0056
0057 std::size_t size_as_wide(const char * in, std::size_t size, error_code & ec)
0058 {
0059 auto res = ::MultiByteToWideChar(
0060 CP_UTF8,
0061 0,
0062 in,
0063 static_cast<int>(size),
0064 nullptr,
0065 0);
0066 if (res == 0u)
0067 handle_error(ec);
0068
0069 return static_cast<std::size_t>(res);
0070 }
0071
0072 std::size_t convert_to_utf8(const wchar_t *in, std::size_t size, char * out,
0073 std::size_t max_size, error_code & ec)
0074 {
0075 auto res = ::WideCharToMultiByte(
0076 CP_UTF8,
0077 0,
0078 in,
0079 static_cast<int>(size),
0080 out,
0081 static_cast<int>(max_size),
0082 nullptr,
0083 FALSE);
0084 if (res == 0u)
0085 handle_error(ec);
0086
0087 return static_cast<std::size_t>(res);
0088 }
0089
0090 std::size_t convert_to_wide(const char *in, std::size_t size, wchar_t * out,
0091 std::size_t max_size, error_code & ec)
0092 {
0093 auto res = ::MultiByteToWideChar(
0094 CP_UTF8,
0095 0,
0096 in,
0097 static_cast<int>(size),
0098 out,
0099 static_cast<int>(max_size));
0100 if (res == 0u)
0101 handle_error(ec);
0102
0103 return static_cast<std::size_t>(res);
0104 }
0105
0106 #else
0107
0108
// Number of continuation octets (octets after the lead octet) that the UTF-8
// encoding of `word` needs. The template argument is the byte width of
// wchar_t; this generic version never reports more than two continuation
// octets.
template<std::size_t s>
inline int get_cont_octet_out_count_impl(wchar_t word)
{
    return (word < 0x80)  ? 0
         : (word < 0x800) ? 1
         :                  2;
}

// Variant selected when wchar_t is four bytes wide. Where WCHAR_MAX shows
// that values above 0x10000 are representable, up to five continuation
// octets are reported.
template<>
inline int get_cont_octet_out_count_impl<4>(wchar_t word)
{
#if !defined(WCHAR_MAX)
#   error WCHAR_MAX not defined!
#endif

    if (word < 0x80)
        return 0;
    if (word < 0x800)
        return 1;

#if (WCHAR_MAX > 0x10000) && !(defined(_MSC_VER) && _MSC_VER <= 1310)
    // Walk the thresholds from the largest range downwards.
    if (word >= 0x4000000)
        return 5;
    if (word >= 0x200000)
        return 4;
    if (word >= 0x10000)
        return 3;
    return 2;
#else
    // Either an old MSVC or a wchar_t that cannot hold larger values.
    return 2;
#endif
}
0158
0159 inline int get_cont_octet_out_count(wchar_t word)
0160 {
0161 return detail::get_cont_octet_out_count_impl<sizeof(wchar_t)>(word);
0162 }
0163
0164
0165
0166
0167
// Total length, in octets, of the UTF-8 sequence announced by `lead_octet`.
//
// Values that match none of the explicit lead-octet ranges (0x80-0xbf and
// 0xfc-0xff) fall into the catch-all result of 6.
inline unsigned int get_octet_count(unsigned char lead_octet)
{
    if (lead_octet < 0x80) return 1;   // 0x00-0x7f: single octet
    if (lead_octet < 0xc0) return 6;   // 0x80-0xbf: catch-all
    if (lead_octet < 0xe0) return 2;   // 0xc0-0xdf
    if (lead_octet < 0xf0) return 3;   // 0xe0-0xef
    if (lead_octet < 0xf8) return 4;   // 0xf0-0xf7
    if (lead_octet < 0xfc) return 5;   // 0xf8-0xfb
    return 6;                          // 0xfc-0xff: catch-all
}
0182
// True when `octet_1` lies outside 0x80-0xbf, i.e. its two top bits are not
// the `10` pattern.
inline bool invalid_continuing_octet(unsigned char octet_1)
{
    return (octet_1 & 0xc0) != 0x80;
}
0186
0187 inline unsigned int get_cont_octet_count(unsigned char lead_octet)
0188 {
0189 return get_octet_count(lead_octet) - 1;
0190 }
0191
// Returns a six-entry table indexed by continuation-octet count (0-5).
// The conversion routines add the entry to, or subtract it from, the lead
// octet of a sequence.
inline const wchar_t * get_octet1_modifier_table() noexcept
{
    static const wchar_t lead_octet_marks[6] =
    {
        0x00,   // no continuation octets
        0xc0,   // one
        0xe0,   // two
        0xf0,   // three
        0xf8,   // four
        0xfc    // five
    };
    return &lead_octet_marks[0];
}
0199
0200
0201 std::size_t size_as_utf8(const wchar_t * in, std::size_t size, error_code & ec)
0202 {
0203 std::size_t res = 0u;
0204 const auto from_end = in + size;
0205 for (auto from = in; from != from_end; from++)
0206 res += get_cont_octet_out_count(*from) + 1;
0207 return res;
0208 }
0209
0210 std::size_t size_as_wide(const char * in, std::size_t size, error_code & ec)
0211 {
0212 const auto from = in;
0213 const auto from_end = from + size;
0214 const char * from_next = from;
0215 for (std::size_t char_count = 0u; from_next < from_end; ++char_count) {
0216 unsigned int octet_count = get_octet_count(*from_next);
0217
0218 if (octet_count > static_cast<std::size_t>(from_end - from_next))
0219 break;
0220 from_next += octet_count;
0221 }
0222
0223 return from_next - from;
0224 }
0225
0226 std::size_t convert_to_utf8(const wchar_t * in, std::size_t size,
0227 char * out, std::size_t max_size, error_code & ec)
0228 {
0229
0230 const wchar_t * from = in;
0231 const wchar_t * from_end = from + size;
0232 const wchar_t * & from_next = from;
0233 char * to = out;
0234 char * to_end = out + max_size;
0235 char * & to_next = to;
0236
0237 const wchar_t * const octet1_modifier_table = get_octet1_modifier_table();
0238 wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();
0239 while (from != from_end && to != to_end) {
0240
0241
0242 if (*from > max_wchar) {
0243 from_next = from;
0244 to_next = to;
0245 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0246 return 0u;
0247 }
0248
0249 int cont_octet_count = get_cont_octet_out_count(*from);
0250
0251
0252 int shift_exponent = cont_octet_count * 6;
0253
0254
0255 *to++ = static_cast<char>(octet1_modifier_table[cont_octet_count] +
0256 (unsigned char)(*from / (1 << shift_exponent)));
0257
0258
0259
0260
0261
0262
0263 int i = 0;
0264 while (i != cont_octet_count && to != to_end) {
0265 shift_exponent -= 6;
0266 *to++ = static_cast<char>(0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));
0267 ++i;
0268 }
0269
0270 if (to == to_end && i != cont_octet_count) {
0271 from_next = from;
0272 to_next = to - (i + 1);
0273 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0274 return 0u;
0275 }
0276 ++from;
0277 }
0278 from_next = from;
0279 to_next = to;
0280
0281
0282 if (from != from_end)
0283 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0284
0285 return to_next - out;
0286 }
0287
// True when `octet_1` is rejected as the first octet of a sequence: it is
// either in 0x80-0xbf or above 0xfd.
inline bool invalid_leading_octet(unsigned char octet_1)
{
    const bool in_continuation_range = (octet_1 >= 0x80) && (octet_1 <= 0xbf);
    const bool above_last_lead = (octet_1 >= 0xfe);
    return in_continuation_range || above_last_lead;
}
0292
0293 std::size_t convert_to_wide(const char * in, std::size_t size,
0294 wchar_t * out, std::size_t max_size, error_code & ec)
0295 {
0296 const char * from = in;
0297 const char * from_end = from + size;
0298 const char * & from_next = from;
0299 wchar_t * to = out;
0300 wchar_t * to_end = out + max_size;
0301 wchar_t * & to_next = to;
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311 const wchar_t * const octet1_modifier_table = detail::get_octet1_modifier_table();
0312 while (from != from_end && to != to_end) {
0313
0314
0315 if (invalid_leading_octet(*from)) {
0316 from_next = from;
0317 to_next = to;
0318 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0319 return 0u;
0320 }
0321
0322
0323
0324 const int cont_octet_count = get_cont_octet_count(*from);
0325
0326
0327
0328 wchar_t ucs_result =
0329 (unsigned char)(*from++) - octet1_modifier_table[cont_octet_count];
0330
0331
0332
0333
0334
0335 int i = 0;
0336 while (i != cont_octet_count && from != from_end) {
0337
0338
0339 if (invalid_continuing_octet(*from)) {
0340 from_next = from;
0341 to_next = to;
0342 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::invalid_character, error::get_utf8_category())
0343 return 0u;
0344 }
0345
0346 ucs_result *= (1 << 6);
0347
0348
0349
0350 ucs_result += (unsigned char)(*from++) - 0x80;
0351 ++i;
0352 }
0353
0354
0355 if (from == from_end && i != cont_octet_count) {
0356
0357 from_next = from - (i + 1);
0358 to_next = to;
0359 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0360 return 0u;
0361 }
0362 *to++ = ucs_result;
0363 }
0364 from_next = from;
0365 to_next = to;
0366
0367 if (from != from_end)
0368 BOOST_PROCESS_V2_ASSIGN_EC(ec, error::insufficient_buffer, error::get_utf8_category())
0369
0370 return to_next - out;
0371 }
0372
0373 #endif
0374
0375 }
0376
0377 BOOST_PROCESS_V2_END_NAMESPACE
0378
0379 #endif