Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-09 08:28:04

0001 //
0002 // Copyright (c) 2019-2024 Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 
0008 #ifndef BOOST_MYSQL_IMPL_FORMAT_SQL_IPP
0009 #define BOOST_MYSQL_IMPL_FORMAT_SQL_IPP
0010 
0011 #include <boost/mysql/blob_view.hpp>
0012 #include <boost/mysql/character_set.hpp>
0013 #include <boost/mysql/client_errc.hpp>
0014 #include <boost/mysql/constant_string_view.hpp>
0015 #include <boost/mysql/diagnostics.hpp>
0016 #include <boost/mysql/error_code.hpp>
0017 #include <boost/mysql/field_kind.hpp>
0018 #include <boost/mysql/field_view.hpp>
0019 #include <boost/mysql/format_sql.hpp>
0020 #include <boost/mysql/string_view.hpp>
0021 
0022 #include <boost/mysql/detail/access.hpp>
0023 #include <boost/mysql/detail/escape_string.hpp>
0024 #include <boost/mysql/detail/format_sql.hpp>
0025 #include <boost/mysql/detail/output_string.hpp>
0026 
0027 #include <boost/mysql/impl/internal/byte_to_hex.hpp>
0028 #include <boost/mysql/impl/internal/call_next_char.hpp>
0029 #include <boost/mysql/impl/internal/dt_to_string.hpp>
0030 
0031 #include <boost/charconv/from_chars.hpp>
0032 #include <boost/charconv/to_chars.hpp>
0033 #include <boost/core/detail/string_view.hpp>
0034 #include <boost/system/result.hpp>
0035 #include <boost/system/system_error.hpp>
0036 #include <boost/throw_exception.hpp>
0037 
0038 #include <cmath>
0039 #include <cstddef>
0040 #include <limits>
0041 #include <string>
0042 
0043 namespace boost {
0044 namespace mysql {
0045 namespace detail {
0046 
0047 // Helpers to format fundamental types
0048 inline void append_quoted_identifier(string_view name, format_context_base& ctx)
0049 {
0050     ctx.append_raw("`");
0051     auto& impl = access::get_impl(ctx);
0052     auto ec = detail::escape_string(name, impl.opts, '`', impl.output);
0053     if (ec)
0054         ctx.add_error(ec);
0055     ctx.append_raw("`");
0056 }
0057 
0058 template <class T>
0059 void append_int(T integer, format_context_base& ctx)
0060 {
0061     // Make sure our buffer is big enough. 2: sign + digits10 is only 1 below max
0062     constexpr std::size_t buffsize = 32;
0063     static_assert(2 + std::numeric_limits<double>::digits10 < buffsize, "");
0064 
0065     char buff[buffsize];
0066 
0067     auto res = charconv::to_chars(buff, buff + buffsize, integer);
0068 
0069     // Can only fail becuase of buffer being too small
0070     BOOST_ASSERT(res.ec == std::errc());
0071 
0072     // Copy
0073     access::get_impl(ctx).output.append(string_view(buff, res.ptr - buff));
0074 }
0075 
0076 inline void append_double(double number, format_context_base& ctx)
0077 {
0078     // Make sure our buffer is big enough. 4: sign, radix point, e+
0079     // 3: max exponent digits
0080     constexpr std::size_t buffsize = 32;
0081     static_assert(4 + std::numeric_limits<double>::max_digits10 + 3 < buffsize, "");
0082 
0083     // inf and nan are not supported by MySQL
0084     if (std::isinf(number) || std::isnan(number))
0085     {
0086         ctx.add_error(client_errc::unformattable_value);
0087         return;
0088     }
0089 
0090     char buff[buffsize];
0091 
0092     // We format as scientific to make MySQL understand the number as a double.
0093     // Otherwise, it takes it as a DECIMAL.
0094     auto res = charconv::to_chars(buff, buff + buffsize, number, charconv::chars_format::scientific);
0095 
0096     // Can only fail because of buffer being too small
0097     BOOST_ASSERT(res.ec == std::errc());
0098 
0099     // Copy
0100     access::get_impl(ctx).output.append(string_view(buff, res.ptr - buff));
0101 }
0102 
0103 inline void append_quoted_string(string_view str, format_context_base& ctx)
0104 {
0105     auto& impl = access::get_impl(ctx);
0106     impl.output.append("'");
0107     auto ec = detail::escape_string(str, impl.opts, '\'', impl.output);
0108     if (ec)
0109         ctx.add_error(ec);
0110     impl.output.append("'");
0111 }
0112 
0113 inline void append_string(string_view str, string_view format_spec, format_context_base& ctx)
0114 {
0115     // Parse format spec
0116     if (format_spec.size() > 1u)
0117     {
0118         ctx.add_error(client_errc::format_string_invalid_specifier);
0119         return;
0120     }
0121 
0122     // No specifier: quoted string
0123     if (format_spec.empty())
0124         return append_quoted_string(str, ctx);
0125 
0126     // We got a specifier
0127     switch (format_spec[0])
0128     {
0129     case 'i':
0130         // format as identifier
0131         return append_quoted_identifier(str, ctx);
0132     case 'r':
0133         // append raw SQL
0134         ctx.append_raw(runtime(str));
0135         break;
0136     default: ctx.add_error(client_errc::format_string_invalid_specifier);
0137     }
0138 }
0139 
0140 inline void append_blob(blob_view b, format_context_base& ctx)
0141 {
0142     // Blobs have a binary character set, which may include characters
0143     // that are not valid in the current character set. However, escaping
0144     // is always performed using the character_set_connection.
0145     // mysql_real_escape_string escapes multibyte characters with a backslash,
0146     // but this behavior is not documented, so we don't want to rely on it.
0147     // The most reliable way to encode blobs is using hex strings.
0148 
0149     // Output string
0150     auto output = access::get_impl(ctx).output;
0151 
0152     // We output characters to a temporary buffer, batching append calls
0153     constexpr std::size_t buffer_size = 64;
0154     char buffer[buffer_size]{};
0155     char* it = buffer;
0156     char* const end = buffer + buffer_size;
0157 
0158     // Binary string introducer
0159     output.append("x'");
0160 
0161     // Serialize contents
0162     for (unsigned char byte : b)
0163     {
0164         // Serialize the byte
0165         it = byte_to_hex(byte, it);
0166 
0167         // If we filled the buffer, dump it
0168         if (it == end)
0169         {
0170             output.append({buffer, buffer_size});
0171             it = buffer;
0172         }
0173     }
0174 
0175     // Dump anything that didn't fill the buffer
0176     output.append({buffer, static_cast<std::size_t>(it - buffer)});
0177 
0178     // Closing quote
0179     ctx.append_raw("'");
0180 }
0181 
0182 inline void append_quoted_date(date d, format_context_base& ctx)
0183 {
0184     char buffer[34];
0185     buffer[0] = '\'';
0186     std::size_t sz = detail::date_to_string(d.year(), d.month(), d.day(), span<char, 32>(buffer + 1, 32));
0187     buffer[sz + 1] = '\'';
0188     access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
0189 }
0190 
0191 inline void append_quoted_datetime(datetime d, format_context_base& ctx)
0192 {
0193     char buffer[66];
0194     buffer[0] = '\'';
0195     std::size_t sz = detail::datetime_to_string(
0196         d.year(),
0197         d.month(),
0198         d.day(),
0199         d.hour(),
0200         d.minute(),
0201         d.second(),
0202         d.microsecond(),
0203         span<char, 64>(buffer + 1, 64)
0204     );
0205     buffer[sz + 1] = '\'';
0206     access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
0207 }
0208 
0209 inline void append_quoted_time(time t, format_context_base& ctx)
0210 {
0211     char buffer[66];
0212     buffer[0] = '\'';
0213     std::size_t sz = time_to_string(t, span<char, 64>(buffer + 1, 64));
0214     buffer[sz + 1] = '\'';
0215     access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
0216 }
0217 
0218 inline void append_field_view(
0219     field_view fv,
0220     string_view format_spec,
0221     bool allow_specs,
0222     format_context_base& ctx
0223 )
0224 {
0225     auto kind = fv.kind();
0226 
0227     // String types may allow specs
0228     if (allow_specs && kind == field_kind::string)
0229     {
0230         append_string(fv.get_string(), format_spec, ctx);
0231         return;
0232     }
0233 
0234     // Reject specifiers if !allow_specs or for other types
0235     if (!format_spec.empty())
0236     {
0237         ctx.add_error(client_errc::format_string_invalid_specifier);
0238         return;
0239     }
0240 
0241     // Perform the formatting operation
0242     switch (fv.kind())
0243     {
0244     case field_kind::null: ctx.append_raw("NULL"); return;
0245     case field_kind::int64: return append_int(fv.get_int64(), ctx);
0246     case field_kind::uint64: return append_int(fv.get_uint64(), ctx);
0247     case field_kind::float_:
0248         // float is formatted as double because it's parsed as such
0249         return append_double(fv.get_float(), ctx);
0250     case field_kind::double_: return append_double(fv.get_double(), ctx);
0251     case field_kind::string: return append_quoted_string(fv.get_string(), ctx);
0252     case field_kind::blob: return append_blob(fv.get_blob(), ctx);
0253     case field_kind::date: return append_quoted_date(fv.get_date(), ctx);
0254     case field_kind::datetime: return append_quoted_datetime(fv.get_datetime(), ctx);
0255     case field_kind::time: return append_quoted_time(fv.get_time(), ctx);
0256     default: BOOST_ASSERT(false); return;
0257     }
0258 }
0259 
0260 // Helpers for parsing format strings
// Returns true if c is an ASCII decimal digit ('0' to '9')
inline bool is_number(char c)
{
    return '0' <= c && c <= '9';
}
0262 
// Returns true if c may start an identifier: an ASCII letter or an underscore
inline bool is_name_start(char c)
{
    if (c == '_')
        return true;
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';
    return is_lower || is_upper;
}
0264 
// Returns true if c may appear in a format specifier: any character
// in the range [0x20, 0x7e], except for curly braces
inline bool is_format_spec_char(char c)
{
    // Braces delimit replacement fields, so they're never part of a spec
    if (c == '{' || c == '}')
        return false;

    // Compare as unsigned so bytes >= 0x80 are rejected regardless of char signedness
    const auto code = static_cast<unsigned char>(c);
    return code >= 0x20 && code <= 0x7e;
}
0270 
0271 class format_state
0272 {
0273     format_context_base& ctx_;
0274     span<const format_arg> args_;
0275 
0276     // Borrowed from fmt
0277     // 0: we haven't used any args yet
0278     // -1: we're doing explicit indexing
0279     // >0: we're doing auto indexing
0280     int next_arg_id_{0};
0281 
0282     BOOST_ATTRIBUTE_NODISCARD
0283     bool advance(const char*& it, const char* end)
0284     {
0285         std::size_t size = detail::call_next_char(ctx_.impl_.opts.charset, it, end);
0286         if (size == 0)
0287         {
0288             ctx_.add_error(client_errc::format_string_invalid_encoding);
0289             return false;
0290         }
0291         it += size;
0292         return true;
0293     }
0294 
0295     bool uses_auto_ids() const noexcept { return next_arg_id_ > 0; }
0296     bool uses_explicit_ids() const noexcept { return next_arg_id_ == -1; }
0297 
0298     void do_field(format_arg arg, string_view format_spec)
0299     {
0300         ctx_.format_arg(access::get_impl(arg).value, format_spec);
0301     }
0302 
0303     BOOST_ATTRIBUTE_NODISCARD
0304     bool do_indexed_field(int arg_id, string_view format_spec)
0305     {
0306         BOOST_ASSERT(arg_id >= 0);
0307         if (static_cast<std::size_t>(arg_id) >= args_.size())
0308         {
0309             ctx_.add_error(client_errc::format_arg_not_found);
0310             return false;
0311         }
0312         do_field(args_[arg_id], format_spec);
0313         return true;
0314     }
0315 
0316     struct arg_id_t
0317     {
0318         enum class type_t
0319         {
0320             none,
0321             integral,
0322             identifier
0323         };
0324         union data_t
0325         {
0326             unsigned short integral;
0327             string_view identifier;
0328 
0329             data_t() noexcept : integral{} {}
0330         };
0331 
0332         type_t type;
0333         data_t data;
0334 
0335         arg_id_t() noexcept : type(type_t::none), data() {}
0336         arg_id_t(unsigned short v) noexcept : type(type_t::integral) { data.integral = v; }
0337         arg_id_t(string_view v) noexcept : type(type_t::identifier) { data.identifier = v; }
0338     };
0339 
0340     BOOST_ATTRIBUTE_NODISCARD
0341     static arg_id_t parse_arg_id(const char*& it, const char* format_end)
0342     {
0343         if (is_number(*it))
0344         {
0345             unsigned short field_index = 0;
0346             auto res = charconv::from_chars(it, format_end, field_index);
0347             if (res.ec != std::errc{})
0348                 return arg_id_t();
0349             it = res.ptr;
0350             return field_index;
0351         }
0352         else if (is_name_start(*it))
0353         {
0354             const char* name_begin = it;
0355             while (it != format_end && (is_name_start(*it) || is_number(*it)))
0356                 ++it;
0357             string_view field_name(name_begin, it);
0358             return field_name;
0359         }
0360         else
0361         {
0362             return arg_id_t();
0363         }
0364     }
0365 
0366     BOOST_ATTRIBUTE_NODISCARD
0367     static string_view parse_format_spec(const char*& it, const char* format_end)
0368     {
0369         if (it != format_end && *it == ':')
0370         {
0371             ++it;
0372             const char* first = it;
0373             while (it != format_end && is_format_spec_char(*it))
0374                 ++it;
0375             return {first, it};
0376         }
0377         else
0378         {
0379             return string_view();
0380         }
0381     }
0382 
0383     BOOST_ATTRIBUTE_NODISCARD
0384     bool parse_field(const char*& it, const char* format_end)
0385     {
0386         // Taken from fmtlib and adapted to our requirements
0387         // it points to the character next to the opening '{'
0388         // replacement_field ::=  "{" [arg_id] [":" (format_spec)] "}"
0389         // arg_id            ::=  integer | identifier
0390         // integer           ::=  <decimal, unsigned short, parsed by from_chars>
0391         // identifier        ::=  id_start id_continue*
0392         // id_start          ::=  "a"..."z" | "A"..."Z" | "_"
0393         // id_continue       ::=  id_start | digit
0394         // digit             ::=  "0"..."9"
0395         // format_spec       ::=  <any character >= 0x20 && <= 0x7e && != "{", "}">
0396 
0397         // Parse the ID and spec components
0398         auto arg_id = parse_arg_id(it, format_end);
0399         auto spec = parse_format_spec(it, format_end);
0400 
0401         // If we're not at the end on the string, it's a syntax error
0402         if (it == format_end || *it != '}')
0403         {
0404             ctx_.add_error(client_errc::format_string_invalid_syntax);
0405             return false;
0406         }
0407         ++it;
0408 
0409         // Process what was parsed
0410         switch (arg_id.type)
0411         {
0412         case arg_id_t::type_t::none: return append_auto_field(spec);
0413         case arg_id_t::type_t::integral: return append_indexed_field(arg_id.data.integral, spec);
0414         case arg_id_t::type_t::identifier: return append_named_field(arg_id.data.identifier, spec);
0415         default: BOOST_ASSERT(false); return false;
0416         }
0417     }
0418 
0419     BOOST_ATTRIBUTE_NODISCARD
0420     bool append_named_field(string_view field_name, string_view format_spec)
0421     {
0422         // Find the argument
0423         for (const auto& arg : args_)
0424         {
0425             if (access::get_impl(arg).name == field_name)
0426             {
0427                 do_field(arg, format_spec);
0428                 return true;
0429             }
0430         }
0431 
0432         // Not found
0433         ctx_.add_error(client_errc::format_arg_not_found);
0434         return false;
0435     }
0436 
0437     BOOST_ATTRIBUTE_NODISCARD
0438     bool append_indexed_field(int index, string_view format_spec)
0439     {
0440         if (uses_auto_ids())
0441         {
0442             ctx_.add_error(client_errc::format_string_manual_auto_mix);
0443             return false;
0444         }
0445         next_arg_id_ = -1;
0446         return do_indexed_field(index, format_spec);
0447     }
0448 
0449     BOOST_ATTRIBUTE_NODISCARD
0450     bool append_auto_field(string_view format_spec)
0451     {
0452         if (uses_explicit_ids())
0453         {
0454             ctx_.add_error(client_errc::format_string_manual_auto_mix);
0455             return false;
0456         }
0457         return do_indexed_field(next_arg_id_++, format_spec);
0458     }
0459 
0460 public:
0461     format_state(format_context_base& ctx, span<const format_arg> args) noexcept : ctx_(ctx), args_(args) {}
0462 
0463     void format(string_view format_str)
0464     {
0465         // We can use operator++ when we know a character is ASCII. Some charsets
0466         // allow ASCII continuation bytes, so we need to skip the entire character otherwise
0467         auto cur_begin = format_str.data();
0468         auto it = format_str.data();
0469         auto end = format_str.data() + format_str.size();
0470         while (it != end)
0471         {
0472             if (*it == '{')
0473             {
0474                 // May be a replacement field or a literal brace. In any case, dump accumulated output
0475                 ctx_.impl_.output.append({cur_begin, it});
0476                 ++it;
0477 
0478                 if (it == end)
0479                 {
0480                     // If the string ends here, it's en error
0481                     ctx_.add_error(client_errc::format_string_invalid_syntax);
0482                     return;
0483                 }
0484                 else if (*it == '{')
0485                 {
0486                     // A double brace is the escaped form of '{'
0487                     ctx_.append_raw("{");
0488                     ++it;
0489                 }
0490                 else
0491                 {
0492                     // It's a replacement field. Process it
0493                     if (!parse_field(it, end))
0494                         return;
0495                 }
0496                 cur_begin = it;
0497             }
0498             else if (*it == '}')
0499             {
0500                 // A lonely } is only legal as a escape curly brace (i.e. }})
0501                 ctx_.impl_.output.append({cur_begin, it});
0502                 ++it;
0503                 if (it == end || *it != '}')
0504                 {
0505                     ctx_.add_error(client_errc::format_string_invalid_syntax);
0506                     return;
0507                 }
0508                 ctx_.impl_.output.append("}");
0509                 ++it;
0510                 cur_begin = it;
0511             }
0512             else
0513             {
0514                 if (!advance(it, end))
0515                     return;
0516             }
0517         }
0518 
0519         // Dump any remaining SQL
0520         ctx_.impl_.output.append({cur_begin, end});
0521     }
0522 };
0523 
0524 }  // namespace detail
0525 }  // namespace mysql
0526 }  // namespace boost
0527 
0528 void boost::mysql::format_context_base::format_arg(detail::formattable_ref_impl arg, string_view format_spec)
0529 {
0530     switch (arg.type)
0531     {
0532     case detail::formattable_ref_impl::type_t::field:
0533         detail::append_field_view(arg.data.fv, format_spec, false, *this);
0534         break;
0535     case detail::formattable_ref_impl::type_t::field_with_specs:
0536         detail::append_field_view(arg.data.fv, format_spec, true, *this);
0537         break;
0538     case detail::formattable_ref_impl::type_t::fn_and_ptr:
0539         if (!arg.data.custom.format_fn(arg.data.custom.obj, format_spec.begin(), format_spec.end(), *this))
0540         {
0541             add_error(client_errc::format_string_invalid_specifier);
0542         }
0543         break;
0544     default: BOOST_ASSERT(false);
0545     }
0546 }
0547 
0548 void boost::mysql::format_sql_to(
0549     format_context_base& ctx,
0550     constant_string_view format_str,
0551     std::initializer_list<format_arg> args
0552 )
0553 {
0554     detail::format_state(ctx, {args.begin(), args.end()}).format(format_str.get());
0555 }
0556 
0557 std::string boost::mysql::format_sql(
0558     format_options opts,
0559     constant_string_view format_str,
0560     std::initializer_list<format_arg> args
0561 )
0562 {
0563     format_context ctx(opts);
0564     format_sql_to(ctx, format_str, args);
0565     return std::move(ctx).get().value();
0566 }
0567 
0568 std::pair<bool, boost::mysql::string_view> boost::mysql::detail::parse_range_specifiers(
0569     const char* spec_begin,
0570     const char* spec_end
0571 )
0572 {
0573     // range_format_spec ::=  [":" [underlying_spec]]
0574     // Example: {::i} => format an array of strings as identifiers
0575 
0576     // Empty: no specifiers
0577     if (spec_begin == spec_end)
0578         return {true, {}};
0579 
0580     // If the first character is not a ':', the spec is invalid.
0581     if (*spec_begin != ':')
0582         return {false, {}};
0583     ++spec_begin;
0584 
0585     // Return the rest of the range
0586     return {
0587         true,
0588         {spec_begin, spec_end}
0589     };
0590 }
0591 
0592 #endif