Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:53:53

0001 //////////////////////////////////////////////////////////////////////////////
0002 /// \file c_regex_traits.hpp
0003 /// Contains the definition of the c_regex_traits\<\> template, which is a
0004 /// wrapper for the C locale functions that can be used to customize the
0005 /// behavior of static and dynamic regexes.
0006 //
0007 //  Copyright 2008 Eric Niebler. Distributed under the Boost
0008 //  Software License, Version 1.0. (See accompanying file
0009 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0010 
0011 #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
0012 #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
0013 
0014 // MS compatible compilers support #pragma once
0015 #if defined(_MSC_VER)
0016 # pragma once
0017 #endif
0018 
0019 #include <cstdlib>
0020 #include <boost/config.hpp>
0021 #include <boost/assert.hpp>
0022 #include <boost/xpressive/traits/detail/c_ctype.hpp>
0023 
0024 namespace boost { namespace xpressive
0025 {
0026 
0027 namespace detail
0028 {
0029     ///////////////////////////////////////////////////////////////////////////////
0030     // empty_locale
/// Stateless tag type; it carries no data and has no members.
struct empty_locale {};
0034 
0035     ///////////////////////////////////////////////////////////////////////////////
0036     // c_regex_traits_base
/// Base class template that supplies a protected, do-nothing imbue() hook.
///
/// The primary template and both partial specializations expose the same
/// interface: a static member template that accepts any traits object and
/// ignores it. The parameter is left unnamed everywhere so that
/// instantiating it does not produce unused-parameter warnings.
///
/// \tparam Char The character type.
/// \tparam SizeOfChar Defaults to sizeof(Char); used only to select a
///     partial specialization.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct c_regex_traits_base
{
protected:
    /// No-op.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

/// Partial specialization selected for character types whose size is 1.
template<typename Char>
struct c_regex_traits_base<Char, 1>
{
protected:
    /// No-op.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// Partial specialization selected for wchar_t, whatever its size.
template<std::size_t SizeOfChar>
struct c_regex_traits_base<wchar_t, SizeOfChar>
{
protected:
    /// No-op.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};
#endif
0068 
/// Case-conversion helpers. Only declared for the general case; the
/// explicit specializations that follow provide the definitions for char
/// and (unless BOOST_XPRESSIVE_NO_WREGEX is defined) wchar_t.
template<typename Char>
Char c_tolower(Char);

template<typename Char>
Char c_toupper(Char);

/// Lower-cases a narrow character via the C library's tolower().
template<>
inline char c_tolower(char ch)
{
    using namespace std;
    // Convert through unsigned char before handing the value to tolower().
    unsigned char const as_uchar = static_cast<unsigned char>(ch);
    return static_cast<char>(tolower(as_uchar));
}

/// Upper-cases a narrow character via the C library's toupper().
template<>
inline char c_toupper(char ch)
{
    using namespace std;
    // Convert through unsigned char before handing the value to toupper().
    unsigned char const as_uchar = static_cast<unsigned char>(ch);
    return static_cast<char>(toupper(as_uchar));
}

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// Lower-cases a wide character via the C library's towlower().
template<>
inline wchar_t c_tolower(wchar_t ch)
{
    using namespace std;
    wchar_t const lowered = towlower(ch);
    return lowered;
}

/// Upper-cases a wide character via the C library's towupper().
template<>
inline wchar_t c_toupper(wchar_t ch)
{
    using namespace std;
    wchar_t const raised = towupper(ch);
    return raised;
}
#endif
0104 
0105 } // namespace detail
0106 
0107 ///////////////////////////////////////////////////////////////////////////////
0108 // regex_traits_version_1_tag
0109 //
0110 struct regex_traits_version_1_tag;
0111 
0112 ///////////////////////////////////////////////////////////////////////////////
0113 // c_regex_traits
0114 //
0115 /// \brief Encapsaulates the standard C locale functions for use by the
0116 /// \c basic_regex\<\> class template.
0117 template<typename Char>
0118 struct c_regex_traits
0119   : detail::c_regex_traits_base<Char>
0120 {
0121     typedef Char char_type;
0122     typedef std::basic_string<char_type> string_type;
0123     typedef detail::empty_locale locale_type;
0124     typedef typename detail::char_class_impl<Char>::char_class_type char_class_type;
0125     typedef regex_traits_version_2_tag version_tag;
0126     typedef detail::c_regex_traits_base<Char> base_type;
0127 
0128     /// Initialize a c_regex_traits object to use the global C locale.
0129     ///
0130     c_regex_traits(locale_type const &loc = locale_type())
0131       : base_type()
0132     {
0133         this->imbue(loc);
0134     }
0135 
0136     /// Checks two c_regex_traits objects for equality
0137     ///
0138     /// \return true.
0139     bool operator ==(c_regex_traits<char_type> const &) const
0140     {
0141         return true;
0142     }
0143 
0144     /// Checks two c_regex_traits objects for inequality
0145     ///
0146     /// \return false.
0147     bool operator !=(c_regex_traits<char_type> const &) const
0148     {
0149         return false;
0150     }
0151 
0152     /// Convert a char to a Char
0153     ///
0154     /// \param ch The source character.
0155     /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t.
0156     static char_type widen(char ch);
0157 
0158     /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
0159     ///
0160     /// \param ch The source character.
0161     /// \return a value between 0 and UCHAR_MAX, inclusive.
0162     static unsigned char hash(char_type ch)
0163     {
0164         return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
0165     }
0166 
0167     /// No-op
0168     ///
0169     /// \param ch The source character.
0170     /// \return ch
0171     static char_type translate(char_type ch)
0172     {
0173         return ch;
0174     }
0175 
0176     /// Converts a character to lower-case using the current global C locale.
0177     ///
0178     /// \param ch The source character.
0179     /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
0180     static char_type translate_nocase(char_type ch)
0181     {
0182         return detail::c_tolower(ch);
0183     }
0184 
0185     /// Converts a character to lower-case using the current global C locale.
0186     ///
0187     /// \param ch The source character.
0188     /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
0189     static char_type tolower(char_type ch)
0190     {
0191         return detail::c_tolower(ch);
0192     }
0193 
0194     /// Converts a character to upper-case using the current global C locale.
0195     ///
0196     /// \param ch The source character.
0197     /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t.
0198     static char_type toupper(char_type ch)
0199     {
0200         return detail::c_toupper(ch);
0201     }
0202 
0203     /// Returns a \c string_type containing all the characters that compare equal
0204     /// disregrarding case to the one passed in. This function can only be called
0205     /// if <tt>has_fold_case\<c_regex_traits\<Char\> \>::value</tt> is \c true.
0206     ///
0207     /// \param ch The source character.
0208     /// \return \c string_type containing all chars which are equal to \c ch when disregarding
0209     ///     case
0210     //typedef array<char_type, 2> fold_case_type;
0211     string_type fold_case(char_type ch) const
0212     {
0213         BOOST_MPL_ASSERT((is_same<char_type, char>));
0214         char_type ntcs[] = {
0215             detail::c_tolower(ch)
0216           , detail::c_toupper(ch)
0217           , 0
0218         };
0219         if(ntcs[1] == ntcs[0])
0220             ntcs[1] = 0;
0221         return string_type(ntcs);
0222     }
0223 
0224     /// Checks to see if a character is within a character range.
0225     ///
0226     /// \param first The bottom of the range, inclusive.
0227     /// \param last The top of the range, inclusive.
0228     /// \param ch The source character.
0229     /// \return first <= ch && ch <= last.
0230     static bool in_range(char_type first, char_type last, char_type ch)
0231     {
0232         return first <= ch && ch <= last;
0233     }
0234 
0235     /// Checks to see if a character is within a character range, irregardless of case.
0236     ///
0237     /// \param first The bottom of the range, inclusive.
0238     /// \param last The top of the range, inclusive.
0239     /// \param ch The source character.
0240     /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first,
0241     ///     last, toupper(ch))
0242     /// \attention The default implementation doesn't do proper Unicode
0243     ///     case folding, but this is the best we can do with the standard
0244     ///     C locale functions.
0245     static bool in_range_nocase(char_type first, char_type last, char_type ch)
0246     {
0247         return c_regex_traits::in_range(first, last, ch)
0248             || c_regex_traits::in_range(first, last, detail::c_tolower(ch))
0249             || c_regex_traits::in_range(first, last, detail::c_toupper(ch));
0250     }
0251 
0252     /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
0253     /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
0254     /// then v.transform(G1, G2) < v.transform(H1, H2).
0255     ///
0256     /// \attention Not currently used
0257     template<typename FwdIter>
0258     static string_type transform(FwdIter begin, FwdIter end)
0259     {
0260         BOOST_ASSERT(false); // BUGBUG implement me
0261     }
0262 
0263     /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
0264     /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
0265     /// when character case is not considered then
0266     /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2).
0267     ///
0268     /// \attention Not currently used
0269     template<typename FwdIter>
0270     static string_type transform_primary(FwdIter begin, FwdIter end)
0271     {
0272         BOOST_ASSERT(false); // BUGBUG implement me
0273     }
0274 
0275     /// Returns a sequence of characters that represents the collating element
0276     /// consisting of the character sequence designated by the iterator range [F1, F2).
0277     /// Returns an empty string if the character sequence is not a valid collating element.
0278     ///
0279     /// \attention Not currently used
0280     template<typename FwdIter>
0281     static string_type lookup_collatename(FwdIter begin, FwdIter end)
0282     {
0283         BOOST_ASSERT(false); // BUGBUG implement me
0284     }
0285 
0286     /// For the character class name represented by the specified character sequence,
0287     /// return the corresponding bitmask representation.
0288     ///
0289     /// \param begin A forward iterator to the start of the character sequence representing
0290     ///     the name of the character class.
0291     /// \param end The end of the character sequence.
0292     /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
0293     ///     version of the character class.
0294     /// \return A bitmask representing the character class.
0295     template<typename FwdIter>
0296     static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase)
0297     {
0298         return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase);
0299     }
0300 
0301     /// Tests a character against a character class bitmask.
0302     ///
0303     /// \param ch The character to test.
0304     /// \param mask The character class bitmask against which to test.
0305     /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
0306     ///     together.
0307     /// \return true if the character is a member of any of the specified character classes, false
0308     ///     otherwise.
0309     static bool isctype(char_type ch, char_class_type mask)
0310     {
0311         return detail::char_class_impl<char_type>::isctype(ch, mask);
0312     }
0313 
0314     /// Convert a digit character into the integer it represents.
0315     ///
0316     /// \param ch The digit character.
0317     /// \param radix The radix to use for the conversion.
0318     /// \pre radix is one of 8, 10, or 16.
0319     /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If
0320     ///     char_type is char, std::strtol is used for the conversion. If char_type is wchar_t,
0321     ///     std::wcstol is used.
0322     static int value(char_type ch, int radix);
0323 
0324     /// No-op
0325     ///
0326     locale_type imbue(locale_type loc)
0327     {
0328         this->base_type::imbue(*this);
0329         return loc;
0330     }
0331 
0332     /// No-op
0333     ///
0334     static locale_type getloc()
0335     {
0336         locale_type loc;
0337         return loc;
0338     }
0339 };
0340 
0341 ///////////////////////////////////////////////////////////////////////////////
0342 // c_regex_traits<>::widen specializations
0343 /// INTERNAL ONLY
0344 template<>
0345 inline char c_regex_traits<char>::widen(char ch)
0346 {
0347     return ch;
0348 }
0349 
0350 #ifndef BOOST_XPRESSIVE_NO_WREGEX
0351 /// INTERNAL ONLY
0352 template<>
0353 inline wchar_t c_regex_traits<wchar_t>::widen(char ch)
0354 {
0355     using namespace std;
0356     return btowc(ch);
0357 }
0358 #endif
0359 
0360 ///////////////////////////////////////////////////////////////////////////////
0361 // c_regex_traits<>::hash specializations
0362 /// INTERNAL ONLY
0363 template<>
0364 inline unsigned char c_regex_traits<char>::hash(char ch)
0365 {
0366     return static_cast<unsigned char>(ch);
0367 }
0368 
0369 #ifndef BOOST_XPRESSIVE_NO_WREGEX
0370 /// INTERNAL ONLY
0371 template<>
0372 inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch)
0373 {
0374     return static_cast<unsigned char>(ch);
0375 }
0376 #endif
0377 
0378 ///////////////////////////////////////////////////////////////////////////////
0379 // c_regex_traits<>::value specializations
0380 /// INTERNAL ONLY
0381 template<>
0382 inline int c_regex_traits<char>::value(char ch, int radix)
0383 {
0384     using namespace std;
0385     BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
0386     char begin[2] = { ch, '\0' }, *end = 0;
0387     int val = strtol(begin, &end, radix);
0388     return begin == end ? -1 : val;
0389 }
0390 
0391 #ifndef BOOST_XPRESSIVE_NO_WREGEX
0392 /// INTERNAL ONLY
0393 template<>
0394 inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix)
0395 {
0396     using namespace std;
0397     BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
0398     wchar_t begin[2] = { ch, L'\0' }, *end = 0;
0399     int val = wcstol(begin, &end, radix);
0400     return begin == end ? -1 : val;
0401 }
0402 #endif
0403 
0404 // Narrow C traits has fold_case() member function.
0405 template<>
0406 struct has_fold_case<c_regex_traits<char> >
0407   : mpl::true_
0408 {
0409 };
0410 
0411 }}
0412 
0413 #endif