File indexing completed on 2025-01-19 09:47:48
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
0011 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
0012
0013 #include <boost/cstdint.hpp>
0014
0015 # include "category_table.hpp"
0016 # include "script_table.hpp"
0017 # include "lowercase_table.hpp"
0018 # include "uppercase_table.hpp"
0019
0020 namespace boost { namespace spirit { namespace ucd
0021 {
0022
0023
0024
// Namespace-like holder for the enumerations used by the query functions
// in this header. All enumerators are plain (unscoped) enums, so they are
// reachable as properties::<name>.
struct properties
{
    // Coarse grouping of the general categories below. Every major
    // category owns a block of eight consecutive `category` values, i.e.
    // major_category == category >> 3.
    enum major_category
    {
        letter = 0,
        mark,
        number,
        separator,
        other,
        punctuation,
        symbol
    };

    // General category of a code point. Each group starts at
    // (major_category << 3); the remaining enumerators in the group follow
    // sequentially. All values fit in six bits (0..63).
    enum category
    {
        // letter: 0..4
        uppercase_letter = letter << 3,
        lowercase_letter,
        titlecase_letter,
        modifier_letter,
        other_letter,

        // mark: 8..10
        nonspacing_mark = mark << 3,
        enclosing_mark,
        spacing_mark,

        // number: 16..18
        decimal_number = number << 3,
        letter_number,
        other_number,

        // separator: 24..26
        space_separator = separator << 3,
        line_separator,
        paragraph_separator,

        // other: 32..36
        control = other << 3,
        format,
        private_use,
        surrogate,
        unassigned,

        // punctuation: 40..46
        dash_punctuation = punctuation << 3,
        open_punctuation,
        close_punctuation,
        connector_punctuation,
        other_punctuation,
        initial_punctuation,
        final_punctuation,

        // symbol: 48..51
        math_symbol = symbol << 3,
        currency_symbol,
        modifier_symbol,
        other_symbol
    };

    // Single-bit flags that sit above the six category bits in a category
    // table entry; tested with a bitwise AND against the looked-up value.
    enum derived_properties
    {
        alphabetic                   = 1 << 6,   // 64
        uppercase                    = 1 << 7,   // 128
        lowercase                    = 1 << 8,   // 256
        white_space                  = 1 << 9,   // 512
        hex_digit                    = 1 << 10,  // 1024
        noncharacter_code_point      = 1 << 11,  // 2048
        default_ignorable_code_point = 1 << 12   // 4096
    };

    // Script identifiers. Values are implicit and sequential
    // (adlam == 0 ... unknown == 164). get_script() casts the raw value
    // returned by detail::script_lookup() to this enum, so the order here
    // must stay in sync with whatever produces that table.
    // NOTE(review): presumably generated together with script_table.hpp --
    // confirm before reordering or inserting entries.
    enum script
    {
        adlam = 0,
        caucasian_albanian,
        ahom,
        arabic,
        imperial_aramaic,
        armenian,
        avestan,
        balinese,
        bamum,
        bassa_vah,
        batak,                      // 10
        bengali,
        bhaiksuki,
        bopomofo,
        brahmi,
        braille,
        buginese,
        buhid,
        chakma,
        canadian_aboriginal,
        carian,                     // 20
        cham,
        cherokee,
        chorasmian,
        coptic,
        cypro_minoan,
        cypriot,
        cyrillic,
        devanagari,
        dives_akuru,
        dogra,                      // 30
        deseret,
        duployan,
        egyptian_hieroglyphs,
        elbasan,
        elymaic,
        ethiopic,
        georgian,
        glagolitic,
        gunjala_gondi,
        masaram_gondi,              // 40
        gothic,
        grantha,
        greek,
        gujarati,
        gurmukhi,
        hangul,
        han,
        hanunoo,
        hatran,
        hebrew,                     // 50
        hiragana,
        anatolian_hieroglyphs,
        pahawh_hmong,
        nyiakeng_puachue_hmong,
        katakana_or_hiragana,
        old_hungarian,
        old_italic,
        javanese,
        kayah_li,
        katakana,                   // 60
        kawi,
        kharoshthi,
        khmer,
        khojki,
        khitan_small_script,
        kannada,
        kaithi,
        tai_tham,
        lao,
        latin,                      // 70
        lepcha,
        limbu,
        linear_a,
        linear_b,
        lisu,
        lycian,
        lydian,
        mahajani,
        makasar,
        mandaic,                    // 80
        manichaean,
        marchen,
        medefaidrin,
        mende_kikakui,
        meroitic_cursive,
        meroitic_hieroglyphs,
        malayalam,
        modi,
        mongolian,
        mro,                        // 90
        meetei_mayek,
        multani,
        myanmar,
        nag_mundari,
        nandinagari,
        old_north_arabian,
        nabataean,
        newa,
        nko,
        nushu,                      // 100
        ogham,
        ol_chiki,
        old_turkic,
        oriya,
        osage,
        osmanya,
        old_uyghur,
        palmyrene,
        pau_cin_hau,
        old_permic,                 // 110
        phags_pa,
        inscriptional_pahlavi,
        psalter_pahlavi,
        phoenician,
        miao,
        inscriptional_parthian,
        rejang,
        hanifi_rohingya,
        runic,
        samaritan,                  // 120
        old_south_arabian,
        saurashtra,
        signwriting,
        shavian,
        sharada,
        siddham,
        khudawadi,
        sinhala,
        sogdian,
        old_sogdian,                // 130
        sora_sompeng,
        soyombo,
        sundanese,
        syloti_nagri,
        syriac,
        tagbanwa,
        takri,
        tai_le,
        new_tai_lue,
        tamil,                      // 140
        tangut,
        tai_viet,
        telugu,
        tifinagh,
        tagalog,
        thaana,
        thai,
        tibetan,
        tirhuta,
        tangsa,                     // 150
        toto,
        ugaritic,
        vai,
        vithkuqi,
        warang_citi,
        wancho,
        old_persian,
        cuneiform,
        yezidi,
        yi,                         // 160
        zanabazar_square,
        inherited,
        common,
        unknown                     // 164
    };
};
0262
0263 inline properties::category get_category(::boost::uint32_t ch)
0264 {
0265 return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
0266 }
0267
0268 inline properties::major_category get_major_category(::boost::uint32_t ch)
0269 {
0270 return static_cast<properties::major_category>(get_category(ch) >> 3);
0271 }
0272
0273 inline bool is_punctuation(::boost::uint32_t ch)
0274 {
0275 return get_major_category(ch) == properties::punctuation;
0276 }
0277
0278 inline bool is_decimal_number(::boost::uint32_t ch)
0279 {
0280 return get_category(ch) == properties::decimal_number;
0281 }
0282
0283 inline bool is_hex_digit(::boost::uint32_t ch)
0284 {
0285 return (detail::category_lookup(ch) & properties::hex_digit) != 0;
0286 }
0287
0288 inline bool is_control(::boost::uint32_t ch)
0289 {
0290 return get_category(ch) == properties::control;
0291 }
0292
0293 inline bool is_alphabetic(::boost::uint32_t ch)
0294 {
0295 return (detail::category_lookup(ch) & properties::alphabetic) != 0;
0296 }
0297
0298 inline bool is_alphanumeric(::boost::uint32_t ch)
0299 {
0300 return is_decimal_number(ch) || is_alphabetic(ch);
0301 }
0302
0303 inline bool is_uppercase(::boost::uint32_t ch)
0304 {
0305 return (detail::category_lookup(ch) & properties::uppercase) != 0;
0306 }
0307
0308 inline bool is_lowercase(::boost::uint32_t ch)
0309 {
0310 return (detail::category_lookup(ch) & properties::lowercase) != 0;
0311 }
0312
0313 inline bool is_white_space(::boost::uint32_t ch)
0314 {
0315 return (detail::category_lookup(ch) & properties::white_space) != 0;
0316 }
0317
0318 inline bool is_blank(::boost::uint32_t ch)
0319 {
0320 switch (ch)
0321 {
0322 case '\n': case '\v': case '\f': case '\r':
0323 return false;
0324 default:
0325 return is_white_space(ch)
0326 && !( get_category(ch) == properties::line_separator
0327 || get_category(ch) == properties::paragraph_separator
0328 );
0329 }
0330 }
0331
0332 inline bool is_graph(::boost::uint32_t ch)
0333 {
0334 return !( is_white_space(ch)
0335 || get_category(ch) == properties::control
0336 || get_category(ch) == properties::surrogate
0337 || get_category(ch) == properties::unassigned
0338 );
0339 }
0340
0341 inline bool is_print(::boost::uint32_t ch)
0342 {
0343 return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
0344 }
0345
0346 inline bool is_noncharacter_code_point(::boost::uint32_t ch)
0347 {
0348 return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
0349 }
0350
0351 inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
0352 {
0353 return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
0354 }
0355
0356 inline properties::script get_script(::boost::uint32_t ch)
0357 {
0358 return static_cast<properties::script>(detail::script_lookup(ch));
0359 }
0360
0361 inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
0362 {
0363
0364 ::boost::uint32_t r = detail::lowercase_lookup(ch);
0365 return (r == 0)? ch : r;
0366 }
0367
0368 inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
0369 {
0370
0371 ::boost::uint32_t r = detail::uppercase_lookup(ch);
0372 return (r == 0)? ch : r;
0373 }
0374 }}}
0375
0376 #endif