Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:44:56

0001 //
0002 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0.
0005 // https://www.boost.org/LICENSE_1_0.txt
0006 
0007 #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
0008 #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
0009 
0010 #include <boost/locale/config.hpp>
0011 #include <cstdint>
0012 
0013 #ifdef BOOST_MSVC
0014 #    pragma warning(push)
0015 #    pragma warning(disable : 4275 4251 4231 4660)
0016 #endif
0017 
namespace boost { namespace locale {

    /// \brief This namespace contains all operations required for boundary analysis of text
    namespace boundary {
        /// \defgroup boundary Boundary Analysis
        ///
        /// This module contains all operations required for boundary analysis of text: character, word, line and
        /// sentence boundaries
        ///
        /// @{

        /// This type describes the possible boundary analysis alternatives.
        enum boundary_type {
            character, ///< Analyse the text for character boundaries
            word,      ///< Analyse the text for word boundaries
            sentence,  ///< Analyse the text for sentence boundaries
            line       ///< Analyse the text for positions suitable for line breaks
        };

        /// \brief Flags used with word boundary analysis -- the type of the word, line or sentence boundary found.
        ///
        /// It is a bit-mask that represents various combinations of rules used to select this specific boundary.
        ///
        /// The name is qualified with std:: because <cstdint> only guarantees the declaration in namespace std.
        typedef std::uint32_t rule_type;

        /// \anchor bl_boundary_word_rules
        /// \name Flags that describe a type of word selected
        /// @{

        /// Not a word, like white space or punctuation mark
        constexpr rule_type word_none = 0x0000F;
        /// Word that appears to be a number
        constexpr rule_type word_number = 0x000F0;
        /// Word that contains letters, excluding kana and ideographic characters
        constexpr rule_type word_letter = 0x00F00;
        /// Word that contains kana characters
        constexpr rule_type word_kana = 0x0F000;
        /// Word that contains ideographic characters
        constexpr rule_type word_ideo = 0xF0000;
        /// Any word including numbers, 0 is special flag, equivalent to 15
        constexpr rule_type word_any = 0xFFFF0;
        /// Any word, excluding numbers but including letters, kana and ideograms.
        constexpr rule_type word_letters = 0xFFF00;
        /// Word that includes kana or ideographic characters
        constexpr rule_type word_kana_ideo = 0xFF000;
        /// Full word mask - select all possible variants
        constexpr rule_type word_mask = 0xFFFFF;

        /// @}

        /// \anchor bl_boundary_line_rules
        /// \name Flags that describe a type of line break
        /// @{

        /// Soft line break: optional but not required
        constexpr rule_type line_soft = 0x0F;
        /// Hard line break: line break is required (as per CR/LF)
        constexpr rule_type line_hard = 0xF0;
        /// Soft or Hard line break
        constexpr rule_type line_any = 0xFF;
        /// Select all types of line breaks
        constexpr rule_type line_mask = 0xFF;

        /// @}

        /// \anchor bl_boundary_sentence_rules
        /// \name Flags that describe a type of sentence break
        ///
        /// @{

        /// \brief The sentence was terminated with a sentence terminator
        /// like ".", "!" possibly followed by hard separator like CR, LF, PS
        constexpr rule_type sentence_term = 0x0F;
        /// \brief The sentence does not contain terminator like ".", "!" but ended with hard separator
        /// like CR, LF, PS or end of input.
        constexpr rule_type sentence_sep = 0xF0;
        /// Either first or second sentence break type.
        constexpr rule_type sentence_any = 0xFF;
        /// Select all sentence breaking points
        constexpr rule_type sentence_mask = 0xFF;

        ///@}

        /// \name  Flags that describe a type of character break.
        ///
        /// At this point break iterator does not distinguish different
        /// kinds of characters so it is used for consistency.
        ///@{

        /// Not in use, just for consistency
        constexpr rule_type character_any = 0xF;
        /// Select all character breaking points
        constexpr rule_type character_mask = 0xF;

        ///@}

        /// This function returns the mask that covers all variants for specific boundary type
        ///
        /// \param t the kind of boundary analysis whose full rule mask is requested
        /// \return character_mask, word_mask, sentence_mask or line_mask for the matching enumerator;
        ///         0 if \a t matches none of the enumerators handled by the switch
        inline rule_type boundary_rule(boundary_type t)
        {
            switch(t) {
                case character: return character_mask;
                case word: return word_mask;
                case sentence: return sentence_mask;
                case line: return line_mask;
            }
            // Fallback reached only when t is none of the four enumerators above.
            return 0;
        }

        ///@}
    } // namespace boundary
}}    // namespace boost::locale
0105 
0106 #ifdef BOOST_MSVC
0107 #    pragma warning(pop)
0108 #endif
0109 
0110 #endif