Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:44:56

0001 //
0002 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0.
0005 // https://www.boost.org/LICENSE_1_0.txt
0006 
0007 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
0008 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
0009 
0010 #include <boost/locale/boundary/boundary_point.hpp>
0011 #include <boost/locale/boundary/facets.hpp>
0012 #include <boost/locale/boundary/segment.hpp>
0013 #include <boost/locale/boundary/types.hpp>
0014 #include <boost/iterator/iterator_facade.hpp>
0015 #include <algorithm>
0016 #include <cstdint>
0017 #include <iterator>
0018 #include <locale>
0019 #include <memory>
0020 #include <stdexcept>
0021 #include <string>
0022 #include <type_traits>
0023 #include <vector>
0024 
0025 #ifdef BOOST_MSVC
0026 #    pragma warning(push)
0027 #    pragma warning(disable : 4275 4251 4231 4660)
0028 #endif
0029 
0030 namespace boost { namespace locale { namespace boundary {
0031     ///
0032     /// \defgroup boundary Boundary Analysis
0033     ///
0034     /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence
0035     /// boundaries
0036     ///
0037     /// @{
0038     ///
0039 
0040     /// \cond INTERNAL
0041 
0042     namespace detail {
0043         template<typename Char>
0044         const boundary_indexing<Char>& get_boundary_indexing(const std::locale& l)
0045         {
0046             using facet_type = boundary_indexing<Char>;
0047             if(!std::has_facet<facet_type>(l))
0048                 throw std::runtime_error("Locale was generated without segmentation support!");
0049             return std::use_facet<facet_type>(l);
0050         }
0051 
0052         template<typename IteratorType,
0053                  typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
0054         struct mapping_traits {
0055             typedef typename std::iterator_traits<IteratorType>::value_type char_type;
0056             static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
0057             {
0058                 std::basic_string<char_type> str(b, e);
0059                 return get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
0060             }
0061         };
0062 
/// Compile-time test telling whether \c SomeIteratorType is one of the iterator
/// types enumerated below for character type \c CharType: a (const) raw pointer,
/// or a (const_)iterator of std::basic_string / std::vector.
template<typename CharType, typename SomeIteratorType>
struct linear_iterator_traits {
private:
    template<typename Candidate>
    using is_iter = std::is_same<SomeIteratorType, Candidate>;
    using string_type = std::basic_string<CharType>;
    using vector_type = std::vector<CharType>;

public:
    static constexpr bool is_linear = is_iter<CharType*>::value || is_iter<const CharType*>::value
                                      || is_iter<typename string_type::iterator>::value
                                      || is_iter<typename string_type::const_iterator>::value
                                      || is_iter<typename vector_type::iterator>::value
                                      || is_iter<typename vector_type::const_iterator>::value;
};
0072 
0073         template<typename IteratorType>
0074         struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
0075             typedef typename std::iterator_traits<IteratorType>::value_type char_type;
0076 
0077             static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
0078             {
0079                 index_type result;
0080 
0081                 // Optimize for most common cases
0082                 //
0083                 // C++11 requires that string is continuous in memory and all known
0084                 // string implementations do this because of c_str() support.
0085 
0086                 if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
0087                     const char_type* begin = &*b;
0088                     const char_type* end = begin + (e - b);
0089                     index_type tmp = get_boundary_indexing<char_type>(l).map(t, begin, end);
0090                     result.swap(tmp);
0091                 } else {
0092                     std::basic_string<char_type> str(b, e);
0093                     index_type tmp = get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
0094                     result.swap(tmp);
0095                 }
0096                 return result;
0097             }
0098         };
0099 
0100         template<typename BaseIterator>
0101         class mapping {
0102         public:
0103             typedef BaseIterator base_iterator;
0104             typedef typename std::iterator_traits<base_iterator>::value_type char_type;
0105 
0106             mapping(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc) :
0107                 index_(new index_type()), begin_(begin), end_(end)
0108             {
0109                 index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
0110                 index_->swap(idx);
0111             }
0112 
0113             mapping() {}
0114 
0115             const index_type& index() const { return *index_; }
0116 
0117             base_iterator begin() const { return begin_; }
0118 
0119             base_iterator end() const { return end_; }
0120 
0121         private:
0122             std::shared_ptr<index_type> index_;
0123             base_iterator begin_, end_;
0124         };
0125 
0126         template<typename BaseIterator>
0127         class segment_index_iterator : public boost::iterator_facade<segment_index_iterator<BaseIterator>,
0128                                                                      segment<BaseIterator>,
0129                                                                      boost::bidirectional_traversal_tag,
0130                                                                      const segment<BaseIterator>&> {
0131         public:
0132             typedef BaseIterator base_iterator;
0133             typedef mapping<base_iterator> mapping_type;
0134             typedef segment<base_iterator> segment_type;
0135 
0136             segment_index_iterator() : current_(0, 0), map_(nullptr), mask_(0), full_select_(false) {}
0137 
0138             segment_index_iterator(base_iterator p, const mapping_type* map, rule_type mask, bool full_select) :
0139                 map_(map), mask_(mask), full_select_(full_select)
0140             {
0141                 set(p);
0142             }
0143             segment_index_iterator(bool is_begin, const mapping_type* map, rule_type mask, bool full_select) :
0144                 map_(map), mask_(mask), full_select_(full_select)
0145             {
0146                 if(is_begin)
0147                     set_begin();
0148                 else
0149                     set_end();
0150             }
0151 
0152             const segment_type& dereference() const { return value_; }
0153 
0154             bool equal(const segment_index_iterator& other) const
0155             {
0156                 return map_ == other.map_ && current_.second == other.current_.second;
0157             }
0158 
0159             void increment()
0160             {
0161                 std::pair<size_t, size_t> next = current_;
0162                 if(full_select_) {
0163                     next.first = next.second;
0164                     while(next.second < size()) {
0165                         next.second++;
0166                         if(valid_offset(next.second))
0167                             break;
0168                     }
0169                     if(next.second == size())
0170                         next.first = next.second - 1;
0171                 } else {
0172                     while(next.second < size()) {
0173                         next.first = next.second;
0174                         next.second++;
0175                         if(valid_offset(next.second))
0176                             break;
0177                     }
0178                 }
0179                 update_current(next);
0180             }
0181 
0182             void decrement()
0183             {
0184                 std::pair<size_t, size_t> next = current_;
0185                 if(full_select_) {
0186                     while(next.second > 1) {
0187                         next.second--;
0188                         if(valid_offset(next.second))
0189                             break;
0190                     }
0191                     next.first = next.second;
0192                     while(next.first > 0) {
0193                         next.first--;
0194                         if(valid_offset(next.first))
0195                             break;
0196                     }
0197                 } else {
0198                     while(next.second > 1) {
0199                         next.second--;
0200                         if(valid_offset(next.second))
0201                             break;
0202                     }
0203                     next.first = next.second - 1;
0204                 }
0205                 update_current(next);
0206             }
0207 
0208         private:
0209             void set_end()
0210             {
0211                 current_.first = size() - 1;
0212                 current_.second = size();
0213                 value_ = segment_type(map_->end(), map_->end(), 0);
0214             }
0215             void set_begin()
0216             {
0217                 current_.first = current_.second = 0;
0218                 value_ = segment_type(map_->begin(), map_->begin(), 0);
0219                 increment();
0220             }
0221 
0222             void set(base_iterator p)
0223             {
0224                 const auto b = map_->index().begin(), e = map_->index().end();
0225                 auto boundary_point = std::upper_bound(b, e, break_info(std::distance(map_->begin(), p)));
0226                 while(boundary_point != e && (boundary_point->rule & mask_) == 0)
0227                     ++boundary_point;
0228 
0229                 current_.first = current_.second = boundary_point - b;
0230 
0231                 if(full_select_) {
0232                     while(current_.first > 0) {
0233                         current_.first--;
0234                         if(valid_offset(current_.first))
0235                             break;
0236                     }
0237                 } else {
0238                     if(current_.first > 0)
0239                         current_.first--;
0240                 }
0241                 value_.first = map_->begin();
0242                 std::advance(value_.first, get_offset(current_.first));
0243                 value_.second = value_.first;
0244                 std::advance(value_.second, get_offset(current_.second) - get_offset(current_.first));
0245 
0246                 update_rule();
0247             }
0248 
0249             void update_current(std::pair<size_t, size_t> pos)
0250             {
0251                 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
0252                 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
0253                 std::advance(value_.first, first_diff);
0254                 std::advance(value_.second, second_diff);
0255                 current_ = pos;
0256                 update_rule();
0257             }
0258 
0259             void update_rule()
0260             {
0261                 if(current_.second != size())
0262                     value_.rule(index()[current_.second].rule);
0263             }
0264             size_t get_offset(size_t ind) const
0265             {
0266                 if(ind == size())
0267                     return index().back().offset;
0268                 return index()[ind].offset;
0269             }
0270 
0271             bool valid_offset(size_t offset) const
0272             {
0273                 return offset == 0 || offset == size() // make sure we not acess index[size]
0274                        || (index()[offset].rule & mask_) != 0;
0275             }
0276 
0277             size_t size() const { return index().size(); }
0278 
0279             const index_type& index() const { return map_->index(); }
0280 
0281             segment_type value_;
0282             std::pair<size_t, size_t> current_;
0283             const mapping_type* map_;
0284             rule_type mask_;
0285             bool full_select_;
0286         };
0287 
0288         template<typename BaseIterator>
0289         class boundary_point_index_iterator : public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
0290                                                                             boundary_point<BaseIterator>,
0291                                                                             boost::bidirectional_traversal_tag,
0292                                                                             const boundary_point<BaseIterator>&> {
0293         public:
0294             typedef BaseIterator base_iterator;
0295             typedef mapping<base_iterator> mapping_type;
0296             typedef boundary_point<base_iterator> boundary_point_type;
0297 
0298             boundary_point_index_iterator() : current_(0), map_(nullptr), mask_(0) {}
0299 
0300             boundary_point_index_iterator(bool is_begin, const mapping_type* map, rule_type mask) :
0301                 map_(map), mask_(mask)
0302             {
0303                 if(is_begin)
0304                     set_begin();
0305                 else
0306                     set_end();
0307             }
0308             boundary_point_index_iterator(base_iterator p, const mapping_type* map, rule_type mask) :
0309                 map_(map), mask_(mask)
0310             {
0311                 set(p);
0312             }
0313 
0314             const boundary_point_type& dereference() const { return value_; }
0315 
0316             bool equal(const boundary_point_index_iterator& other) const
0317             {
0318                 return map_ == other.map_ && current_ == other.current_;
0319             }
0320 
0321             void increment()
0322             {
0323                 size_t next = current_;
0324                 while(next < size()) {
0325                     next++;
0326                     if(valid_offset(next))
0327                         break;
0328                 }
0329                 update_current(next);
0330             }
0331 
0332             void decrement()
0333             {
0334                 size_t next = current_;
0335                 while(next > 0) {
0336                     next--;
0337                     if(valid_offset(next))
0338                         break;
0339                 }
0340                 update_current(next);
0341             }
0342 
0343         private:
0344             void set_end()
0345             {
0346                 current_ = size();
0347                 value_ = boundary_point_type(map_->end(), 0);
0348             }
0349             void set_begin()
0350             {
0351                 current_ = 0;
0352                 value_ = boundary_point_type(map_->begin(), 0);
0353             }
0354 
0355             void set(base_iterator p)
0356             {
0357                 size_t dist = std::distance(map_->begin(), p);
0358 
0359                 const auto b = index().begin(), e = index().end();
0360                 const auto ptr = std::lower_bound(b, e, break_info(dist));
0361 
0362                 if(ptr == e)
0363                     current_ = size() - 1;
0364                 else
0365                     current_ = ptr - b;
0366 
0367                 while(!valid_offset(current_))
0368                     current_++;
0369 
0370                 std::ptrdiff_t diff = get_offset(current_) - dist;
0371                 std::advance(p, diff);
0372                 value_.iterator(p);
0373                 update_rule();
0374             }
0375 
0376             void update_current(size_t pos)
0377             {
0378                 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
0379                 base_iterator i = value_.iterator();
0380                 std::advance(i, diff);
0381                 current_ = pos;
0382                 value_.iterator(i);
0383                 update_rule();
0384             }
0385 
0386             void update_rule()
0387             {
0388                 if(current_ != size())
0389                     value_.rule(index()[current_].rule);
0390             }
0391             size_t get_offset(size_t ind) const
0392             {
0393                 if(ind == size())
0394                     return index().back().offset;
0395                 return index()[ind].offset;
0396             }
0397 
0398             bool valid_offset(size_t offset) const
0399             {
0400                 return offset == 0 || offset + 1 >= size() // last and first are always valid regardless of mark
0401                        || (index()[offset].rule & mask_) != 0;
0402             }
0403 
0404             size_t size() const { return index().size(); }
0405 
0406             const index_type& index() const { return map_->index(); }
0407 
0408             boundary_point_type value_;
0409             size_t current_;
0410             const mapping_type* map_;
0411             rule_type mask_;
0412         };
0413 
0414     } // namespace detail
0415 
0416     /// \endcond
0417 
0418     template<typename BaseIterator>
0419     class segment_index;
0420 
0421     template<typename BaseIterator>
0422     class boundary_point_index;
0423 
0424     /// \brief This class holds an index of segments in the text range and allows to iterate over them
0425     ///
0426     /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
0427     /// to the \ref segment objects.
0428     ///
0429     /// It provides two options on way of selecting segments:
0430     ///
0431     /// -   \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to
0432     ///     various masks %as \ref word_any.
0433     ///     \n
0434     ///     The default is to select any types of boundaries.
0435     ///     \n
0436     ///     For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators
0437     ///     would iterate only over the words containing Kana letters and \ref word_any would select all types of
0438     ///     words excluding ranges that consist of white space and punctuation marks. So iterating over the text
0439     ///     "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be",
0440     ///     instead of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?".
0441     /// -   \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous
0442     ///     %boundary point does not fit the selected rule.
0443     ///     \n
0444     ///     For example: We want to fetch all sentences from the following text: "Hello! How\nare you?".
0445     ///     \n
0446     ///     This text contains three %boundary points separating it to sentences by different rules:
0447     ///     - The exclamation mark "!" ends the sentence "Hello!"
0448     ///     - The line feed that splits the sentence "How\nare you?" into two parts.
0449     ///     - The question mark that ends the second sentence.
0450     ///     \n
0451     ///     If you would only change the \ref rule() to \ref sentence_term then the segment_index would
0452     ///     provide two sentences "Hello!" and "are you?" %as only them actually terminated with required
0453     ///     terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include
0454     ///     all the text up to previous valid %boundary point and would return two expected sentences:
0455     ///     "Hello!" and "How\nare you?".
0456     ///
0457     /// This class allows to find a segment according to the given iterator in range using \ref find() member
0458     /// function.
0459     ///
0460     /// \note
0461     ///
0462     /// -   Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text
0463     ///     invalidates existing iterators and they can't be used any more.
0464     /// -   segment_index can be created from boundary_point_index or other segment_index that was created with
0465     ///     same \ref boundary_type.  This is very fast operation %as they shared same index
0466     ///     and it does not require its regeneration.
0467     ///
0468     /// \see
0469     ///
0470     /// - \ref boundary_point_index
0471     /// - \ref segment
0472     /// - \ref boundary_point
0473 
0474     template<typename BaseIterator>
0475     class segment_index {
0476     public:
0477         /// The type of the iterator used to iterate over the original text
0478         typedef BaseIterator base_iterator;
0479 
0480 #ifdef BOOST_LOCALE_DOXYGEN
0481         /// The bidirectional iterator that iterates over \ref value_type objects.
0482         ///
0483         /// -   The iterators may be invalidated by use of any non-const member function
0484         ///     including but not limited to \ref rule(rule_type) and \ref full_select(bool).
0485         /// -   The returned value_type object is valid %as long %as iterator points to it.
0486         ///     So this following code is wrong %as t used after p was updated:
0487         ///     \code
0488         ///     segment_index<some_iterator>::iterator p=index.begin();
0489         ///     segment<some_iterator> &t = *p;
0490         ///     ++p;
0491         ///     std::cout << t.str() << std::endl;
0492         ///     \endcode
0493         typedef unspecified_iterator_type iterator;
0494         /// \copydoc iterator
0495         typedef unspecified_iterator_type const_iterator;
0496 #else
0497         typedef detail::segment_index_iterator<base_iterator> iterator;
0498         typedef detail::segment_index_iterator<base_iterator> const_iterator;
0499 #endif
0500         /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
0501         /// an object that represents selected segment.
0502         typedef segment<base_iterator> value_type;
0503 
0504         /// Default constructor.
0505         ///
0506         /// \note
0507         ///
0508         /// When this object is constructed by default it does not include a valid index, thus
0509         /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
0510         /// behavior
0511         segment_index() : mask_(0xFFFFFFFFu), full_select_(false) {}
0512         /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
0513         /// in range [begin,end) using a rule \a mask for locale \a loc.
0514         segment_index(boundary_type type,
0515                       base_iterator begin,
0516                       base_iterator end,
0517                       rule_type mask,
0518                       const std::locale& loc = std::locale()) :
0519             map_(type, begin, end, loc),
0520             mask_(mask), full_select_(false)
0521         {}
0522         /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
0523         /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc.
0524         segment_index(boundary_type type,
0525                       base_iterator begin,
0526                       base_iterator end,
0527                       const std::locale& loc = std::locale()) :
0528             map_(type, begin, end, loc),
0529             mask_(0xFFFFFFFFu), full_select_(false)
0530         {}
0531 
0532         /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information
0533         /// and used default rule (all possible segments)
0534         ///
0535         /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
0536         /// range it is much better to create one from another rather then indexing the same
0537         /// range twice.
0538         ///
0539         /// \note \ref rule() flags are not copied
0540         segment_index(const boundary_point_index<base_iterator>&);
0541 
0542         /// Copy an index from a \ref boundary_point_index. It copies all indexing information
0543         /// and uses the default rule (all possible segments)
0544         ///
0545         /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
0546         /// range it is much better to create one from another rather then indexing the same
0547         /// range twice.
0548         ///
0549         /// \note \ref rule() flags are not copied
0550         segment_index& operator=(const boundary_point_index<base_iterator>&);
0551 
0552         /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
0553         /// in range [begin,end) for locale \a loc.
0554         ///
0555         /// \note \ref rule() and \ref full_select() remain unchanged.
0556         void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
0557         {
0558             map_ = mapping_type(type, begin, end, loc);
0559         }
0560 
0561         /// Get the \ref iterator on the beginning of the segments range.
0562         ///
0563         /// Preconditions: the segment_index should have a mapping
0564         ///
0565         /// \note
0566         ///
0567         /// The returned iterator is invalidated by access to any non-const member functions of this object
0568         iterator begin() const
0569         {
0570             return iterator(true, &map_, mask_, full_select_);
0571         }
0572 
0573         /// Get the \ref iterator on the ending of the segments range.
0574         ///
0575         /// Preconditions: the segment_index should have a mapping
0576         ///
0577         /// The returned iterator is invalidated by access to any non-const member functions of this object
0578         iterator end() const
0579         {
0580             return iterator(false, &map_, mask_, full_select_);
0581         }
0582 
0583         /// Find a first valid segment following a position \a p.
0584         ///
0585         /// If \a p is inside a valid segment this segment is selected:
0586         ///
0587         /// For example: For \ref word %boundary analysis with \ref word_any rule():
0588         ///
0589         /// - "to| be or ", would point to "be",
0590         /// - "t|o be or ", would point to "to",
0591         /// - "to be or| ", would point to end.
0592         ///
0593         ///
0594         /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator
0595         /// to the text in the mapped range.
0596         ///
0597         /// The returned iterator is invalidated by access to any non-const member functions of this object
0598         iterator find(base_iterator p) const
0599         {
0600             return iterator(p, &map_, mask_, full_select_);
0601         }
0602 
0603         /// Get the mask of rules that are used
0604         rule_type rule() const
0605         {
0606             return mask_;
0607         }
0608         /// Set the mask of rules that are used
0609         void rule(rule_type v)
0610         {
0611             mask_ = v;
0612         }
0613 
0614         /// Get the full_select property value -  should segment include in the range
0615         /// values that not belong to specific \ref rule() or not.
0616         ///
0617         /// The default value is false.
0618         ///
0619         /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
0620         /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
0621         /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
0622         /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
0623         /// following part "are you?"
0624         bool full_select() const
0625         {
0626             return full_select_;
0627         }
0628 
0629         /// Set the full_select property value -  should segment include in the range
0630         /// values that not belong to specific \ref rule() or not.
0631         ///
0632         /// The default value is false.
0633         ///
0634         /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
0635         /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
0636         /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
0637         /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
0638         /// following part "are you?"
0639         void full_select(bool v)
0640         {
0641             full_select_ = v;
0642         }
0643 
0644     private:
0645         friend class boundary_point_index<base_iterator>;
0646         typedef detail::mapping<base_iterator> mapping_type;
0647         mapping_type map_;
0648         rule_type mask_;
0649         bool full_select_;
0650     };
0651 
0652     /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating
0653     /// over them.
0654     ///
0655     /// This class provides \ref begin() and \ref end() member functions that return bidirectional iterators
0656     /// to the \ref boundary_point objects.
0657     ///
0658     /// It provides an option that affects selecting %boundary points according to different rules:
0659     /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific
0660     /// types of %boundary points like \ref sentence_term.
0661     ///
0662     /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default
0663     /// rule is used the %boundary points would be:
0664     ///
0665     /// - "|Hello! How\nare you?"
0666     /// - "Hello! |How\nare you?"
0667     /// - "Hello! How\n|are you?"
0668     /// - "Hello! How\nare you?|"
0669     ///
0670     /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be:
0671     ///
0672     /// - "|Hello! How\nare you?"
0673     /// - "Hello! |How\nare you?"
0674     /// - "Hello! How\nare you?|"
0675     ///
0676     /// Such that a %boundary point defined by a line feed character would be ignored.
0677     ///
0678     /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member
0679     /// function.
0680     ///
0681     /// \note
0682     /// -   Even an empty text range [x,x) considered to have a one %boundary point x.
0683     /// -   \a a and \a b points of the range [a,b) are always considered %boundary points
0684     ///     regardless the rules used.
0685     /// -   Changing the \ref rule() option or, of course, re-indexing the text
0686     ///     invalidates existing iterators and they can't be used any more.
0687     /// -   boundary_point_index can be created from segment_index or other boundary_point_index that was created with
0688     ///     same \ref boundary_type.  This is very fast operation %as they shared same index
0689     ///     and it does not require its regeneration.
0690     ///
0691     /// \see
0692     ///
0693     /// - \ref segment_index
0694     /// - \ref boundary_point
0695     /// - \ref segment
0696     template<typename BaseIterator>
0697     class boundary_point_index {
0698     public:
0699         /// The type of the iterator used to iterate over the original text
0700         typedef BaseIterator base_iterator;
0701
0702 #ifdef BOOST_LOCALE_DOXYGEN
0703         /// The bidirectional iterator that iterates over \ref value_type objects.
0704         ///
0705         /// -   The iterators may be invalidated by use of any non-const member function
0706         ///     including but not limited to the \ref rule(rule_type) member function.
0707         /// -   The returned value_type object is valid %as long %as the iterator points to it.
0708         ///     So the following code is wrong %as t is used after p was updated:
0709         ///     \code
0710         ///     boundary_point_index<some_iterator>::iterator p=index.begin();
0711         ///     boundary_point<some_iterator> &t = *p;
0712         ///     ++p;
0713         ///     rule_type r = t.rule();
0714         ///     \endcode
0715         ///
0716         typedef unspecified_iterator_type iterator;
0717         /// \copydoc iterator
0718         typedef unspecified_iterator_type const_iterator;
0719 #else
0720         typedef detail::boundary_point_index_iterator<base_iterator> iterator;
0721         typedef detail::boundary_point_index_iterator<base_iterator> const_iterator;
0722 #endif
0723         /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
0724         /// an object that represents the selected \ref boundary_point "boundary point".
0725         typedef boundary_point<base_iterator> value_type;
0726
0727         /// Default constructor. The rule mask is initialized to the full mask (all bits set).
0728         ///
0729         /// \note
0730         ///
0731         /// When this object is constructed by default it does not include a valid index, thus
0732         /// calling the \ref begin(), \ref end() or \ref find() member functions would lead to undefined
0733         /// behavior
0734         boundary_point_index() : mask_(0xFFFFFFFFu) {}
0735
0736         /// Create a boundary_point_index for %boundary analysis \ref boundary_type "type" of the text
0737         /// in range [begin,end) using a rule \a mask for locale \a loc.
0738         boundary_point_index(boundary_type type,
0739                              base_iterator begin,
0740                              base_iterator end,
0741                              rule_type mask,
0742                              const std::locale& loc = std::locale()) :
0743             map_(type, begin, end, loc),
0744             mask_(mask)
0745         {}
0746         /// Create a boundary_point_index for %boundary analysis \ref boundary_type "type" of the text
0747         /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc.
0748         boundary_point_index(boundary_type type,
0749                              base_iterator begin,
0750                              base_iterator end,
0751                              const std::locale& loc = std::locale()) :
0752             map_(type, begin, end, loc),
0753             mask_(0xFFFFFFFFu)
0754         {}
0755
0756         /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
0757         /// and uses the default rule (all possible %boundary points)
0758         ///
0759         /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
0760         /// range it is much better to create one from another rather than indexing the same
0761         /// range twice.
0762         ///
0763         /// \note \ref rule() flags are not copied
0764         boundary_point_index(const segment_index<base_iterator>& other);
0765         /// Assign the index of a \ref segment_index to this boundary_point_index. It copies all indexing
0766         /// information and keeps the current \ref rule() unchanged
0767         ///
0768         /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
0769         /// range it is much better to create one from another rather than indexing the same
0770         /// range twice.
0771         ///
0772         /// \note \ref rule() flags are not copied
0773         boundary_point_index& operator=(const segment_index<base_iterator>& other);
0774
0775         /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
0776         /// in range [begin,end) for locale \a loc. The previous index is replaced.
0777         ///
0778         /// \note \ref rule() remains unchanged.
0779         void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
0780         {
0781             map_ = mapping_type(type, begin, end, loc);
0782         }
0783
0784         /// Get the \ref iterator to the beginning of the %boundary points range.
0785         ///
0786         /// Preconditions: this boundary_point_index should have a mapping
0787         ///
0788         /// \note
0789         ///
0790         /// The returned iterator is invalidated by access to any non-const member functions of this object
0791         iterator begin() const
0792         {
0793             return iterator(true, &map_, mask_);
0794         }
0795
0796         /// Get the \ref iterator to the end of the %boundary points range.
0797         ///
0798         /// Preconditions: this boundary_point_index should have a mapping
0799         ///
0800         /// \note
0801         ///
0802         /// The returned iterator is invalidated by access to any non-const member functions of this object
0803         iterator end() const
0804         {
0805             return iterator(false, &map_, mask_);
0806         }
0807
0808         /// Find the first valid %boundary point at position \a p or following it.
0809         ///
0810         /// For example: For \ref word %boundary analysis of the text "to be or"
0811         ///
0812         /// - "|to be", would return the %boundary point at "|to be",
0813         /// - "t|o be", would point to "to| be"
0814         ///
0815         /// Preconditions: the boundary_point_index should have a mapping and \a p should be a valid iterator
0816         /// to the text in the mapped range.
0817         ///
0818         /// The returned iterator is invalidated by access to any non-const member functions of this object
0819         iterator find(base_iterator p) const
0820         {
0821             return iterator(p, &map_, mask_);
0822         }
0823
0824         /// Get the mask of rules that are used
0825         rule_type rule() const
0826         {
0827             return mask_;
0828         }
0829         /// Set the mask of rules that are used; the index itself is left untouched
0830         void rule(rule_type v)
0831         {
0832             mask_ = v;
0833         }
0834
0835     private:
0836         friend class segment_index<base_iterator>; // segment_index reads map_ when it is built or assigned from this class
0837         typedef detail::mapping<base_iterator> mapping_type;
0838         mapping_type map_; // index built over the mapped text range
0839         rule_type mask_; // rule mask handed to every iterator created by begin(), end() and find()
0840     };
0841 
0842     /// \cond INTERNAL
0843     template<typename BaseIterator> // converting constructor: boundary_point_index -> segment_index
0844     segment_index<BaseIterator>::segment_index(const boundary_point_index<BaseIterator>& other) :
0845         map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false) // copy other's index; other's rule is not taken over (mask = all bits set, full_select_ = false)
0846     {}
0847 
0848     template<typename BaseIterator> // converting constructor: segment_index -> boundary_point_index
0849     boundary_point_index<BaseIterator>::boundary_point_index(const segment_index<BaseIterator>& other) :
0850         map_(other.map_), mask_(0xFFFFFFFFu) // copy other's index; other's rule is not taken over (mask = all bits set)
0851     {}
0852 
0853     template<typename BaseIterator> // assignment from a boundary_point_index
0854     segment_index<BaseIterator>& segment_index<BaseIterator>::operator=(const boundary_point_index<BaseIterator>& other)
0855     {
0856         map_ = other.map_; // only the index is replaced; mask_ and full_select_ keep their current values
0857         return *this;
0858     }
0859 
0860     template<typename BaseIterator> // assignment from a segment_index
0861     boundary_point_index<BaseIterator>&
0862     boundary_point_index<BaseIterator>::operator=(const segment_index<BaseIterator>& other)
0863     {
0864         map_ = other.map_; // only the index is replaced; mask_ keeps its current value
0865         return *this;
0866     }
0867     /// \endcond
0868 
0869     typedef segment_index<std::string::const_iterator> ssegment_index;   ///< segment_index over std::string iterators
0870     typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< segment_index over std::wstring iterators
0871 #ifndef BOOST_LOCALE_NO_CXX20_STRING8
0872     typedef segment_index<std::u8string::const_iterator> u8ssegment_index; ///< segment_index over std::u8string iterators
0873 #endif
0874 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0875     typedef segment_index<std::u16string::const_iterator> u16ssegment_index; ///< segment_index over std::u16string iterators
0876 #endif
0877 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0878     typedef segment_index<std::u32string::const_iterator> u32ssegment_index; ///< segment_index over std::u32string iterators
0879 #endif
0880
0881     typedef segment_index<const char*> csegment_index;     ///< segment_index over a const char pointer range
0882     typedef segment_index<const wchar_t*> wcsegment_index; ///< segment_index over a const wchar_t pointer range
0883 #ifdef __cpp_char8_t
0884     typedef segment_index<const char8_t*> u8csegment_index; ///< segment_index over a const char8_t pointer range
0885 #endif
0886 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0887     typedef segment_index<const char16_t*> u16csegment_index; ///< segment_index over a const char16_t pointer range
0888 #endif
0889 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0890     typedef segment_index<const char32_t*> u32csegment_index; ///< segment_index over a const char32_t pointer range
0891 #endif
0892 
0893     typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;   ///< boundary_point_index over std::string iterators
0894     typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index; ///< boundary_point_index over std::wstring iterators
0895 #ifndef BOOST_LOCALE_NO_CXX20_STRING8
0896     typedef boundary_point_index<std::u8string::const_iterator> u8sboundary_point_index; ///< boundary_point_index over std::u8string iterators
0897 #endif
0898 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0899     typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index; ///< boundary_point_index over std::u16string iterators
0900 #endif
0901 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0902     typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index; ///< boundary_point_index over std::u32string iterators
0903 #endif
0904
0905     typedef boundary_point_index<const char*> cboundary_point_index;     ///< boundary_point_index over a const char pointer range
0906     typedef boundary_point_index<const wchar_t*> wcboundary_point_index; ///< boundary_point_index over a const wchar_t pointer range
0907 #ifdef __cpp_char8_t
0908     typedef boundary_point_index<const char8_t*> u8cboundary_point_index; ///< boundary_point_index over a const char8_t pointer range
0909 #endif
0910 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0911     typedef boundary_point_index<const char16_t*> u16cboundary_point_index; ///< boundary_point_index over a const char16_t pointer range
0912 #endif
0913 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0914     typedef boundary_point_index<const char32_t*> u32cboundary_point_index; ///< boundary_point_index over a const char32_t pointer range
0915 #endif
0916 
0917 }}} // namespace boost::locale::boundary
0918 
0919 ///
0920 /// \example boundary.cpp
0921 /// Example of using segment_index
0922 /// \example wboundary.cpp
0923 /// Example of using segment_index over wide strings
0924 ///
0925 
0926 #ifdef BOOST_MSVC
0927 #    pragma warning(pop)
0928 #endif
0929 
0930 #endif