File indexing completed on 2025-01-30 09:44:56
0001
0002
0003
0004
0005
0006
0007 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
0008 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
0009
0010 #include <boost/locale/boundary/boundary_point.hpp>
0011 #include <boost/locale/boundary/facets.hpp>
0012 #include <boost/locale/boundary/segment.hpp>
0013 #include <boost/locale/boundary/types.hpp>
0014 #include <boost/iterator/iterator_facade.hpp>
0015 #include <algorithm>
0016 #include <cstdint>
0017 #include <iterator>
0018 #include <locale>
0019 #include <memory>
0020 #include <stdexcept>
0021 #include <string>
0022 #include <type_traits>
0023 #include <vector>
0024
0025 #ifdef BOOST_MSVC
0026 # pragma warning(push)
0027 # pragma warning(disable : 4275 4251 4231 4660)
0028 #endif
0029
0030 namespace boost { namespace locale { namespace boundary {
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042 namespace detail {
0043 template<typename Char>
0044 const boundary_indexing<Char>& get_boundary_indexing(const std::locale& l)
0045 {
0046 using facet_type = boundary_indexing<Char>;
0047 if(!std::has_facet<facet_type>(l))
0048 throw std::runtime_error("Locale was generated without segmentation support!");
0049 return std::use_facet<facet_type>(l);
0050 }
0051
0052 template<typename IteratorType,
0053 typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
0054 struct mapping_traits {
0055 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
0056 static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
0057 {
0058 std::basic_string<char_type> str(b, e);
0059 return get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
0060 }
0061 };
0062
/// Compile-time check whether \a SomeIteratorType is one of the iterator types
/// this header treats as addressing contiguous \a CharType storage: raw pointers
/// and the (const) iterators of std::basic_string and std::vector.
template<typename CharType, typename SomeIteratorType>
struct linear_iterator_traits {
private:
    /// Shorthand: is the inspected iterator exactly \a Candidate?
    template<typename Candidate>
    using same_as = std::is_same<SomeIteratorType, Candidate>;

    typedef std::basic_string<CharType> string_type;
    typedef std::vector<CharType> vector_type;

public:
    static constexpr bool is_linear =
      same_as<CharType*>::value || same_as<const CharType*>::value
      || same_as<typename string_type::iterator>::value
      || same_as<typename string_type::const_iterator>::value
      || same_as<typename vector_type::iterator>::value
      || same_as<typename vector_type::const_iterator>::value;
};
0072
0073 template<typename IteratorType>
0074 struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
0075 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
0076
0077 static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
0078 {
0079 index_type result;
0080
0081
0082
0083
0084
0085
0086 if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
0087 const char_type* begin = &*b;
0088 const char_type* end = begin + (e - b);
0089 index_type tmp = get_boundary_indexing<char_type>(l).map(t, begin, end);
0090 result.swap(tmp);
0091 } else {
0092 std::basic_string<char_type> str(b, e);
0093 index_type tmp = get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
0094 result.swap(tmp);
0095 }
0096 return result;
0097 }
0098 };
0099
0100 template<typename BaseIterator>
0101 class mapping {
0102 public:
0103 typedef BaseIterator base_iterator;
0104 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
0105
0106 mapping(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc) :
0107 index_(new index_type()), begin_(begin), end_(end)
0108 {
0109 index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
0110 index_->swap(idx);
0111 }
0112
0113 mapping() {}
0114
0115 const index_type& index() const { return *index_; }
0116
0117 base_iterator begin() const { return begin_; }
0118
0119 base_iterator end() const { return end_; }
0120
0121 private:
0122 std::shared_ptr<index_type> index_;
0123 base_iterator begin_, end_;
0124 };
0125
0126 template<typename BaseIterator>
0127 class segment_index_iterator : public boost::iterator_facade<segment_index_iterator<BaseIterator>,
0128 segment<BaseIterator>,
0129 boost::bidirectional_traversal_tag,
0130 const segment<BaseIterator>&> {
0131 public:
0132 typedef BaseIterator base_iterator;
0133 typedef mapping<base_iterator> mapping_type;
0134 typedef segment<base_iterator> segment_type;
0135
0136 segment_index_iterator() : current_(0, 0), map_(nullptr), mask_(0), full_select_(false) {}
0137
0138 segment_index_iterator(base_iterator p, const mapping_type* map, rule_type mask, bool full_select) :
0139 map_(map), mask_(mask), full_select_(full_select)
0140 {
0141 set(p);
0142 }
0143 segment_index_iterator(bool is_begin, const mapping_type* map, rule_type mask, bool full_select) :
0144 map_(map), mask_(mask), full_select_(full_select)
0145 {
0146 if(is_begin)
0147 set_begin();
0148 else
0149 set_end();
0150 }
0151
0152 const segment_type& dereference() const { return value_; }
0153
0154 bool equal(const segment_index_iterator& other) const
0155 {
0156 return map_ == other.map_ && current_.second == other.current_.second;
0157 }
0158
0159 void increment()
0160 {
0161 std::pair<size_t, size_t> next = current_;
0162 if(full_select_) {
0163 next.first = next.second;
0164 while(next.second < size()) {
0165 next.second++;
0166 if(valid_offset(next.second))
0167 break;
0168 }
0169 if(next.second == size())
0170 next.first = next.second - 1;
0171 } else {
0172 while(next.second < size()) {
0173 next.first = next.second;
0174 next.second++;
0175 if(valid_offset(next.second))
0176 break;
0177 }
0178 }
0179 update_current(next);
0180 }
0181
0182 void decrement()
0183 {
0184 std::pair<size_t, size_t> next = current_;
0185 if(full_select_) {
0186 while(next.second > 1) {
0187 next.second--;
0188 if(valid_offset(next.second))
0189 break;
0190 }
0191 next.first = next.second;
0192 while(next.first > 0) {
0193 next.first--;
0194 if(valid_offset(next.first))
0195 break;
0196 }
0197 } else {
0198 while(next.second > 1) {
0199 next.second--;
0200 if(valid_offset(next.second))
0201 break;
0202 }
0203 next.first = next.second - 1;
0204 }
0205 update_current(next);
0206 }
0207
0208 private:
0209 void set_end()
0210 {
0211 current_.first = size() - 1;
0212 current_.second = size();
0213 value_ = segment_type(map_->end(), map_->end(), 0);
0214 }
0215 void set_begin()
0216 {
0217 current_.first = current_.second = 0;
0218 value_ = segment_type(map_->begin(), map_->begin(), 0);
0219 increment();
0220 }
0221
0222 void set(base_iterator p)
0223 {
0224 const auto b = map_->index().begin(), e = map_->index().end();
0225 auto boundary_point = std::upper_bound(b, e, break_info(std::distance(map_->begin(), p)));
0226 while(boundary_point != e && (boundary_point->rule & mask_) == 0)
0227 ++boundary_point;
0228
0229 current_.first = current_.second = boundary_point - b;
0230
0231 if(full_select_) {
0232 while(current_.first > 0) {
0233 current_.first--;
0234 if(valid_offset(current_.first))
0235 break;
0236 }
0237 } else {
0238 if(current_.first > 0)
0239 current_.first--;
0240 }
0241 value_.first = map_->begin();
0242 std::advance(value_.first, get_offset(current_.first));
0243 value_.second = value_.first;
0244 std::advance(value_.second, get_offset(current_.second) - get_offset(current_.first));
0245
0246 update_rule();
0247 }
0248
0249 void update_current(std::pair<size_t, size_t> pos)
0250 {
0251 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
0252 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
0253 std::advance(value_.first, first_diff);
0254 std::advance(value_.second, second_diff);
0255 current_ = pos;
0256 update_rule();
0257 }
0258
0259 void update_rule()
0260 {
0261 if(current_.second != size())
0262 value_.rule(index()[current_.second].rule);
0263 }
0264 size_t get_offset(size_t ind) const
0265 {
0266 if(ind == size())
0267 return index().back().offset;
0268 return index()[ind].offset;
0269 }
0270
0271 bool valid_offset(size_t offset) const
0272 {
0273 return offset == 0 || offset == size()
0274 || (index()[offset].rule & mask_) != 0;
0275 }
0276
0277 size_t size() const { return index().size(); }
0278
0279 const index_type& index() const { return map_->index(); }
0280
0281 segment_type value_;
0282 std::pair<size_t, size_t> current_;
0283 const mapping_type* map_;
0284 rule_type mask_;
0285 bool full_select_;
0286 };
0287
0288 template<typename BaseIterator>
0289 class boundary_point_index_iterator : public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
0290 boundary_point<BaseIterator>,
0291 boost::bidirectional_traversal_tag,
0292 const boundary_point<BaseIterator>&> {
0293 public:
0294 typedef BaseIterator base_iterator;
0295 typedef mapping<base_iterator> mapping_type;
0296 typedef boundary_point<base_iterator> boundary_point_type;
0297
0298 boundary_point_index_iterator() : current_(0), map_(nullptr), mask_(0) {}
0299
0300 boundary_point_index_iterator(bool is_begin, const mapping_type* map, rule_type mask) :
0301 map_(map), mask_(mask)
0302 {
0303 if(is_begin)
0304 set_begin();
0305 else
0306 set_end();
0307 }
0308 boundary_point_index_iterator(base_iterator p, const mapping_type* map, rule_type mask) :
0309 map_(map), mask_(mask)
0310 {
0311 set(p);
0312 }
0313
0314 const boundary_point_type& dereference() const { return value_; }
0315
0316 bool equal(const boundary_point_index_iterator& other) const
0317 {
0318 return map_ == other.map_ && current_ == other.current_;
0319 }
0320
0321 void increment()
0322 {
0323 size_t next = current_;
0324 while(next < size()) {
0325 next++;
0326 if(valid_offset(next))
0327 break;
0328 }
0329 update_current(next);
0330 }
0331
0332 void decrement()
0333 {
0334 size_t next = current_;
0335 while(next > 0) {
0336 next--;
0337 if(valid_offset(next))
0338 break;
0339 }
0340 update_current(next);
0341 }
0342
0343 private:
0344 void set_end()
0345 {
0346 current_ = size();
0347 value_ = boundary_point_type(map_->end(), 0);
0348 }
0349 void set_begin()
0350 {
0351 current_ = 0;
0352 value_ = boundary_point_type(map_->begin(), 0);
0353 }
0354
0355 void set(base_iterator p)
0356 {
0357 size_t dist = std::distance(map_->begin(), p);
0358
0359 const auto b = index().begin(), e = index().end();
0360 const auto ptr = std::lower_bound(b, e, break_info(dist));
0361
0362 if(ptr == e)
0363 current_ = size() - 1;
0364 else
0365 current_ = ptr - b;
0366
0367 while(!valid_offset(current_))
0368 current_++;
0369
0370 std::ptrdiff_t diff = get_offset(current_) - dist;
0371 std::advance(p, diff);
0372 value_.iterator(p);
0373 update_rule();
0374 }
0375
0376 void update_current(size_t pos)
0377 {
0378 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
0379 base_iterator i = value_.iterator();
0380 std::advance(i, diff);
0381 current_ = pos;
0382 value_.iterator(i);
0383 update_rule();
0384 }
0385
0386 void update_rule()
0387 {
0388 if(current_ != size())
0389 value_.rule(index()[current_].rule);
0390 }
0391 size_t get_offset(size_t ind) const
0392 {
0393 if(ind == size())
0394 return index().back().offset;
0395 return index()[ind].offset;
0396 }
0397
0398 bool valid_offset(size_t offset) const
0399 {
0400 return offset == 0 || offset + 1 >= size()
0401 || (index()[offset].rule & mask_) != 0;
0402 }
0403
0404 size_t size() const { return index().size(); }
0405
0406 const index_type& index() const { return map_->index(); }
0407
0408 boundary_point_type value_;
0409 size_t current_;
0410 const mapping_type* map_;
0411 rule_type mask_;
0412 };
0413
0414 }
0415
0416
0417
// Forward declarations: each index class names the other one in a converting
// constructor, a converting assignment and a friend declaration.
template<class BaseIterator>
class boundary_point_index;

template<class BaseIterator>
class segment_index;
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466
0467
0468
0469
0470
0471
0472
0473
0474 template<typename BaseIterator>
0475 class segment_index {
0476 public:
0477
0478 typedef BaseIterator base_iterator;
0479
0480 #ifdef BOOST_LOCALE_DOXYGEN
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493 typedef unspecified_iterator_type iterator;
0494
0495 typedef unspecified_iterator_type const_iterator;
0496 #else
0497 typedef detail::segment_index_iterator<base_iterator> iterator;
0498 typedef detail::segment_index_iterator<base_iterator> const_iterator;
0499 #endif
0500
0501
0502 typedef segment<base_iterator> value_type;
0503
0504
0505
0506
0507
0508
0509
0510
0511 segment_index() : mask_(0xFFFFFFFFu), full_select_(false) {}
0512
0513
0514 segment_index(boundary_type type,
0515 base_iterator begin,
0516 base_iterator end,
0517 rule_type mask,
0518 const std::locale& loc = std::locale()) :
0519 map_(type, begin, end, loc),
0520 mask_(mask), full_select_(false)
0521 {}
0522
0523
0524 segment_index(boundary_type type,
0525 base_iterator begin,
0526 base_iterator end,
0527 const std::locale& loc = std::locale()) :
0528 map_(type, begin, end, loc),
0529 mask_(0xFFFFFFFFu), full_select_(false)
0530 {}
0531
0532
0533
0534
0535
0536
0537
0538
0539
0540 segment_index(const boundary_point_index<base_iterator>&);
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550 segment_index& operator=(const boundary_point_index<base_iterator>&);
0551
0552
0553
0554
0555
0556 void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
0557 {
0558 map_ = mapping_type(type, begin, end, loc);
0559 }
0560
0561
0562
0563
0564
0565
0566
0567
0568 iterator begin() const
0569 {
0570 return iterator(true, &map_, mask_, full_select_);
0571 }
0572
0573
0574
0575
0576
0577
0578 iterator end() const
0579 {
0580 return iterator(false, &map_, mask_, full_select_);
0581 }
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598 iterator find(base_iterator p) const
0599 {
0600 return iterator(p, &map_, mask_, full_select_);
0601 }
0602
0603
0604 rule_type rule() const
0605 {
0606 return mask_;
0607 }
0608
0609 void rule(rule_type v)
0610 {
0611 mask_ = v;
0612 }
0613
0614
0615
0616
0617
0618
0619
0620
0621
0622
0623
0624 bool full_select() const
0625 {
0626 return full_select_;
0627 }
0628
0629
0630
0631
0632
0633
0634
0635
0636
0637
0638
0639 void full_select(bool v)
0640 {
0641 full_select_ = v;
0642 }
0643
0644 private:
0645 friend class boundary_point_index<base_iterator>;
0646 typedef detail::mapping<base_iterator> mapping_type;
0647 mapping_type map_;
0648 rule_type mask_;
0649 bool full_select_;
0650 };
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687
0688
0689
0690
0691
0692
0693
0694
0695
0696 template<typename BaseIterator>
0697 class boundary_point_index {
0698 public:
0699
0700 typedef BaseIterator base_iterator;
0701
0702 #ifdef BOOST_LOCALE_DOXYGEN
0703
0704
0705
0706
0707
0708
0709
0710
0711
0712
0713
0714
0715
0716 typedef unspecified_iterator_type iterator;
0717
0718 typedef unspecified_iterator_type const_iterator;
0719 #else
0720 typedef detail::boundary_point_index_iterator<base_iterator> iterator;
0721 typedef detail::boundary_point_index_iterator<base_iterator> const_iterator;
0722 #endif
0723
0724
0725 typedef boundary_point<base_iterator> value_type;
0726
0727
0728
0729
0730
0731
0732
0733
0734 boundary_point_index() : mask_(0xFFFFFFFFu) {}
0735
0736
0737
0738 boundary_point_index(boundary_type type,
0739 base_iterator begin,
0740 base_iterator end,
0741 rule_type mask,
0742 const std::locale& loc = std::locale()) :
0743 map_(type, begin, end, loc),
0744 mask_(mask)
0745 {}
0746
0747
0748 boundary_point_index(boundary_type type,
0749 base_iterator begin,
0750 base_iterator end,
0751 const std::locale& loc = std::locale()) :
0752 map_(type, begin, end, loc),
0753 mask_(0xFFFFFFFFu)
0754 {}
0755
0756
0757
0758
0759
0760
0761
0762
0763
0764 boundary_point_index(const segment_index<base_iterator>& other);
0765
0766
0767
0768
0769
0770
0771
0772
0773 boundary_point_index& operator=(const segment_index<base_iterator>& other);
0774
0775
0776
0777
0778
0779 void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
0780 {
0781 map_ = mapping_type(type, begin, end, loc);
0782 }
0783
0784
0785
0786
0787
0788
0789
0790
0791 iterator begin() const
0792 {
0793 return iterator(true, &map_, mask_);
0794 }
0795
0796
0797
0798
0799
0800
0801
0802
0803 iterator end() const
0804 {
0805 return iterator(false, &map_, mask_);
0806 }
0807
0808
0809
0810
0811
0812
0813
0814
0815
0816
0817
0818
0819 iterator find(base_iterator p) const
0820 {
0821 return iterator(p, &map_, mask_);
0822 }
0823
0824
0825 rule_type rule() const
0826 {
0827 return mask_;
0828 }
0829
0830 void rule(rule_type v)
0831 {
0832 mask_ = v;
0833 }
0834
0835 private:
0836 friend class segment_index<base_iterator>;
0837 typedef detail::mapping<base_iterator> mapping_type;
0838 mapping_type map_;
0839 rule_type mask_;
0840 };
0841
0842
0843 template<typename BaseIterator>
0844 segment_index<BaseIterator>::segment_index(const boundary_point_index<BaseIterator>& other) :
0845 map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false)
0846 {}
0847
0848 template<typename BaseIterator>
0849 boundary_point_index<BaseIterator>::boundary_point_index(const segment_index<BaseIterator>& other) :
0850 map_(other.map_), mask_(0xFFFFFFFFu)
0851 {}
0852
0853 template<typename BaseIterator>
0854 segment_index<BaseIterator>& segment_index<BaseIterator>::operator=(const boundary_point_index<BaseIterator>& other)
0855 {
0856 map_ = other.map_;
0857 return *this;
0858 }
0859
0860 template<typename BaseIterator>
0861 boundary_point_index<BaseIterator>&
0862 boundary_point_index<BaseIterator>::operator=(const segment_index<BaseIterator>& other)
0863 {
0864 map_ = other.map_;
0865 return *this;
0866 }
0867
0868
0869 typedef segment_index<std::string::const_iterator> ssegment_index;
0870 typedef segment_index<std::wstring::const_iterator> wssegment_index;
0871 #ifndef BOOST_LOCALE_NO_CXX20_STRING8
0872 typedef segment_index<std::u8string::const_iterator> u8ssegment_index;
0873 #endif
0874 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0875 typedef segment_index<std::u16string::const_iterator> u16ssegment_index;
0876 #endif
0877 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0878 typedef segment_index<std::u32string::const_iterator> u32ssegment_index;
0879 #endif
0880
0881 typedef segment_index<const char*> csegment_index;
0882 typedef segment_index<const wchar_t*> wcsegment_index;
0883 #ifdef __cpp_char8_t
0884 typedef segment_index<const char8_t*> u8csegment_index;
0885 #endif
0886 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0887 typedef segment_index<const char16_t*> u16csegment_index;
0888 #endif
0889 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0890 typedef segment_index<const char32_t*> u32csegment_index;
0891 #endif
0892
0893 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;
0894 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;
0895 #ifndef BOOST_LOCALE_NO_CXX20_STRING8
0896 typedef boundary_point_index<std::u8string::const_iterator> u8sboundary_point_index;
0897 #endif
0898 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0899 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;
0900 #endif
0901 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0902 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;
0903 #endif
0904
0905 typedef boundary_point_index<const char*> cboundary_point_index;
0906 typedef boundary_point_index<const wchar_t*> wcboundary_point_index;
0907 #ifdef __cpp_char8_t
0908 typedef boundary_point_index<const char8_t*> u8cboundary_point_index;
0909 #endif
0910 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
0911 typedef boundary_point_index<const char16_t*> u16cboundary_point_index;
0912 #endif
0913 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
0914 typedef boundary_point_index<const char32_t*> u32cboundary_point_index;
0915 #endif
0916
0917 }}}
0918
0919
0920
0921
0922
0923
0924
0925
0926 #ifdef BOOST_MSVC
0927 # pragma warning(pop)
0928 #endif
0929
0930 #endif