Skip to content

Commit e9912d8

Browse files
authored
<regex>: Cache bitmasks of negated character classes during matching (#5487)
1 parent 2ce34fd commit e9912d8

File tree

1 file changed: 36 additions and 66 deletions.

1 file changed: 36 additions and 66 deletions.

stl/inc/regex

Lines changed: 36 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -1657,6 +1657,17 @@ public:
16571657
_Longest((_Re->_Flags & _Fl_longest) && !(_Mf & regex_constants::match_any)), _Traits(_Tr) {
16581658
_Loop_vals.resize(_Re->_Loops);
16591659
_Adl_verify_range(_Pfirst, _Plast);
1660+
if (_Re->_Flags & _Fl_begin_needs_w) {
1661+
_Char_class_w = _Lookup_char_class(static_cast<_Elem>('W'));
1662+
}
1663+
1664+
if (_Re->_Flags & _Fl_begin_needs_s) {
1665+
_Char_class_s = _Lookup_char_class(static_cast<_Elem>('S'));
1666+
}
1667+
1668+
if (_Re->_Flags & _Fl_begin_needs_d) {
1669+
_Char_class_d = _Lookup_char_class(static_cast<_Elem>('D'));
1670+
}
16601671
}
16611672

16621673
void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
@@ -1739,7 +1750,7 @@ private:
17391750
bool _Do_rep(_Node_rep*, bool, int);
17401751
bool _Do_rep_first(_Node_rep*);
17411752
bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
1742-
bool _Do_class(_Node_base*);
1753+
_It _Do_class(_Node_base*, _It);
17431754
bool _Match_pat(_Node_base*);
17441755
bool _Better_match();
17451756
bool _Is_wbound() const;
@@ -1758,6 +1769,9 @@ private:
17581769
bool _Full;
17591770
long _Max_complexity_count;
17601771
long _Max_stack_count;
1772+
typename _RxTraits::char_class_type _Char_class_w{};
1773+
typename _RxTraits::char_class_type _Char_class_s{};
1774+
typename _RxTraits::char_class_type _Char_class_d{};
17611775

17621776
public:
17631777
_Matcher2& operator=(const _Matcher2&) = delete;
@@ -3679,23 +3693,24 @@ _BidIt _Lookup_coll2(_Elem _First_ch, _BidIt _First, const _BidIt _Last, const _
36793693
}
36803694

36813695
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3682-
bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_class(_Node_base* _Nx) { // apply bracket expression
3696+
_It _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_class(_Node_base* _Nx, _It _First) {
3697+
// apply bracket expression
36833698
bool _Found;
3684-
_Elem _Ch = *_Tgt_state._Cur;
3699+
_Elem _Ch = *_First;
36853700
if (_Sflags & regex_constants::icase) {
36863701
_Ch = _Traits.translate_nocase(_Ch);
36873702
} else if (_Sflags & regex_constants::collate) {
36883703
_Ch = _Traits.translate(_Ch);
36893704
}
36903705
const auto _UCh = static_cast<typename _RxTraits::_Uelem>(_Ch);
36913706

3692-
_It _Res0 = _Tgt_state._Cur;
3707+
_It _Res0 = _First;
36933708
++_Res0;
36943709
_It _Resx;
36953710
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx);
36963711
if (_Node->_Coll
3697-
&& (_Resx = _STD _Lookup_coll2(_Ch, _Tgt_state._Cur, _End, _Node->_Coll, _Traits, _Sflags))
3698-
!= _Tgt_state._Cur) { // check for collation element
3712+
&& (_Resx = _STD _Lookup_coll2(_Ch, _First, _End, _Node->_Coll, _Traits, _Sflags))
3713+
!= _First) { // check for collation element
36993714
_Res0 = _Resx;
37003715
_Found = true;
37013716
} else if (_Node->_Ranges
@@ -3715,14 +3730,11 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_class(_Node_base* _Nx
37153730
_Found = true;
37163731
} else if (_Node->_Equiv && _STD _Lookup_equiv2(_Ch, _Node->_Equiv, _Traits)) {
37173732
_Found = true;
3718-
} else if ((_Node->_Flags & _Fl_class_negated_w)
3719-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('W')))) {
3733+
} else if ((_Node->_Flags & _Fl_class_negated_w) && !_Traits.isctype(_Ch, _Char_class_w)) {
37203734
_Found = true;
3721-
} else if ((_Node->_Flags & _Fl_class_negated_s)
3722-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('S')))) {
3735+
} else if ((_Node->_Flags & _Fl_class_negated_s) && !_Traits.isctype(_Ch, _Char_class_s)) {
37233736
_Found = true;
3724-
} else if ((_Node->_Flags & _Fl_class_negated_d)
3725-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('D')))) {
3737+
} else if ((_Node->_Flags & _Fl_class_negated_d) && !_Traits.isctype(_Ch, _Char_class_d)) {
37263738
_Found = true;
37273739
} else {
37283740
_Found = false;
@@ -3731,10 +3743,9 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_class(_Node_base* _Nx
37313743
const bool _Negated = (_Node->_Flags & _Fl_negate) != 0;
37323744

37333745
if (_Found == _Negated) {
3734-
return false;
3735-
} else { // record result
3736-
_Tgt_state._Cur = _Res0;
3737-
return true;
3746+
return _First;
3747+
} else {
3748+
return _Res0;
37383749
}
37393750
}
37403751

@@ -3868,7 +3879,12 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
38683879

38693880
case _N_class:
38703881
{ // check for bracket expression match
3871-
_Failed = _Tgt_state._Cur == _End || !_Do_class(_Nx);
3882+
_It _Res;
3883+
if (_Tgt_state._Cur != _End && (_Res = _Do_class(_Nx, _Tgt_state._Cur)) != _Tgt_state._Cur) {
3884+
_Tgt_state._Cur = _Res;
3885+
} else {
3886+
_Failed = true;
3887+
}
38723888
break;
38733889
}
38743890

@@ -4046,56 +4062,10 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
40464062

40474063
case _N_class:
40484064
{ // check for string match
4049-
for (; _First_arg != _Last; ++_First_arg) { // look for starting match
4050-
bool _Found;
4051-
_Elem _Ch = *_First_arg;
4052-
if (_Sflags & regex_constants::icase) {
4053-
_Ch = _Traits.translate_nocase(_Ch);
4054-
} else if (_Sflags & regex_constants::collate) {
4055-
_Ch = _Traits.translate(_Ch);
4056-
}
4057-
const auto _UCh = static_cast<typename _RxTraits::_Uelem>(_Ch);
4058-
4059-
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx);
4060-
4061-
if (_Node->_Coll
4062-
&& _STD _Lookup_coll2(_Ch, _First_arg, _Last, _Node->_Coll, _Traits, _Sflags) != _First_arg) {
4063-
_Found = true;
4064-
} else if (_Node->_Ranges
4065-
&& (_Sflags & regex_constants::collate
4066-
? _STD _Lookup_collating_range(_Ch, _Node->_Ranges, _Traits)
4067-
: _STD _Lookup_range(_UCh, _Node->_Ranges))) {
4068-
_Found = true;
4069-
} else if (_UCh < _Bmp_max) {
4070-
_Found = _Node->_Small && _Node->_Small->_Find(_UCh);
4071-
} else if (_Node->_Large
4072-
&& _STD find(_Node->_Large->_Str(), _Node->_Large->_Str() + _Node->_Large->_Size(), _Ch)
4073-
!= _Node->_Large->_Str() + _Node->_Large->_Size()) {
4074-
_Found = true;
4075-
} else if (_Node->_Classes != typename _RxTraits::char_class_type{}
4076-
&& _Traits.isctype(_Ch, _Node->_Classes)) {
4077-
_Found = true;
4078-
} else if ((_Node->_Flags & _Fl_class_cl_all_bits)
4079-
&& _Traits.isctype(_Ch, static_cast<typename _RxTraits::char_class_type>(-1))) {
4080-
_Found = true;
4081-
} else if (_Node->_Equiv && _STD _Lookup_equiv2(_Ch, _Node->_Equiv, _Traits)) {
4082-
_Found = true;
4083-
} else if ((_Node->_Flags & _Fl_class_negated_w)
4084-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('W')))) {
4085-
_Found = true;
4086-
} else if ((_Node->_Flags & _Fl_class_negated_s)
4087-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('S')))) {
4088-
_Found = true;
4089-
} else if ((_Node->_Flags & _Fl_class_negated_d)
4090-
&& !_Traits.isctype(_Ch, _Lookup_char_class(static_cast<_Elem>('D')))) {
4091-
_Found = true;
4092-
} else {
4093-
_Found = false;
4094-
}
4065+
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx);
40954066

4096-
const bool _Negated = (_Node->_Flags & _Fl_negate) != 0;
4097-
4098-
if (_Found != _Negated) {
4067+
for (; _First_arg != _Last; ++_First_arg) { // look for starting match
4068+
if (_Do_class(_Node, _First_arg) != _First_arg) {
40994069
return _First_arg;
41004070
}
41014071
}

0 commit comments

Comments
 (0)