Skip to content

Commit

Permalink
Improve RESearch for DBCS code pages to avoid matching from trailing …
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Oct 16, 2023
1 parent 5be563a commit e2818f5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
20 changes: 12 additions & 8 deletions scintilla/src/RESearch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ RESearch::RESearch(const CharClassify *charClassTable) {
lineStartPos = 0;
lineEndPos = 0;
sta = NOP; /* status of lastpat */
failure = 0;
previousFlags = FindOption::None;
constexpr unsigned char nul = 0;
std::fill(bittab, std::end(bittab), nul);
Expand Down Expand Up @@ -758,8 +757,6 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
Sci::Position ep = NOTFOUND;
const char * const ap = nfa;

failure = 0;

Clear();

switch (*ap) {
Expand Down Expand Up @@ -787,8 +784,15 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
default: /* regular matching all the way. */
while (lp < endp) {
ep = PMatch(ci, lp, endp, ap);
if (ep != NOTFOUND)
break;
if (ep != NOTFOUND) {
// Reject a match that starts in the middle of a character, e.g. on the trailing ASCII-range byte of a DBCS character; keep scanning instead.
const Sci::Position pos = ci.MovePositionOutsideChar(lp, -1);
if (pos != lp) {
ep = NOTFOUND;
} else {
break;
}
}
lp++;
}
break;
Expand All @@ -809,7 +813,6 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
}
}

lp = ci.MovePositionOutsideChar(lp, -1);
ep = ci.MovePositionOutsideChar(ep, 1);
bopat[0] = lp;
eopat[0] = ep;
Expand Down Expand Up @@ -885,7 +888,9 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
return NOTFOUND;
break;
case BOT:
lp = ci.MovePositionOutsideChar(lp, -1);
if (lp != ci.MovePositionOutsideChar(lp, -1)) {
return NOTFOUND;
}
bopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case EOT:
Expand Down Expand Up @@ -944,7 +949,6 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
n = CCLSKIP;
break;
default:
failure = true;
//re_fail("closure: bad nfa.", *ap);
return NOTFOUND;
}
Expand Down
1 change: 0 additions & 1 deletion scintilla/src/RESearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class RESearch {
Sci::Position tagstk[MAXTAG]; /* subpat tag stack */
char nfa[MAXNFA]; /* automaton */
int sta;
int failure;

// cache for previous pattern to avoid recompile
Scintilla::FindOption previousFlags;
Expand Down

0 comments on commit e2818f5

Please sign in to comment.