Skip to content

Commit

Permalink
Fix regex match bug for \<, \> and \>$, issue #161.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Oct 13, 2023
1 parent b70d686 commit 1bfaf31
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 18 deletions.
9 changes: 7 additions & 2 deletions scintilla/src/Document.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -3389,8 +3389,11 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos,
const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
Sci::Position startOfLine = doc->LineStart(line);
Sci::Position endOfLine = doc->LineEnd(line);
const Sci::Position lineStartPos = doc->LineStart(line);
const Sci::Position lineEndPos = doc->LineEnd(line);
Sci::Position startOfLine = lineStartPos;
Sci::Position endOfLine = lineEndPos;

if (resr.increment > 0) {
if (line == resr.lineRangeStart) {
if ((resr.startPos != startOfLine) && searchforLineStart)
Expand All @@ -3416,6 +3419,8 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos,
}

const DocumentIndexer di(doc, endOfLine);
search.lineStartPos = lineStartPos;
search.lineEndPos = lineEndPos;
int success = search.Execute(di, startOfLine, endOfLine);
if (success) {
pos = search.bopat[0];
Expand Down
40 changes: 26 additions & 14 deletions scintilla/src/RESearch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,11 @@ using namespace Scintilla::Internal;
*/

RESearch::RESearch(const CharClassify *charClassTable) {
failure = 0;
charClass = charClassTable;
lineStartPos = 0;
lineEndPos = 0;
sta = NOP; /* status of lastpat */
bol = 0;
failure = 0;
previousFlags = FindOption::None;
constexpr unsigned char nul = 0;
std::fill(bittab, std::end(bittab), nul);
Expand Down Expand Up @@ -761,9 +762,8 @@ const char *RESearch::DoCompile(const char *pattern, Sci::Position length, bool
*/
int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp) {
Sci::Position ep = NOTFOUND;
char *ap = nfa;
const char *ap = nfa;

bol = lp;
failure = 0;

Clear();
Expand All @@ -774,15 +774,15 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
ep = PMatch(ci, lp, endp, ap);
break;
case EOL: /* just searching for end of line normal path doesn't work */
if (*(ap + 1) == END) {
if (endp == lineEndPos && ap[1] == END) {
lp = endp;
ep = lp;
break;
} else {
return 0;
}
case CHR: { /* ordinary char: locate it fast */
const unsigned char c = *(ap + 1);
const unsigned char c = ap[1];
while ((lp < endp) && (static_cast<unsigned char>(ci.CharAt(lp)) != c)) {
lp++;
}
Expand All @@ -801,8 +801,20 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
case END: /* munged automaton. fail always */
return 0;
}
if (ep == NOTFOUND)
return 0;
if (ep == NOTFOUND) {
/* similar to EOL, match EOW at line end */
if (endp == lineEndPos && *ap == EOW) {
++ap;
if ((*ap == END || ((*ap == EOL && ap[1] == END))) && iswordc(ci.CharAt(lp - 1))) {
lp = endp;
ep = lp;
} else {
return 0;
}
} else {
return 0;
}
}

lp = ci.MovePositionOutsideChar(lp, -1);
ep = ci.MovePositionOutsideChar(ep, 1);
Expand Down Expand Up @@ -850,7 +862,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
#define CHRSKIP 3 /* [CLO] CHR chr END */
#define CCLSKIP 34 /* [CLO] CCL 32 bytes END */

Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap) {
Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap) {
uint8_t op;

while ((op = *ap++) != END) {
Expand All @@ -872,11 +884,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
ap += BITBLK;
break;
case BOL:
if (lp != bol)
if (lp != lineStartPos)
return NOTFOUND;
break;
case EOL:
if (lp < endp)
if (lp < lineEndPos)
return NOTFOUND;
break;
case BOT:
Expand All @@ -888,11 +900,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
eopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case BOW:
if ((lp != bol && iswordc(ci.CharAt(lp - 1))) || !iswordc(ci.CharAt(lp)))
if ((lp != lineStartPos && iswordc(ci.CharAt(lp - 1))) || !iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
case EOW:
if (lp == bol || !iswordc(ci.CharAt(lp - 1)) || iswordc(ci.CharAt(lp)))
if (lp == lineStartPos || !iswordc(ci.CharAt(lp - 1)) || iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
case REF: {
Expand Down Expand Up @@ -922,7 +934,7 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
n = ANYSKIP;
break;
case CHR: {
const char c = *(ap + 1);
const char c = ap[1];
if (op == CLO || op == LCLO) {
while ((lp < endp) && (c == ci.CharAt(lp))) {
lp++;
Expand Down
7 changes: 5 additions & 2 deletions scintilla/src/RESearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class RESearch {
Sci::Position bopat[MAXTAG];
Sci::Position eopat[MAXTAG];

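// Start and end positions of the current line, set by the caller before Execute(); BOL and EOL are matched against these rather than against the search range.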
Sci::Position lineStartPos;
Sci::Position lineEndPos;

private:
static constexpr int MAXNFA = 4096;
// The following constants are not meant to be changeable.
Expand All @@ -42,9 +46,8 @@ class RESearch {
int GetBackslashExpression(const char *pattern, int &incr) noexcept;

const char *DoCompile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix) noexcept;
Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap);
Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap);

Sci::Position bol;
Sci::Position tagstk[MAXTAG]; /* subpat tag stack */
char nfa[MAXNFA]; /* automaton */
int sta;
Expand Down

0 comments on commit 1bfaf31

Please sign in to comment.