diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index a3fe39557d..bd235abbd4 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -2651,10 +2651,30 @@ class DocumentIndexer : public CharacterIndexer { char CharAt(Sci::Position index) const noexcept override { if (index < 0 || index >= end) - return 0; + return '\0'; else return pdoc->CharAt(index); } + + bool IsWordStartAt(Sci::Position pos) const noexcept override { + return pdoc->IsWordStartAt(pos); + } + + bool IsWordEndAt(Sci::Position pos) const noexcept override { + return pdoc->IsWordEndAt(pos); + } + + Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override { + return pdoc->MovePositionOutsideChar(pos, moveDir, true); + } + + Sci::Position NextPosition(Sci::Position pos, int moveDir) const noexcept override { + return pdoc->NextPosition(pos, moveDir); + } + + Sci::Position ExtendWordSelect(Sci::Position pos, int delta) const noexcept override { + return pdoc->ExtendWordSelect(pos, delta, true); + } }; #ifndef NO_CXX11_REGEX diff --git a/scintilla/src/RESearch.cxx b/scintilla/src/RESearch.cxx index a65c8992df..22c32db856 100644 --- a/scintilla/src/RESearch.cxx +++ b/scintilla/src/RESearch.cxx @@ -233,6 +233,12 @@ using namespace Scintilla; #define CLQ 12 /* 0 to 1 closure */ #define LCLO 13 /* lazy closure */ +// experimental +#define EXP_MATCH_WORD_START 14 +#define EXP_MATCH_WORD_END 15 +#define EXP_MATCH_TO_WORD_END 16 +#define EXP_MATCH_TO_WORD_END_OPT 17 + #define END 0 /* @@ -642,6 +648,11 @@ const char *RESearch::DoCompile(const char *pattern, Sci::Position length, bool break; } + if (*p == '?' 
&& *lp == EXP_MATCH_TO_WORD_END) { + *lp = EXP_MATCH_TO_WORD_END_OPT; + break; + } + if (*p == '+') { for (sp = mp; lp < sp; lp++) { *mp++ = *lp; @@ -672,6 +683,17 @@ const char *RESearch::DoCompile(const char *pattern, Sci::Position length, bool return badpat("Null pattern inside \\<\\>"); *mp++ = EOW; break; + case 'h': + *mp++ = EXP_MATCH_WORD_START; + break; + case 'H': + if (*sp == EXP_MATCH_WORD_START) + return badpat("Null pattern inside \\h\\H"); + *mp++ = EXP_MATCH_WORD_END; + break; + case 'i': + *mp++ = EXP_MATCH_TO_WORD_END; + break; case '1': case '2': case '3': @@ -828,10 +850,11 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio // fall through default: /* regular matching all the way. */ while (lp < endp) { - ep = PMatch(ci, lp, endp, ap); + Sci::Position offset = 1; + ep = PMatch(ci, lp, endp, ap, 1, &offset); if (ep != NOTFOUND) break; - lp++; + lp += offset; } break; case END: /* munged automaton. fail always */ @@ -888,7 +911,7 @@ static inline int isinset(const char *ap, unsigned char c) noexcept { #define CHRSKIP 3 /* [CLO] CHR chr END */ #define CCLSKIP 34 /* [CLO] CCL 32 bytes END */ -Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap) { +Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap, int moveDir, Sci::Position *offset) { int op; int c; int n; @@ -938,6 +961,36 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci if (lp == bol || !iswordc(ci.CharAt(lp - 1)) || iswordc(ci.CharAt(lp))) return NOTFOUND; break; + case EXP_MATCH_WORD_START: + if (!ci.IsWordStartAt(lp)) { + if (offset) { + e = ci.MovePositionOutsideChar(lp, moveDir); + *offset = (e == lp) ? 
ci.NextPosition(lp, moveDir) - lp : e - lp; + } + return NOTFOUND; + } + break; + case EXP_MATCH_WORD_END: + if (lp == bol || !ci.IsWordEndAt(lp)) { + if (offset) { + e = ci.MovePositionOutsideChar(lp, moveDir); + *offset = (e == lp) ? ci.NextPosition(lp, moveDir) - lp : e - lp; + } + return NOTFOUND; + } + break; + case EXP_MATCH_TO_WORD_END: + case EXP_MATCH_TO_WORD_END_OPT: { + e = ci.ExtendWordSelect(lp, moveDir); + const bool find = ci.IsWordEndAt(e); + if (offset) { + *offset = (e == lp) ? ci.NextPosition(lp, moveDir) - lp : e - lp; + } + if ((e == lp && op != EXP_MATCH_TO_WORD_END_OPT) || !find) { + return NOTFOUND; + } + lp = e; + } break; case REF: n = *ap++; bp = bopat[n]; @@ -992,13 +1045,14 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci e = NOTFOUND; while (llp >= are) { Sci::Position q; - if ((q = PMatch(ci, llp, endp, ap)) != NOTFOUND) { + Sci::Position qoff = -1; + if ((q = PMatch(ci, llp, endp, ap, -1, &qoff)) != NOTFOUND) { e = q; lp = llp; if (op != LCLO) return e; } if (*ap == END) return e; - --llp; + llp += qoff; } if (*ap == EOT) PMatch(ci, lp, endp, ap); diff --git a/scintilla/src/RESearch.h b/scintilla/src/RESearch.h index beb8e395fd..f32a05772c 100644 --- a/scintilla/src/RESearch.h +++ b/scintilla/src/RESearch.h @@ -22,6 +22,11 @@ namespace Scintilla { class CharacterIndexer { public: virtual char CharAt(Sci::Position index) const noexcept = 0; + virtual bool IsWordStartAt(Sci::Position pos) const noexcept = 0; + virtual bool IsWordEndAt(Sci::Position pos) const noexcept = 0; + virtual Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept = 0; + virtual Sci::Position NextPosition(Sci::Position pos, int moveDir) const noexcept = 0; + virtual Sci::Position ExtendWordSelect(Sci::Position pos, int delta) const noexcept = 0; virtual ~CharacterIndexer() = default; }; @@ -57,7 +62,7 @@ class RESearch { int GetBackslashExpression(const char *pattern, int &incr) noexcept; 
const char *DoCompile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix) noexcept; - Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap); + Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap, int moveDir = 1, Sci::Position *offset = nullptr); Sci::Position bol; Sci::Position tagstk[MAXTAG]; /* subpat tag stack */ diff --git a/src/Notepad2.rc b/src/Notepad2.rc index db9e9ca61d..069fba457c 100644 --- a/src/Notepad2.rc +++ b/src/Notepad2.rc @@ -1738,7 +1738,7 @@ END STRINGTABLE BEGIN IDS_BACKSLASHHELP "Backslash Transformations\n\n\\a\tAlert (BEL, ASCII 7)\n\\b\tBackspace (BS, ASCII 8)\n\\e\tEscape (ESC, ASCII 27 / 1B)\n\\f\tFormfeed (FF, ASCII 12 / 0C)\n\\n\tNewline (LF, ASCII 10 / 0A)\n\\r\tCarriage return (CR, ASCII 13 / 0D)\n\\t\tHorizontal Tab (HT, ASCII 9)\n\\v\tVertical Tab (VT, ASCII 11 / 0B)\n\\ooo\tOctal Value\n\\u####\tHexadecimal Value\n\\xhh\tHexadecimal Value\n\\\\\tBackslash" - IDS_REGEXPHELP "RegExp Syntax (Single Lines Only)\n\n.\tAny character\n^\tStart of a line\n$\tEnd of a line\n\\<\tStart of a word\n\\>\tEnd of a word\n[...]\tA set of chars ([abc]) or a range ([a-z])\n[^...]\tChars NOT in the set or range\n\\d\tAny decimal digit\n\\D\tAny non-digit char\n\\s\tAny whitespace char\n\\S\tNot a whitespace char\n\\w\tAny ""word"" char\n\\W\tAny ""non-word"" char\n\\x\tEscape character with otherwise special meaning\n\\xHH\tChar with hex code HH\n?\tMatches preceding 0 or 1 times\n*\tMatches preceding 0 or more times\n+\tMatches preceding 1 or more times\n*? 
or +?\tNon greedy matching of quantifiers ""?"" and ""+""\n(\tStart of a region\n)\tEnd of a region\n\\n\tRefers to a region when replacing (n is 1-9)\n" + IDS_REGEXPHELP "RegExp Syntax (Single Lines Only)\n\n.\tAny character\n^\tStart of a line\n$\tEnd of a line\n\\<\tStart of a word\n\\>\tEnd of a word\n[...]\tA set of chars ([abc]) or a range ([a-z])\n[^...]\tChars NOT in the set or range\n\\d\tAny decimal digit\n\\D\tAny non-digit char\n\\s\tAny whitespace char\n\\S\tAny non-whitespace char\n\\w\tAny ""word"" char\n\\W\tAny ""non-word"" char\n\\x\tEscape character with otherwise special meaning\n\\xHH\tChar with hex code HH\n?\tMatches preceding 0 or 1 times\n*\tMatches preceding 0 or more times\n+\tMatches preceding 1 or more times\n*? or +?\tNon greedy matching of quantifiers ""*"" and ""+""\n(\tStart of a region\n)\tEnd of a region\n\\n\tRefers to a region when replacing (n is 1-9)\n\nExperimental Syntax:\n\\h\tStart of a word, according to IsWordStartAt()\n\\H\tEnd of a word, according to IsWordEndAt()\n\\i\tMatches 1 or more characters to end of a word\n\\i?\tMatches 0 or more characters to end of a word" IDS_WILDCARDHELP "Wildcard Search\n\n*\tMatches zero or more characters.\n?\tMatches exactly one character. " END