Skip to content

Commit

Permalink
patch 9.0.0951: trying every character position for a match is inefficient
Browse files Browse the repository at this point in the history

Problem:    Trying every character position for a match is inefficient.
Solution:   Use the start position of the match ignoring "\zs".
  • Loading branch information
brammool committed Nov 26, 2022
1 parent c96311b commit 01105b3
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/regexp.c
Expand Up @@ -1123,10 +1123,12 @@ static unsigned reg_tofreelen;
typedef struct {
regmatch_T *reg_match;
regmmatch_T *reg_mmatch;

char_u **reg_startp;
char_u **reg_endp;
lpos_T *reg_startpos;
lpos_T *reg_endpos;

win_T *reg_win;
buf_T *reg_buf;
linenr_T reg_firstlnum;
Expand Down
4 changes: 4 additions & 0 deletions src/regexp.h
Expand Up @@ -133,6 +133,8 @@ typedef struct
regprog_T *regprog;
char_u *startp[NSUBEXP];
char_u *endp[NSUBEXP];

colnr_T rm_matchcol; // match start without "\zs"
int rm_ic;
} regmatch_T;

Expand All @@ -149,6 +151,8 @@ typedef struct
regprog_T *regprog;
lpos_T startpos[NSUBEXP];
lpos_T endpos[NSUBEXP];

colnr_T rmm_matchcol; // match start without "\zs"
int rmm_ic;
colnr_T rmm_maxcol; // when not zero: maximum column
} regmmatch_T;
Expand Down
11 changes: 10 additions & 1 deletion src/regexp_bt.c
Expand Up @@ -4842,11 +4842,12 @@ regtry(
static long
bt_regexec_both(
char_u *line,
colnr_T col, // column to start looking for match
colnr_T startcol, // column to start looking for match
int *timed_out) // flag set on timeout or NULL
{
bt_regprog_T *prog;
char_u *s;
colnr_T col = startcol;
long retval = 0L;

// Create "regstack" and "backpos" if they are not allocated yet.
Expand Down Expand Up @@ -5042,11 +5043,19 @@ bt_regexec_both(
if (end->lnum < start->lnum
|| (end->lnum == start->lnum && end->col < start->col))
rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];

// startpos[0] may be set by "\zs", also return the column where
// the whole pattern matched.
rex.reg_mmatch->rmm_matchcol = col;
}
else
{
if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
rex.reg_match->endp[0] = rex.reg_match->startp[0];

// startp[0] may be set by "\zs", also return the column where
// the whole pattern matched.
rex.reg_match->rm_matchcol = col;
}
}

Expand Down
17 changes: 16 additions & 1 deletion src/regexp_nfa.c
Expand Up @@ -7378,7 +7378,14 @@ nfa_regexec_both(
// If match_text is set it contains the full text that must match.
// Nothing else to try. Doesn't handle combining chars well.
if (prog->match_text != NULL && !rex.reg_icombine)
return find_match_text(col, prog->regstart, prog->match_text);
{
retval = find_match_text(col, prog->regstart, prog->match_text);
if (REG_MULTI)
rex.reg_mmatch->rmm_matchcol = col;
else
rex.reg_match->rm_matchcol = col;
return retval;
}
}

// If the start column is past the maximum column: no need to try.
Expand Down Expand Up @@ -7414,11 +7421,19 @@ nfa_regexec_both(
if (end->lnum < start->lnum
|| (end->lnum == start->lnum && end->col < start->col))
rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];

// startpos[0] may be set by "\zs", also return the column where
// the whole pattern matched.
rex.reg_mmatch->rmm_matchcol = col;
}
else
{
if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
rex.reg_match->endp[0] = rex.reg_match->startp[0];

// startp[0] may be set by "\zs", also return the column where
// the whole pattern matched.
rex.reg_match->rm_matchcol = col;
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/version.c
Expand Up @@ -695,6 +695,8 @@ static char *(features[]) =

static int included_patches[] =
{ /* Add new patch number below this line */
/**/
951,
/**/
950,
/**/
Expand Down

0 comments on commit 01105b3

Please sign in to comment.