Skip to content

Commit

Permalink
regex: fix bug for issue #19789 (#19793)
Browse files Browse the repository at this point in the history
  • Loading branch information
penguindark committed Nov 7, 2023
1 parent 93d5c2d commit 6b2a6b9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
34 changes: 21 additions & 13 deletions vlib/regex/regex.v
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub type FnValidator = fn (u8) bool

struct Token {
mut:
ist rune
ist u32
// char
ch rune // char of the token if any
ch_len u8 // char len
Expand Down Expand Up @@ -553,7 +553,7 @@ fn (re RE) check_char_class(pc int, ch rune) bool {
}

// parse_char_class returns (index, str_len, cc_type) of a char class such as [abcm-p]; parsing starts just after the opening [ char
fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
mut status := CharClass_parse_state.start
mut i := in_i

Expand Down Expand Up @@ -1259,7 +1259,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
if re.prog[pc2].ist == regex.ist_dot_char {
return regex.err_syntax_error, 0
}
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next dot char check is PC: ${pc2}")
re.prog[pc1].dot_check_pc = pc2
Expand All @@ -1276,7 +1276,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_dot_char_pc + 1
mut is_last_dot := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_dot = false
break
}
Expand All @@ -1302,7 +1302,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
bsls_char_count++
mut pc2 := pc1 + 1
for pc2 < pc {
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next bsls check is PC: ${pc2}")
re.prog[pc1].bsls_check_pc = pc2
Expand All @@ -1319,7 +1319,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_bsls_char_pc + 1
mut is_last_bsls := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_bsls = false
break
}
Expand All @@ -1337,12 +1337,12 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
mut cc_char_count := 0
mut last_cc_char_pc := -1
for pc1 < pc {
if re.prog[pc1].ist in [rune(regex.ist_char_class_pos), regex.ist_char_class_neg] {
if re.prog[pc1].ist in [u32(regex.ist_char_class_pos), regex.ist_char_class_neg] {
last_cc_char_pc = pc1
cc_char_count++
mut pc2 := pc1 + 1
for pc2 < pc {
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next CC check is PC: ${pc2}")
re.prog[pc1].cc_check_pc = pc2
Expand All @@ -1359,7 +1359,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_cc_char_pc + 1
mut is_last_cc := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_cc = false
break
}
Expand Down Expand Up @@ -1727,8 +1727,8 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
mut last_fnd_pc := -1

mut state := StateObj{} // actual state
mut ist := rune(0) // actual instruction
mut l_ist := rune(0) // last matched instruction
mut ist := u32(0) // actual instruction
mut l_ist := u32(0) // last matched instruction

mut step_count := 0 // stats for debug
mut dbg_line := 0 // count debug line printed
Expand Down Expand Up @@ -1904,7 +1904,7 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}

if l_ist in [
rune(regex.ist_char_class_neg),
u32(regex.ist_char_class_neg),
regex.ist_char_class_pos,
regex.ist_bsls_char,
regex.ist_dot_char,
Expand Down Expand Up @@ -2273,6 +2273,8 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
continue
}

// println("HERE WE MUST STAY! ${state.i} >= ${in_txt_len}")

state.match_flag = false
mut cc_neg := false

Expand All @@ -2281,6 +2283,12 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}
mut cc_res := re.check_char_class(state.pc, ch)

// handle reaching the end of the input text while matching a negated char class
if state.i >= (in_txt_len - 1) && cc_neg && re.prog[state.pc].last_dot_flag {
m_state = .ist_quant_n
continue
}

if cc_neg {
cc_res = !cc_res
}
Expand Down Expand Up @@ -2606,7 +2614,7 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}

rep := re.prog[state.pc].rep
// println(rep)
// println("ist_quant_p rep: ${rep} rep_min: ${re.prog[state.pc].rep_min}")

// under range
if rep > 0 && rep < re.prog[state.pc].rep_min {
Expand Down
4 changes: 4 additions & 0 deletions vlib/regex/regex_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ match_test_suite = [
TestItem{"ab.c", r"[^\s]*\.",0,3},
TestItem{"ab c", r"[\S]+\s",0,3},
TestItem{"ab c", r"[^\s]+\s",0,3},

// test a negated char class as the last token of the pattern
TestItem{"/a/", r"^/a/[^/]+$", -1,3},
TestItem{"/a/b",r"^/a/[^/]+$", 0,4},
]
)

Expand Down

0 comments on commit 6b2a6b9

Please sign in to comment.