Skip to content

Commit 1a32467

Browse files
authored
regex: added groups in replace strings (#9576)
1 parent 0eb59cf commit 1a32467

File tree

3 files changed

+154
-6
lines changed

3 files changed

+154
-6
lines changed

vlib/regex/README.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,10 +544,36 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
544544
#### Replace functions
545545

546546
```v ignore
547-
// replace return a string where the matches are replaced with the replace string, only non overlapped matches are used
547+
// replace returns a string where the matches are replaced with the repl_str string,
548+
// this function supports group references in the replace string
548549
pub fn (re mut RE) replace(in_txt string, repl string) string
549550
```
550551

552+
The replace string can include group references:
553+
554+
```v ignore
555+
txt := "Today it is a good day."
556+
query := r'(a\w)[ ,.]'
557+
mut re := regex.regex_opt(query)?
558+
res := re.replace(txt, r"__[\0]__")
559+
```
560+
561+
In this example we used group `0` in the replace string (`\0`); the result will be:
562+
563+
```
564+
Today it is a good day. => Tod__[ay]__it is a good d__[ay]__
565+
```
566+
567+
**Note:** only groups from `0` to `9` can be used in replace strings.
568+
569+
If group references are not needed in the replace process, it is possible
570+
to use a quick function:
571+
572+
```v ignore
573+
// replace_simple returns a string where the matches are replaced with the replace string
574+
pub fn (mut re RE) replace_simple(in_txt string, repl string) string
575+
```
576+
551577
#### Custom replace function
552578

553579
For complex find and replace operations, the function `replace_by_fn` is available.

vlib/regex/regex_test.v

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,41 @@ match_test_suite_replace = [
176176
r"[Tt]o\w+",
177177
"CIAO",
178178
"CIAO is a good day and CIAO will be for sure."
179-
}
179+
},
180+
TestItemRe{
181+
"Today is a good day and tomorrow will be for sure.",
182+
r"(a\w) ",
183+
r"[\0] ",
184+
"Tod[ay] is a good d[ay] and tomorrow will be for sure."
185+
},
186+
TestItemRe{
187+
"Today is a good day and tomorrow will be for sure.",
188+
r"(a\w) ",
189+
r"[\0_\0] ",
190+
"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
191+
},
192+
TestItemRe{
193+
"Today is a good day and tomorrow will be for sure.",
194+
r"(a\w) ",
195+
r"[\0\1] ",
196+
"Tod[ay] is a good d[ay] and tomorrow will be for sure."
197+
},
198+
]
199+
200+
match_test_suite_replace_simple = [
201+
// replace tests
202+
TestItemRe{
203+
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
204+
r"(pi?(ba)+o)",
205+
"CIAO",
206+
"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
207+
},
208+
TestItemRe{
209+
"Today is a good day and tomorrow will be for sure.",
210+
r"[Tt]o\w+",
211+
"CIAO",
212+
"CIAO is a good day and CIAO will be for sure."
213+
},
180214
]
181215
)
182216

@@ -425,6 +459,25 @@ fn test_regex(){
425459
}
426460
}
427461

462+
// check replace simple
463+
for c,to in match_test_suite_replace_simple{
464+
// debug print
465+
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
466+
467+
mut re := regex.regex_opt(to.q) or {
468+
eprintln('err: $err')
469+
assert false
470+
continue
471+
}
472+
473+
res := re.replace_simple(to.src,to.rep)
474+
if res != to.r {
475+
eprintln("ERROR: replace.")
476+
assert false
477+
continue
478+
}
479+
}
480+
428481
// check match and find
429482
for c,to in match_test_suite {
430483
// debug print

vlib/regex/regex_util.v

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
5454
tmp_index := re.group_map[group_name]-1
5555
start := re.groups[tmp_index * 2]
5656
end := re.groups[tmp_index * 2 + 1]
57-
return in_txt[start..end]
57+
if start >= 0 && end > start {
58+
return in_txt[start..end]
59+
}
5860
}
5961
return ""
6062
}
@@ -65,7 +67,9 @@ pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
6567
index := group_id << 1
6668
start := re.groups[index]
6769
end := re.groups[index + 1]
68-
return in_txt[start..end]
70+
if start >= 0 && end > start {
71+
return in_txt[start..end]
72+
}
6973
}
7074
return ""
7175
}
@@ -307,8 +311,8 @@ pub fn (mut re RE) find_all_str(in_txt string) []string {
307311
* Replacers
308312
*
309313
******************************************************************************/
310-
// replace return a string where the matches are replaced with the replace string
311-
pub fn (mut re RE) replace(in_txt string, repl string) string {
314+
// replace_simple returns a string where the matches are replaced with the replace string
315+
pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
312316
pos := re.find_all(in_txt)
313317

314318
if pos.len > 0 {
@@ -331,6 +335,7 @@ pub fn (mut re RE) replace(in_txt string, repl string) string {
331335
return in_txt
332336
}
333337

338+
334339
// type of function used for custom replace
335340
// in_txt source text
336341
// start index of the start of the match in in_txt
@@ -378,3 +383,67 @@ pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
378383
}
379384
return res.str()
380385
}
386+
387+
388+
// parsed_replace_string builds the replacement text for the current match by
// expanding group references of the form `\N` (N is a single digit, 0..9)
// found in `repl`.
// in_txt is the text the match was found in; it is passed to get_group_by_id
//        to extract the text of each referenced group.
// repl   is the replace template that may contain `\0` .. `\9` references.
// Returns the template with each `\N` substituted by the text of group N.
// A backslash that is not followed by a digit is kept in the output unchanged.
fn (re RE) parsed_replace_string(in_txt string, repl string) string {
389+
// split on the backslash: every piece after the first one was preceded by a `\`
str_lst := repl.split("\\")
390+
// the first piece comes before any backslash, so it is copied verbatim
mut res := str_lst[0]
391+
mut i := 1
392+
for i < str_lst.len {
393+
tmp := str_lst[i]
394+
//println("tmp: ${tmp}")
395+
// a piece starting with a digit is a group reference: `\N`
if tmp.len > 0 && tmp[0] >= `0` && tmp[0] <= `9` {
396+
// only the first character is read, hence group ids are limited to 0..9
group_id := int(tmp[0] - `0`)
397+
// NOTE(review): get_group_by_id presumably returns "" for a group that
// is not available, so such a reference expands to nothing - confirm
group := re.get_group_by_id(in_txt, group_id)
398+
//println("group: $group_id [$group]")
399+
// append the group text followed by the rest of the piece after the digit
res += "${group}${tmp[1..]}"
400+
} else {
401+
// not a group reference (this includes an empty piece):
// restore the backslash removed by split
res += '\\'+tmp
402+
}
403+
i++
404+
}
405+
return res
406+
}
407+
408+
// replace returns a string where the matches are replaced with the repl_str string,
409+
// this function supports the use of group references (`\0` .. `\9`) in the replace string
410+
pub fn (mut re RE) replace(in_txt string, repl_str string) string {
411+
// i is the position in in_txt from which the next search starts
mut i := 0
412+
mut res := strings.new_builder(in_txt.len)
413+
// last_end is the end index of the last match already written to res
mut last_end := 0
414+
415+
for i < in_txt.len {
416+
//println("Find Start. $i [${in_txt[i..]}]")
417+
s, e := re.find_from(in_txt,i)
418+
//println("Find End.")
419+
// only non-empty matches are replaced
if s >= 0 && e > s {
420+
//println("find match in: ${s},${e} [${in_txt[s..e]}]")
421+
422+
// copy the unmatched text between the previous match and this one
if last_end < s {
423+
res.write_string(in_txt[last_end..s])
424+
}
425+
426+
// shift the start/end index of every group by the search offset i
// NOTE(review): presumably find_from reports group indexes relative to i,
// while get_group_by_id slices in_txt with absolute indexes - confirm
for g_i in 0..re.group_count {
427+
re.groups[g_i << 1 ] += i
428+
re.groups[(g_i << 1) + 1] += i
429+
}
430+
431+
//repl := repl_fn(re, in_txt, s, e)
432+
// expand the group references of repl_str for the current match
repl := re.parsed_replace_string(in_txt, repl_str)
433+
//println("repl res: $repl")
434+
res.write_string(repl)
435+
//res.write_string("[[${in_txt[s..e]}]]")
436+
437+
// continue the search right after the end of this match
last_end = e
438+
i = e
439+
} else {
440+
// no further (non-empty) match: stop searching
break
441+
//i++
442+
}
443+
//println(i)
444+
}
445+
// append the text that follows the last match (the whole text if nothing matched)
if last_end >= 0 && last_end < in_txt.len {
446+
res.write_string(in_txt[last_end..])
447+
}
448+
return res.str()
449+
}

0 commit comments

Comments
 (0)