From 80444c8ec4ffd280bcb3d181bf43d67ba5a9d583 Mon Sep 17 00:00:00 2001
From: Larpon
Date: Tue, 15 Feb 2022 14:12:15 +0100
Subject: [PATCH] strings: add find_between_pair (#13468)

---
 vlib/strings/strings.v      | 115 ++++++++++++++++++++++++++++++++++++
 vlib/strings/strings_test.v |  82 +++++++++++++++++++++++++
 2 files changed, 197 insertions(+)

diff --git a/vlib/strings/strings.v b/vlib/strings/strings.v
index c01dd90fc71dfc..172c3c3dc2b65f 100644
--- a/vlib/strings/strings.v
+++ b/vlib/strings/strings.v
@@ -11,3 +11,118 @@ pub fn random(n int) string {
 	return tos(buf)
 }
 */
+
+// find_between_pair_byte returns the string found between the pair of marks defined
+// by `start` and `end`.
+// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
+// `string` type, this function can extract content between *nested* marks in `input`.
+// If `start` and `end` marks are nested in `input`, the characters
+// between the *outermost* mark pair are returned. It is expected that `start` and `end`
+// marks are *balanced*, meaning that the number of `start` marks equals the
+// number of `end` marks in the `input`. An empty string is returned otherwise.
+// Using two identical marks as `start` and `end` results in undefined output behavior.
+// find_between_pair_byte is the fastest in the find_between_pair_* family of functions.
+// Example: assert strings.find_between_pair_byte('(V) (NOT V)',`(`,`)`) == 'V'
+// Example: assert strings.find_between_pair_byte('s {X{Y}} s',`{`,`}`) == 'X{Y}'
+pub fn find_between_pair_byte(input string, start byte, end byte) string {
+	// depth counts the `start` marks that still wait for their `end` mark.
+	mut depth := 0
+	// inner_start is the index of the first byte after the outermost `start` mark.
+	mut inner_start := 0
+	for pos, ch in input {
+		if ch == start {
+			if depth == 0 {
+				inner_start = pos + 1
+			}
+			depth++
+		} else if ch == end && depth > 0 {
+			depth--
+			if depth == 0 {
+				// The outermost pair just closed, so the content in between is complete.
+				return input[inner_start..pos]
+			}
+		}
+	}
+	// Either no `start` mark was seen or the marks never balanced out.
+	return ''
+}
+
+// find_between_pair_rune returns the string found between the pair of marks defined
+// by `start` and `end`.
+// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
+// `string` type, this function can extract content between *nested* marks in `input`.
+// If `start` and `end` marks are nested in `input`, the characters
+// between the *outermost* mark pair are returned. It is expected that `start` and `end`
+// marks are *balanced*, meaning that the number of `start` marks equals the
+// number of `end` marks in the `input`. An empty string is returned otherwise.
+// Using two identical marks as `start` and `end` results in undefined output behavior.
+// find_between_pair_rune is in between the fastest and slowest in the find_between_pair_* family of functions.
+// Example: assert strings.find_between_pair_rune('(V) (NOT V)',`(`,`)`) == 'V'
+// Example: assert strings.find_between_pair_rune('s {X{Y}} s',`{`,`}`) == 'X{Y}'
+pub fn find_between_pair_rune(input string, start rune, end rune) string {
+	// Work on decoded runes so that multi-byte marks compare as single units.
+	chars := input.runes()
+	mut open_marks := 0
+	mut content_from := 0
+	for idx, ch in chars {
+		if ch == start {
+			// Only the outermost `start` mark decides where the result begins.
+			if open_marks == 0 {
+				content_from = idx + 1
+			}
+			open_marks++
+		} else if ch == end && open_marks > 0 {
+			open_marks--
+			if open_marks == 0 {
+				// Indices here are rune indices, hence the slice of `chars`.
+				return chars[content_from..idx].string()
+			}
+		}
+	}
+	// Reached only when there was no pair or the pair was never closed.
+	return ''
+}
+
+// find_between_pair_string returns the string found between the pair of marks defined
+// by `start` and `end`.
+// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
+// `string` type, this function can extract content between *nested* marks in `input`.
+// If `start` and `end` marks are nested in `input`, the characters
+// between the *outermost* mark pair are returned. It is expected that `start` and `end`
+// marks are *balanced*, meaning that the number of `start` marks equals the
+// number of `end` marks in the `input`. An empty string is returned otherwise.
+// Using two identical marks as `start` and `end` results in undefined output behavior.
+// find_between_pair_string is the slowest in the find_between_pair_* function family.
+// Example: assert strings.find_between_pair_string('/*V*/ /*NOT V*/','/*','*/') == 'V'
+// Example: assert strings.find_between_pair_string('s {{X{{Y}}}} s','{{','}}') == 'X{{Y}}'
+pub fn find_between_pair_string(input string, start string, end string) string {
+	// An empty `start` would match at every index without advancing the scan (endless loop).
+	if start.len == 0 {
+		return ''
+	}
+	start_runes := start.runes()
+	end_runes := end.runes()
+	runes := input.runes()
+	mut start_index := -1
+	mut marks := 0
+	mut i := 0
+	for ; i < runes.len; i++ {
+		if runes#[i..i + start_runes.len] == start_runes {
+			// Land on the mark's last rune; the loop's own `i++` then steps past it.
+			i += start_runes.len - 1
+			if start_index < 0 {
+				start_index = i + 1
+			}
+			marks++
+			continue
+		}
+		if start_index > 0 && runes#[i..i + end_runes.len] == end_runes {
+			marks--
+			if marks == 0 {
+				return runes[start_index..i].string()
+			}
+			i += end_runes.len - 1
+		}
+	}
+	return ''
+}
diff --git a/vlib/strings/strings_test.v b/vlib/strings/strings_test.v
index ff5ddf504a833f..eac5bf05b8ebdc 100644
--- a/vlib/strings/strings_test.v
+++ b/vlib/strings/strings_test.v
@@ -12,3 +12,85 @@ fn test_repeat_string() {
 	assert strings.repeat_string('abc', 0) == ''
 	assert strings.repeat_string('', 200) == ''
 }
+
+const test_rune_and_byte = [
+	'xxx[ok1]xxx',
+	'xxx[[ok2]okok]',
+	'xxx[ok3[[[ok]okok]]]',
+	'yyy[ok4]',
+	'[]',
+	']',
+	'[',
+	'yyy[ok5][]zzz',
+	'yyy[xxx',
+	'xxx[xxx
+	xxx]',
+]
+
+const test_strings = [
+	'xxx/*ok1*/xxx',
+	'xxx/*/*ok2*/okok*/',
+	'xxx/*ok3/*/*/*ok*/okok*/*/*/',
+	'yyy/*ok4*/',
+	'/**/',
+	'*/',
+	'/*',
+	'yyy/*ok5*//**/zzz',
+	'yyy/*xxx',
+	'xxx/*xxx
+	xxx*/xxx',
+]
+
+const expected_rune_and_byte_outputs = [
+	'ok1',
+	'[ok2]okok',
+	'ok3[[[ok]okok]]',
+	'ok4',
+	'',
+	'',
+	'',
+	'ok5',
+	'',
+	'xxx
+	xxx',
+]
+
+const expected_string_outputs = [
+	'ok1',
+	'/*ok2*/okok',
+	'ok3/*/*/*ok*/okok*/*/',
+	'ok4',
+	'',
+	'',
+	'',
+	'ok5',
+	'',
+	'xxx
+	xxx',
+]
+
+fn test_find_between_pair_family() {
+	assert strings.find_between_pair_rune('xx♡ok❦yy', `♡`, `❦`) == 'ok'
+	assert strings.find_between_pair_byte('xx{ok}yy', `{`, `}`) == 'ok'
+	assert strings.find_between_pair_string('xx/*ok*/yy', '/*', '*/') == 'ok'
+	assert strings.find_between_pair_string('xxokyy', '', 'yy') == ''
+	assert strings.find_between_pair_string('xxxxokyyyy', 'xxx', 'yyy') == 'xok'
+
+	for i, tstr in test_rune_and_byte {
+		e1 := strings.find_between_pair_rune(tstr, `[`, `]`)
+		e2 := expected_rune_and_byte_outputs[i]
+		assert '$e1' == '$e2'
+	}
+
+	for i, tstr in test_rune_and_byte {
+		e1 := strings.find_between_pair_byte(tstr, `[`, `]`)
+		e2 := expected_rune_and_byte_outputs[i]
+		assert '$e1' == '$e2'
+	}
+
+	for i, tstr in test_strings {
+		e1 := strings.find_between_pair_string(tstr, '/*', '*/')
+		e2 := expected_string_outputs[i]
+		assert '$e1' == '$e2'
+	}
+}