Skip to content

Commit 17d82fd

Browse files
committed
KMP string matching
1 parent 9b76d76 commit 17d82fd

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed

src/string_proc.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/// Finds the longest palindrome around every centre of `text` in O(n) time
/// using Manacher's algorithm.
///
/// The result has `2 * text.len() - 1` entries (it is empty for empty input):
///
/// * `len[2 * i]` is the length of the longest odd-length palindrome centred
///   on `text[i]` (always at least 1);
/// * `len[2 * i + 1]` is the length of the longest even-length palindrome
///   centred between `text[i]` and `text[i + 1]` (0 if there is none).
fn find_pals(text: &[u8]) -> Vec<usize> {
    if text.is_empty() {
        return Vec::new();
    }
    let centres = 2 * text.len() - 1;
    // Seed every centre with its minimum: 1 on a character, 0 on a gap.
    let mut len: Vec<usize> = (0..centres).map(|c| 1 - c % 2).collect();

    let mut i = 0;
    while i < centres {
        // Longest palindrome around centre `i` that still fits inside `text`.
        let max_len = (i + 1).min(centres - i);
        // `len[i]` always has the opposite parity to `i`, so `len[i] < max_len`
        // implies `len[i] + 1 <= i` and `i + len[i] + 1 <= centres - 1`: both
        // index computations below stay in range without underflow.
        while len[i] < max_len && text[(i - len[i] - 1) / 2] == text[(i + len[i] + 1) / 2] {
            len[i] += 2;
        }

        // Centres whose mirror image lies strictly inside the palindrome at
        // `i` simply copy the mirrored value. The comparison is written with
        // an addition so it cannot underflow; `d < len[i]` keeps `i - d` and
        // `i + d` inside the vector.
        let mut d = 1;
        while d < len[i] && len[i - d] + d < len[i] {
            len[i + d] = len[i - d];
            d += 1;
        }

        // The next centre to expand starts from whatever the palindrome at
        // `i` already guarantees, but never below its pre-seeded minimum.
        if i + d < centres {
            len[i + d] = len[i + d].max(len[i].saturating_sub(d));
        }
        i += d;
    }
    len
}
26+
27+
/// Builds the Knuth-Morris-Pratt failure table for `pat`.
///
/// `fail[i]` is the length of the longest proper prefix of `pat[..=i]` that is
/// also a suffix of `pat[..=i]`. The table has one entry per pattern byte, so
/// an empty pattern yields an empty table.
fn kmp_init(pat: &[u8]) -> Vec<usize> {
    let mut fail = Vec::with_capacity(pat.len());
    if pat.is_empty() {
        // Nothing to index: `&pat[1..]` below would panic on an empty slice.
        return fail;
    }
    // A single byte has no non-empty proper prefix.
    fail.push(0);
    // `j` is the length of the border of the prefix processed so far.
    let mut j = 0;
    for &ch in &pat[1..] {
        // Fall back to shorter borders until one can be extended by `ch`.
        while j > 0 && pat[j] != ch {
            j = fail[j - 1];
        }
        if pat[j] == ch {
            j += 1;
        }
        fail.push(j);
    }
    fail
}
39+
40+
// matches[i] = len of longest prefix of pat matching with suffix of text[0...i]
41+
fn kmp_match(text: &[u8], pat: &[u8]) -> Vec<usize> {
42+
let fail = kmp_init(pat);
43+
let mut matches = Vec::with_capacity(text.len());
44+
let mut j = 0;
45+
for ch in text {
46+
if j == pat.len() { j = fail[j-1]; }
47+
while j > 0 && pat[j] != *ch { j = fail[j-1]; }
48+
if pat[j] == *ch { j += 1; }
49+
matches.push(j);
50+
}
51+
matches
52+
}
53+
54+
#[cfg(test)]
mod test {
    use super::*;

    /// Checks KMP prefix-match lengths and palindrome lengths on "abcbc".
    #[test]
    fn test_string() {
        let haystack: &[u8] = b"abcbc";
        let needle: &[u8] = b"bc";

        let prefix_lens = kmp_match(haystack, needle);
        let palindrome_lens = find_pals(haystack);

        let expected_prefix_lens: Vec<usize> = vec![0, 1, 2, 1, 2];
        let expected_palindrome_lens: Vec<usize> = vec![1, 0, 1, 0, 3, 0, 3, 0, 1];
        assert_eq!(prefix_lens, expected_prefix_lens);
        assert_eq!(palindrome_lens, expected_palindrome_lens);
    }
}

0 commit comments

Comments
 (0)