Skip to content

Commit ca3de65

Browse files
added knuth-morris-pratt pattern in a string search module.
1 parent cf8a21b commit ca3de65

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

kmp_str_search.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""Author Anurag Kumar(mailto:anuragkumarak95@gmail.com)
2+
The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
3+
with complexity O(n + m)
4+
1) Preprocess pattern to identify any suffixes that are identical to prefixes
5+
This tells us where to continue from if we get a mismatch between a character in our pattern
6+
and the text.
7+
2) Step through the text one character at a time and compare it to a character in the pattern
8+
updating our location within the pattern if necessary
9+
"""
10+
def kmp(pattern, text, len_p=None, len_t=None):
    """Return True if ``pattern`` occurs somewhere in ``text``.

    Uses the Knuth-Morris-Pratt algorithm: the pattern is preprocessed into
    a failure array so that the text is scanned once, without ever moving
    backwards in it, for O(len(text) + len(pattern)) total work.

    Args:
        pattern: The sequence to search for (typically a ``str``).
        text: The sequence to search in.
        len_p: Ignored. Kept only so existing callers that pass it still work.
        len_t: Ignored. Kept only so existing callers that pass it still work.

    Returns:
        True if ``pattern`` is a contiguous sub-sequence of ``text``,
        otherwise False. An empty pattern matches any text, mirroring the
        behaviour of ``"" in text``.
    """
    # An empty pattern trivially matches; this also avoids indexing
    # pattern[0] below when there is nothing to index.
    if not pattern:
        return True

    # 1) Construct the failure array.
    # failure[k] is the length of the longest proper prefix of
    # pattern[:k + 1] that is also a suffix of it.
    failure = [0]
    matched = 0
    for char in pattern[1:]:
        # On a mismatch fall back to the next-shorter border rather than
        # straight to 0; otherwise overlapping prefixes are lost.
        while matched > 0 and pattern[matched] != char:
            matched = failure[matched - 1]
        if pattern[matched] == char:
            matched += 1
        failure.append(matched)

    # 2) Step through text searching for pattern.
    last = len(pattern) - 1
    j = 0  # number of pattern characters currently matched
    for char in text:
        # Reuse the longest prefix of the pattern that is still consistent
        # with the text seen so far. The fallback is keyed on the last
        # *matched* position (j - 1), not on the mismatching one.
        while j > 0 and pattern[j] != char:
            j = failure[j - 1]
        if pattern[j] == char:
            if j == last:
                return True
            j += 1
    return False
37+
38+
39+
if __name__ == '__main__':
    # Test 1) the pattern occurs in text1 right after a partial match
    # ("abc1abc1" followed by "abc12"), and does not occur in text2 at all.
    pattern = "abc1abc12"
    text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
    text2 = "alskfjaldsk23adsfabcabc"
    assert kmp(pattern, text1)
    assert not kmp(pattern, text2)

    # Test 2) two near-misses ("ABABZ", "ABABY") precede the real match.
    pattern = "ABABX"
    text = "ABABZABABYABABX"
    assert kmp(pattern, text)

0 commit comments

Comments
 (0)