Skip to content

Commit 9c6ed37

Browse files
authored
Added Rabin–Karp algorithm (#413)
1 parent 5cf53a2 commit 9c6ed37

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

pydatastructs/strings/algorithms.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
'find'
66
]
77

8+
# Base and modulus of the polynomial hash used for Rabin-Karp matching;
# they are the default ``p`` and ``m`` of ``_p_pow`` and ``_hash_str``.
PRIME_NUMBER, MOD = 257, 1000000007
9+
810
def find(text, query, algorithm):
911
"""
1012
Finds occurrence of a query string within the text string.
@@ -22,6 +24,7 @@ def find(text, query, algorithm):
2224
Currently the following algorithms are
2325
supported,
2426
'kmp' -> Knuth-Morris-Pratt as given in [1].
27+
'rabin_karp' -> Rabin–Karp algorithm as given in [2].
2528
2629
Returns
2730
=======
@@ -52,6 +55,7 @@ def find(text, query, algorithm):
5255
==========
5356
5457
.. [1] https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
58+
.. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
5559
"""
5660
import pydatastructs.strings.algorithms as algorithms
5761
func = "_" + algorithm
@@ -64,6 +68,8 @@ def find(text, query, algorithm):
6468

6569

6670
def _knuth_morris_pratt(text, query):
    """
    Searches for ``query`` inside ``text`` with the
    Knuth-Morris-Pratt algorithm.

    Parameters
    ==========

    text
        The sequence that is searched.
    query
        The pattern to look for.

    Returns
    =======

    An empty ``DynamicOneDimensionalArray`` of ``int`` when either
    argument has length zero, otherwise the result of ``_do_match``
    run with the table built by ``_build_kmp_table`` for ``query``.
    """
    # Nothing can be matched when either side is empty.
    if not len(text) or not len(query):
        return DynamicOneDimensionalArray(int, 0)
    return _do_match(text, query, _build_kmp_table(query))
6975

@@ -107,3 +113,38 @@ def _do_match(string, query, kmp_table):
107113
k = k + 1
108114

109115
return positions
116+
117+
def _p_pow(length, p=PRIME_NUMBER, m=MOD):
    """
    Builds the table of successive powers of ``p``.

    Parameters
    ==========

    length: int
        Number of powers to compute.
    p: int
        Base whose powers are tabulated.
    m: int
        Modulus applied after every multiplication.

    Returns
    =======

    OneDimensionalArray
        Array ``a`` of size ``length`` with ``a[0] == 1`` and
        ``a[i] == (a[i-1] * p) % m`` for every later index.
    """
    powers = OneDimensionalArray(int, length)
    powers[0] = 1
    current = 1
    for idx in range(1, length):
        # Carry the previous power in a local instead of re-reading it.
        current = (current * p) % m
        powers[idx] = current
    return powers
123+
124+
def _hash_str(string, p=PRIME_NUMBER, m=MOD):
    """
    Computes the polynomial hash of a string, i.e. the sum of
    ``ord(string[i]) * p**i`` over all indices ``i``, reduced
    modulo ``m``.

    Parameters
    ==========

    string: str
        The string to hash.
    p: int
        Base of the polynomial.
    m: int
        Modulus applied to the running hash and to the powers.

    Returns
    =======

    int
        The hash value; ``0`` for an empty string.
    """
    hash_value = 0
    # Power of p for the current index, kept as a running product so that
    # no auxiliary table has to be allocated (and empty input needs none).
    power = 1
    for char in string:
        hash_value = (hash_value + ord(char) * power) % m
        power = (power * p) % m
    return hash_value
130+
131+
def _rabin_karp(text, query):
    """
    Finds the occurrences of ``query`` in ``text`` by comparing
    polynomial hashes of the query and of every window of ``text``
    that has the same length.

    Parameters
    ==========

    text: str
        The string that is searched.
    query: str
        The string to look for.

    Returns
    =======

    DynamicOneDimensionalArray
        Start indices, in increasing order, of the windows of
        ``text`` that are equal to ``query``. Empty when either
        string is empty.
    """
    t = len(text)
    q = len(query)
    positions = DynamicOneDimensionalArray(int, 0)
    if q == 0 or t == 0:
        return positions

    query_hash = _hash_str(query)
    # text_hash[k] holds the hash of the prefix text[:k].
    text_hash = OneDimensionalArray(int, t + 1)
    text_hash.fill(0)
    p_pow = _p_pow(t)

    for i in range(t):
        text_hash[i+1] = (text_hash[i] + ord(text[i]) * p_pow[i]) % MOD
    for i in range(t - q + 1):
        # Difference of prefix hashes = hash of text[i:i+q] scaled by the
        # i-th power of the base, hence the same scaling on the query hash.
        curr_hash = (text_hash[i + q] + MOD - text_hash[i]) % MOD
        if curr_hash != (query_hash * p_pow[i]) % MOD:
            continue
        # Equal hashes do not guarantee equal strings: confirm the match
        # so that a hash collision is never reported as an occurrence.
        if text[i:i + q] == query:
            positions.append(i)

    return positions

pydatastructs/strings/tests/test_algorithms.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
def test_kmp():
    """Runs the shared string-matching checks with the 'kmp' algorithm."""
    _test_common_string_matching(algorithm='kmp')
77

8+
def test_rka():
    """Runs the shared string-matching checks with the 'rabin_karp' algorithm."""
    _test_common_string_matching(algorithm='rabin_karp')
810

911
def _test_common_string_matching(algorithm):
1012
true_text_pattern_dictionary = {
@@ -26,7 +28,9 @@ def _test_common_string_matching(algorithm):
2628
"Knuth-Morris-Pratt": "-Pratt-",
2729
"abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
2830
"aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
29-
"fullstringmatch": "fullstrinmatch"
31+
"fullstringmatch": "fullstrinmatch",
32+
"abc": "",
33+
"": "abc"
3034
}
3135

3236
for test_case_key in false_text_pattern_dictionary:
@@ -52,13 +56,13 @@ def gen_random_string(length):
5256
if rand_str != query:
5357
freq += 1
5458
text += query + rand_str + query
55-
positions = find(text, query, algorithm="kmp")
59+
positions = find(text, query, algorithm)
5660
assert positions._num == num_times * 2
5761
for i in range(positions._last_pos_filled):
5862
p = positions[i]
5963
assert text[p:p + len(query)] == query
6064

6165
text = gen_random_string(len(query))
6266
if text != query:
63-
positions = find(text, query, algorithm="kmp")
67+
positions = find(text, query, algorithm)
6468
assert positions.size == 0

0 commit comments

Comments
 (0)