5
5
'find'
6
6
]
7
7
8
# Base of the polynomial rolling hash used by the Rabin-Karp helpers.
PRIME_NUMBER = 257
# Modulus applied to every hash value and to every power of the base.
MOD = 1000000007
9
+
8
10
def find (text , query , algorithm ):
9
11
"""
10
12
Finds occurrence of a query string within the text string.
@@ -22,6 +24,7 @@ def find(text, query, algorithm):
22
24
Currently the following algorithms are
23
25
supported,
24
26
'kmp' -> Knuth-Morris-Pratt as given in [1].
27
+ 'rabin_karp' -> Rabin–Karp algorithm as given in [2].
25
28
26
29
Returns
27
30
=======
@@ -52,6 +55,7 @@ def find(text, query, algorithm):
52
55
==========
53
56
54
57
.. [1] https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
58
+ .. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
55
59
"""
56
60
import pydatastructs .strings .algorithms as algorithms
57
61
func = "_" + algorithm
@@ -64,6 +68,8 @@ def find(text, query, algorithm):
64
68
65
69
66
70
def _knuth_morris_pratt(text, query):
    """
    Entry point for the Knuth-Morris-Pratt search.

    Parameters
    ==========

    text
        The sequence that is searched.
    query
        The sequence that is searched for.

    Returns
    =======

    An empty ``DynamicOneDimensionalArray`` of ``int`` when either
    argument has length zero; otherwise whatever ``_do_match`` returns
    for ``text``, ``query`` and the table built from ``query``.
    """
    # Only build the failure table when there is something to match.
    if len(text) != 0 and len(query) != 0:
        table = _build_kmp_table(query)
        return _do_match(text, query, table)
    return DynamicOneDimensionalArray(int, 0)
69
75
@@ -107,3 +113,38 @@ def _do_match(string, query, kmp_table):
107
113
k = k + 1
108
114
109
115
return positions
116
+
117
def _p_pow(length, p=PRIME_NUMBER, m=MOD):
    """
    Builds a table of successive powers of ``p`` reduced modulo ``m``.

    Parameters
    ==========

    length: int
        Number of entries in the table.
    p: int
        Base whose powers are tabulated.
    m: int
        Modulus applied after every multiplication.

    Returns
    =======

    OneDimensionalArray
        Entry ``0`` is ``1`` and entry ``i`` (for ``i >= 1``) is the
        previous entry multiplied by ``p`` and reduced modulo ``m``.
    """
    powers = OneDimensionalArray(int, length)
    current = 1
    powers[0] = current
    for idx in range(1, length):
        # Carry the previous power forward instead of re-reading the table.
        current = (current * p) % m
        powers[idx] = current
    return powers
123
+
124
def _hash_str(string, p=PRIME_NUMBER, m=MOD):
    """
    Computes the polynomial hash of a string, i.e. the sum of
    ``ord(string[i]) * p**i`` over all positions ``i``, reduced modulo ``m``.

    Parameters
    ==========

    string: str
        The string to hash.
    p: int
        Base of the polynomial.
    m: int
        Modulus applied to the running hash and to the running power.

    Returns
    =======

    int
        The hash value. ``0`` for an empty string.
    """
    hash_value = 0
    # Running value of p**i % m; avoids allocating a whole table of powers
    # that would only be traversed once, and keeps the empty string valid.
    power = 1
    for char in string:
        hash_value = (hash_value + ord(char) * power) % m
        power = (power * p) % m
    return hash_value
130
+
131
def _rabin_karp(text, query):
    """
    Finds every occurrence of ``query`` in ``text`` by comparing polynomial
    hashes of the query and of each window of the text, and confirming each
    hash match with a direct string comparison.

    Parameters
    ==========

    text: str
        The string that is searched.
    query: str
        The string that is searched for.

    Returns
    =======

    DynamicOneDimensionalArray
        Start indices of the occurrences of ``query`` in ``text``, in
        increasing order. Empty when either string is empty or when
        ``query`` is longer than ``text``.
    """
    t = len(text)
    q = len(query)
    positions = DynamicOneDimensionalArray(int, 0)
    # No window of length q exists when q > t, so skip the hashing work.
    if q == 0 or t == 0 or q > t:
        return positions

    query_hash = _hash_str(query)
    # text_hash[i] is the hash of the prefix text[:i]; text_hash[0] stays 0.
    text_hash = OneDimensionalArray(int, t + 1)
    text_hash.fill(0)
    p_pow = _p_pow(t)

    for i in range(t):
        text_hash[i + 1] = (text_hash[i] + ord(text[i]) * p_pow[i]) % MOD
    for i in range(t - q + 1):
        # Hash of the window text[i:i + q], still scaled by PRIME_NUMBER**i.
        # MOD is added before subtracting to keep the value non-negative.
        curr_hash = (text_hash[i + q] + MOD - text_hash[i]) % MOD
        # Scale the query hash up by the same power rather than dividing
        # the window hash, which would need a modular inverse.
        if curr_hash == (query_hash * p_pow[i]) % MOD:
            # Equal hashes do not guarantee equal strings (different strings
            # can collide), so confirm the match before recording it.
            if text[i:i + q] == query:
                positions.append(i)

    return positions
0 commit comments