16
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
17
# See the License for the specific language governing permissions and
18
18
# limitations under the License.
19
+ import Base. norm
19
20
export ngrams, count, tfnorm, sparse_count, norm, znorm, ngram_iterator, ngrams!
20
21
21
22
immutable NgramStringIterator
22
- string :: String
23
+ string :: AbstractString
23
24
order :: Int32
24
25
truncated_start :: Bool
25
26
end
26
- type StringPosition
27
+ type AbstractStringPosition
27
28
start :: Int32
28
29
fin :: Int32
29
30
nth :: Int32
@@ -35,9 +36,9 @@ function start(ngi :: NgramStringIterator)
35
36
for i = 1 : (ngi. order- 1 ) # necessary because strings are indexed to bytes, not characters
36
37
idx = nextind (ngi. string, idx)
37
38
end
38
- return StringPosition (1 , idx, ngi. order)
39
+ return AbstractStringPosition (1 , idx, ngi. order)
39
40
else
40
- return StringPosition (1 , 1 , 1 )
41
+ return AbstractStringPosition (1 , 1 , 1 )
41
42
end
42
43
end
43
44
61
62
# -------------------------------------------------------------------------------------------------------------------------
62
63
# feature extractors
63
64
# -------------------------------------------------------------------------------------------------------------------------
64
- make_string (words :: String , b, e) = SubString (words, b, e)
65
+ make_string (words :: AbstractString , b, e) = SubString (words, b, e)
65
66
make_string (words :: Array , b, e) = join (words[b: e], " " )
66
67
67
68
function ngrams (words:: Array ; order = 2 , truncated_start = false )
68
- ret = String []
69
+ ret = AbstractString []
69
70
70
71
if ! truncated_start
71
72
for o = 1 : min (order - 1 , length (words))
@@ -81,19 +82,19 @@ function ngrams(words::Array; order = 2, truncated_start = false)
81
82
return ret
82
83
end
83
84
84
- function ngrams (words:: String ; order = 2 , truncated_start = false )
85
- ret = String []
85
+ function ngrams (words :: AbstractString ; order = 2 , truncated_start = false )
86
+ ret = AbstractString []
86
87
return ngrams! (ret, words, order = order, truncated_start = truncated_start)
87
88
end
88
89
89
- function ngrams! (ret :: Array , words :: String ; order = 2 , truncated_start = false )
90
+ function ngrams! (ret :: Array , words :: AbstractString ; order = 2 , truncated_start = false )
90
91
for x in ngram_iterator (words, order = order, truncated_start = truncated_start)
91
92
push! (ret, x)
92
93
end
93
94
return ret
94
95
end
95
96
96
- ngram_iterator (words :: String ; order = 2 , truncated_start = false ) = NgramStringIterator (words, order, truncated_start)
97
+ ngram_iterator (words :: AbstractString ; order = 2 , truncated_start = false ) = NgramStringIterator (words, order, truncated_start)
97
98
98
99
# -------------------------------------------------------------------------------------------------------------------------
99
100
# feature vector operations
@@ -107,7 +108,7 @@ function sparse_count(text, bkg)
107
108
end
108
109
109
110
function dict_count (tokens)
110
- map = DefaultDict {String ,Int32} ()
111
+ map = DefaultDict {AbstractString ,Int32} ()
111
112
for w in tokens
112
113
map[w] += 1
113
114
end
0 commit comments