@@ -31,6 +31,7 @@ private class TestClass
31
31
{
32
32
public string A ;
33
33
public string [ ] OutputTokens ;
34
+ public float [ ] Features = null ;
34
35
}
35
36
36
37
[ Fact ]
@@ -41,7 +42,7 @@ public void TextFeaturizerWithPredefinedStopWordRemoverTest()
41
42
var dataView = ML . Data . LoadFromEnumerable ( data ) ;
42
43
43
44
var options = new TextFeaturizingEstimator . Options ( ) { StopWordsRemoverOptions = new StopWordsRemovingEstimator . Options ( ) , OutputTokensColumnName = "OutputTokens" } ;
44
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
45
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
45
46
var model = pipeline . Fit ( dataView ) ;
46
47
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
47
48
var prediction = engine . Predict ( data [ 0 ] ) ;
@@ -51,6 +52,95 @@ public void TextFeaturizerWithPredefinedStopWordRemoverTest()
51
52
Assert . Equal ( "stop words" , string . Join ( " " , prediction . OutputTokens ) ) ;
52
53
}
53
54
55
/// <summary>
/// Exercises the text featurizer with only a unigram word-bag extractor enabled:
/// the character extractor is set to null and no vector normalization is applied.
/// Verifies both the emitted tokens and the resulting feature vector for two rows.
/// </summary>
[Fact]
public void TextFeaturizerWithWordFeatureExtractorTest()
{
    var data = new[] { new TestClass() { A = "This is some text in english", OutputTokens = null },
                       new TestClass() { A = "This is another example", OutputTokens = null } };
    var dataView = ML.Data.LoadFromEnumerable(data);

    var options = new TextFeaturizingEstimator.Options()
    {
        WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 1 },
        CharFeatureExtractor = null,
        Norm = TextFeaturizingEstimator.NormFunction.None,
        OutputTokensColumnName = "OutputTokens"
    };
    var pipeline = ML.Transforms.Text.FeaturizeText("Features", options, "A");
    var model = pipeline.Fit(dataView);
    var engine = model.CreatePredictionEngine<TestClass, TestClass>(ML);

    var prediction = engine.Predict(data[0]);
    // The tokens are expected to be the lower-cased input. ToLowerInvariant keeps this
    // expectation independent of the culture configured on the machine running the test.
    Assert.Equal(data[0].A.ToLowerInvariant(), string.Join(" ", prediction.OutputTokens));
    // Eight slots in total; row 0 (six words) sets the first six.
    // NOTE(review): presumably one slot per distinct word, in order of first appearance - confirm.
    var expected = new float[] { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f };
    Assert.Equal(expected, prediction.Features);

    prediction = engine.Predict(data[1]);
    Assert.Equal(data[1].A.ToLowerInvariant(), string.Join(" ", prediction.OutputTokens));
    // Row 1 shares its first two words with row 0 and sets the two trailing slots.
    expected = new float[] { 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f };
    Assert.Equal(expected, prediction.Features);
}
83
+
84
/// <summary>
/// Exercises the text featurizer with the word extractor set to null and a unigram
/// extractor assigned to the character side, without vector normalization.
/// Each of the two rows is predicted and its tokens and feature vector are checked.
/// </summary>
[Fact]
public void TextFeaturizerWithCharFeatureExtractorTest()
{
    var firstRow = new TestClass() { A = "abc efg", OutputTokens = null };
    var secondRow = new TestClass() { A = "xyz", OutputTokens = null };
    var rows = new[] { firstRow, secondRow };
    var trainingView = ML.Data.LoadFromEnumerable(rows);

    var charOnlyOptions = new TextFeaturizingEstimator.Options()
    {
        WordFeatureExtractor = null,
        CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 1 },
        Norm = TextFeaturizingEstimator.NormFunction.None,
        OutputTokensColumnName = "OutputTokens"
    };
    var engine = ML.Transforms.Text.FeaturizeText("Features", charOnlyOptions, "A")
        .Fit(trainingView)
        .CreatePredictionEngine<TestClass, TestClass>(ML);

    // Twelve slots overall: the first row fills the leading nine.
    float[] expectedForFirst = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f };
    // The second row fills slot 0 and the trailing four.
    float[] expectedForSecond = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f };

    var firstResult = engine.Predict(firstRow);
    // Tokens are compared against the untouched input text.
    Assert.Equal(firstRow.A, string.Join(" ", firstResult.OutputTokens));
    Assert.Equal(expectedForFirst, firstResult.Features);

    var secondResult = engine.Predict(secondRow);
    Assert.Equal(secondRow.A, string.Join(" ", secondResult.OutputTokens));
    Assert.Equal(expectedForSecond, secondResult.Features);
}
112
+
113
/// <summary>
/// Exercises the text featurizer with L2 normalization and a unigram extractor on the
/// character side. The word extractor is not assigned here, so it keeps whatever value
/// the options object starts with. Checks the tokens and the normalized feature vector
/// for two rows.
/// </summary>
[Fact]
public void TextFeaturizerWithL2NormTest()
{
    var data = new[] { new TestClass() { A = "abc xyz", OutputTokens = null },
                       new TestClass() { A = "xyz", OutputTokens = null } };
    var dataView = ML.Data.LoadFromEnumerable(data);

    var options = new TextFeaturizingEstimator.Options()
    {
        CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 1 },
        Norm = TextFeaturizingEstimator.NormFunction.L2,
        OutputTokensColumnName = "OutputTokens"
    };
    var pipeline = ML.Transforms.Text.FeaturizeText("Features", options, "A");
    var model = pipeline.Fit(dataView);
    var engine = model.CreatePredictionEngine<TestClass, TestClass>(ML);

    var prediction = engine.Predict(data[0]);
    Assert.Equal(data[0].A, string.Join(" ", prediction.OutputTokens));
    // 1/3 repeated nine times and 1/sqrt(2) repeated twice: each run has unit Euclidean length.
    // NOTE(review): presumably the first nine slots are character features and the last two
    // are word features, normalized separately - confirm.
    var exp1 = 0.333333343f;
    var exp2 = 0.707106769f;
    var expected = new float[] { exp1, exp1, exp1, exp1, exp1, exp1, exp1, exp1, exp1, exp2, exp2 };
    AssertFeaturesAlmostEqual(expected, prediction.Features);

    prediction = engine.Predict(data[1]);
    // 1/sqrt(5): five non-zero slots among the first nine.
    exp1 = 0.4472136f;
    Assert.Equal(data[1].A, string.Join(" ", prediction.OutputTokens));
    expected = new float[] { exp1, 0.0f, 0.0f, 0.0f, 0.0f, exp1, exp1, exp1, exp1, 0.0f, 1.0f };
    AssertFeaturesAlmostEqual(expected, prediction.Features);
}

/// <summary>
/// Asserts that two feature vectors have the same length and agree element by element
/// to six decimal places. Used for normalized values, which are computed floats and
/// should not be compared bit-for-bit.
/// </summary>
/// <param name="expected">The reference vector.</param>
/// <param name="actual">The vector produced by the prediction engine.</param>
private static void AssertFeaturesAlmostEqual(float[] expected, float[] actual)
{
    Assert.NotNull(actual);
    Assert.Equal(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        // Widen to double to select the Assert.Equal(double, double, int precision) overload.
        Assert.Equal((double)expected[i], (double)actual[i], 6);
    }
}
143
+
54
144
[ Fact ]
55
145
public void TextFeaturizerWithCustomStopWordRemoverTest ( )
56
146
{
@@ -67,7 +157,7 @@ public void TextFeaturizerWithCustomStopWordRemoverTest()
67
157
OutputTokensColumnName = "OutputTokens" ,
68
158
CaseMode = TextNormalizingEstimator . CaseMode . None
69
159
} ;
70
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
160
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
71
161
var model = pipeline . Fit ( dataView ) ;
72
162
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
73
163
var prediction = engine . Predict ( data [ 0 ] ) ;
@@ -84,7 +174,7 @@ private void TestCaseMode(IDataView dataView, TestClass[] data, TextNormalizingE
84
174
CaseMode = caseMode ,
85
175
OutputTokensColumnName = "OutputTokens"
86
176
} ;
87
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
177
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
88
178
var model = pipeline . Fit ( dataView ) ;
89
179
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
90
180
var prediction1 = engine . Predict ( data [ 0 ] ) ;
@@ -133,7 +223,7 @@ private void TestKeepNumbers(IDataView dataView, TestClass[] data, bool keepNumb
133
223
CaseMode = TextNormalizingEstimator . CaseMode . None ,
134
224
OutputTokensColumnName = "OutputTokens"
135
225
} ;
136
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
226
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
137
227
var model = pipeline . Fit ( dataView ) ;
138
228
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
139
229
var prediction1 = engine . Predict ( data [ 0 ] ) ;
@@ -170,7 +260,7 @@ private void TestKeepPunctuations(IDataView dataView, TestClass[] data, bool kee
170
260
CaseMode = TextNormalizingEstimator . CaseMode . None ,
171
261
OutputTokensColumnName = "OutputTokens"
172
262
} ;
173
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
263
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
174
264
var model = pipeline . Fit ( dataView ) ;
175
265
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
176
266
var prediction1 = engine . Predict ( data [ 0 ] ) ;
@@ -208,7 +298,7 @@ private void TestKeepDiacritics(IDataView dataView, TestClass[] data, bool keepD
208
298
CaseMode = TextNormalizingEstimator . CaseMode . None ,
209
299
OutputTokensColumnName = "OutputTokens"
210
300
} ;
211
- var pipeline = ML . Transforms . Text . FeaturizeText ( "OutputText " , options , "A" ) ;
301
+ var pipeline = ML . Transforms . Text . FeaturizeText ( "Features " , options , "A" ) ;
212
302
var model = pipeline . Fit ( dataView ) ;
213
303
var engine = model . CreatePredictionEngine < TestClass , TestClass > ( ML ) ;
214
304
var prediction1 = engine . Predict ( data [ 0 ] ) ;
0 commit comments