@@ -2,8 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using Float = System.Single;
-
 using System;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.CommandLine;
@@ -49,7 +47,7 @@ public sealed class Arguments : OnlineLinearArguments
             [Argument(ArgumentType.AtMostOnce, HelpText = "Regularizer constant", ShortName = "lambda", SortOrder = 50)]
             [TGUI(SuggestedSweeps = "0.00001-0.1;log;inc:10")]
             [TlcModule.SweepableFloatParamAttribute("Lambda", 0.00001f, 0.1f, 10, isLogScale: true)]
-            public Float Lambda = (Float)0.001;
+            public float Lambda = (float)0.001;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Batch size", ShortName = "batch", SortOrder = 190)]
             [TGUI(Label = "Batch Size")]
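
For orientation, a minimal usage sketch of the two settings this hunk touches. Hedged: the diff shows only the Lambda and BatchSize fields of Arguments; the construction below is illustrative, not the trainer's actual entry point.

    // Illustrative only: configure the regularizer and batch size used by the updates below.
    var args = new Arguments
    {
        Lambda = 0.001f,  // regularizer constant; the sweep explores [1e-5, 0.1] on a log scale
        BatchSize = 1     // batch size k for the accumulated update; 1 means per-instance updates
    };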
@@ -79,9 +77,9 @@ public sealed class Arguments : OnlineLinearArguments
         // weightsUpdate/weightsUpdateScale/biasUpdate are similar to weights/weightsScale/bias, in that
         // all elements of weightsUpdate are considered to be multiplied by weightsUpdateScale, and the
         // bias update term is not considered to be multiplied by the scale.
-        private VBuffer<Float> _weightsUpdate;
-        private Float _weightsUpdateScale;
-        private Float _biasUpdate;
+        private VBuffer<float> _weightsUpdate;
+        private float _weightsUpdateScale;
+        private float _biasUpdate;
 
         protected override bool NeedCalibration => true;
 
@@ -114,7 +112,7 @@ protected override void CheckLabel(RoleMappedData data)
         /// <summary>
         /// Return the raw margin from the decision hyperplane
         /// </summary>
-        protected override Float Margin(ref VBuffer<Float> feat)
+        protected override float Margin(ref VBuffer<float> feat)
         {
             return Bias + VectorUtils.DotProduct(ref feat, ref Weights) * WeightsScale;
         }
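
Note: the margin folds the running scalar WeightsScale back in at scoring time, so the multiplicative shrinkage in UpdateWeights below can touch one scalar instead of every weight. A standalone sketch of that lazy-scaling idea, using plain arrays and hypothetical names:

    // Shrinking every weight by c is O(1): fold it into one scalar...
    float weightsScale = 1f;
    void Shrink(float c) => weightsScale *= c;

    // ...and recover the true margin at scoring time: f(x) = bias + (w . x) * scale.
    float RawMargin(float[] w, float[] x, float bias)
    {
        float dot = 0;
        for (int i = 0; i < w.Length; i++)
            dot += w[i] * x[i];
        return bias + dot * weightsScale;
    }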
@@ -134,7 +132,7 @@ protected override void InitCore(IChannel ch, int numFeatures, LinearPredictor p
             if (predictor == null)
                 VBufferUtils.Densify(ref Weights);
 
-            _weightsUpdate = VBufferUtils.CreateEmpty<Float>(numFeatures);
+            _weightsUpdate = VBufferUtils.CreateEmpty<float>(numFeatures);
         }
 
         protected override void BeginIteration(IChannel ch)
@@ -148,10 +146,10 @@ private void BeginBatch()
             _batch++;
             _numBatchExamples = 0;
             _biasUpdate = 0;
-            _weightsUpdate = new VBuffer<Float>(_weightsUpdate.Length, 0, _weightsUpdate.Values, _weightsUpdate.Indices);
+            _weightsUpdate = new VBuffer<float>(_weightsUpdate.Length, 0, _weightsUpdate.Values, _weightsUpdate.Indices);
         }
 
-        private void FinishBatch(ref VBuffer<Float> weightsUpdate, Float weightsUpdateScale)
+        private void FinishBatch(ref VBuffer<float> weightsUpdate, float weightsUpdateScale)
         {
             if (_numBatchExamples > 0)
                 UpdateWeights(ref weightsUpdate, weightsUpdateScale);
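
Note: BeginBatch clears _weightsUpdate by rebuilding the VBuffer over its existing Values/Indices arrays with a count of zero, which empties it logically without reallocating. A sketch of the idiom with a simplified stand-in type (hypothetical; it mirrors only the fields used here):

    // Reuse the backing arrays; only the logical entry count changes.
    struct SparseVector
    {
        public int Length;      // logical dimensionality
        public int Count;       // number of explicit (index, value) pairs
        public float[] Values;  // backing values, capacity preserved across batches
        public int[] Indices;   // backing indices, capacity preserved across batches
    }

    static SparseVector Clear(SparseVector v) =>
        new SparseVector { Length = v.Length, Count = 0, Values = v.Values, Indices = v.Indices };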
@@ -161,19 +159,19 @@ private void FinishBatch(ref VBuffer<Float> weightsUpdate, Float weightsUpdateSc
         /// <summary>
         /// Observe an example and update weights if necessary
         /// </summary>
-        protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
+        protected override void ProcessDataInstance(IChannel ch, ref VBuffer<float> feat, float label, float weight)
         {
             base.ProcessDataInstance(ch, ref feat, label, weight);
 
             // compute the update and update if needed
-            Float output = Margin(ref feat);
-            Float trueOutput = (label > 0 ? 1 : -1);
-            Float loss = output * trueOutput - 1;
+            float output = Margin(ref feat);
+            float trueOutput = (label > 0 ? 1 : -1);
+            float loss = output * trueOutput - 1;
 
             // Accumulate the update if there is a loss and we have larger batches.
             if (Args.BatchSize > 1 && loss < 0)
             {
-                Float currentBiasUpdate = trueOutput * weight;
+                float currentBiasUpdate = trueOutput * weight;
                 _biasUpdate += currentBiasUpdate;
                 // Only aggregate in the case where we're handling multiple instances.
                 if (_weightsUpdate.Count == 0)
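
Note: loss here is the negated hinge slack. With trueOutput in {-1, +1}, loss < 0 holds exactly when y * f(x) < 1, i.e. when the example violates the margin, and only such examples contribute trueOutput * weight to the accumulated update. The test in isolation (hypothetical helper):

    // Hinge-loss margin check: an example triggers an update iff y * f(x) < 1.
    static bool ViolatesMargin(float output, float label)
    {
        float y = label > 0 ? 1 : -1;  // labels are mapped to {-1, +1}
        return output * y - 1 < 0;     // loss < 0  <=>  y * f(x) < 1
    }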
@@ -192,7 +190,7 @@ protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat
                 Contracts.Assert(_weightsUpdate.Count == 0);
                 // If we aren't aggregating multiple instances, just use the instance's
                 // vector directly.
-                Float currentBiasUpdate = trueOutput * weight;
+                float currentBiasUpdate = trueOutput * weight;
                 _biasUpdate += currentBiasUpdate;
                 FinishBatch(ref feat, currentBiasUpdate);
             }
@@ -206,13 +204,13 @@ protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat
         /// Updates the weights at the end of the batch. Since weightsUpdate can be an instance
         /// feature vector, this function should not change the contents of weightsUpdate.
         /// </summary>
-        private void UpdateWeights(ref VBuffer<Float> weightsUpdate, Float weightsUpdateScale)
+        private void UpdateWeights(ref VBuffer<float> weightsUpdate, float weightsUpdateScale)
         {
             Contracts.Assert(_batch > 0);
 
             // REVIEW: This is really odd - normally lambda is small, so the learning rate is initially huge!?!?!
             // Changed from the paper's recommended rate = 1 / (lambda * t) to rate = 1 / (1 + lambda * t).
-            Float rate = 1 / (1 + Args.Lambda * _batch);
+            float rate = 1 / (1 + Args.Lambda * _batch);
 
             // w_{t+1/2} = (1 - eta*lambda) w_t + eta/k * totalUpdate
             WeightsScale *= 1 - rate * Args.Lambda;
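
Note: this is the Pegasos-style step: shrink the weights by (1 - rate * lambda), applied lazily through WeightsScale, then add rate / k times the batch's accumulated update. The modified rate schedule that the REVIEW comment discusses, evaluated at the default Lambda = 0.001 (illustrative helper):

    // rate = 1 / (1 + lambda * t) starts near 1; the paper's 1 / (lambda * t)
    // would start at 1000 for lambda = 0.001, which is the anomaly noted above.
    static float Rate(float lambda, int t) => 1 / (1 + lambda * t);
    // Rate(0.001f, 1)    ~= 0.999
    // Rate(0.001f, 1000) ~= 0.5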
@@ -226,7 +224,7 @@ private void UpdateWeights(ref VBuffer<Float> weightsUpdate, Float weightsUpdate
             // w_{t+1} = min{1, 1/sqrt(lambda)/|w_{t+1/2}|} * w_{t+1/2}
             if (Args.PerformProjection)
             {
-                Float normalizer = 1 / (MathUtils.Sqrt(Args.Lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale));
+                float normalizer = 1 / (MathUtils.Sqrt(Args.Lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale));
                 if (normalizer < 1)
                 {
                     // REVIEW: Why would we not scale _bias if we're scaling the weights?
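
Note: the projection rescales the weights into the ball of radius 1 / sqrt(lambda), per the Pegasos analysis; when the scaled norm exceeds that radius, normalizer < 1 and the weights shrink by it. A sketch of the step under that reading; the body of the if is cut off in this hunk, so the final line is an assumption, as is leaving the bias unscaled per the REVIEW question:

    // Project onto { w : ||w|| <= 1/sqrt(lambda) } by shrinking the lazy scale.
    static void Project(float lambda, float weightsNorm, ref float weightsScale)
    {
        float normalizer = 1 / ((float)Math.Sqrt(lambda) * weightsNorm * Math.Abs(weightsScale));
        if (normalizer < 1)
            weightsScale *= normalizer;  // assumed continuation; bias left unscaled
    }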