@@ -2,8 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using Float = System.Single;
-
 using System;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.CommandLine;
@@ -49,7 +47,7 @@ public sealed class Arguments : OnlineLinearArguments
             [Argument(ArgumentType.AtMostOnce, HelpText = "Regularizer constant", ShortName = "lambda", SortOrder = 50)]
             [TGUI(SuggestedSweeps = "0.00001-0.1;log;inc:10")]
             [TlcModule.SweepableFloatParamAttribute("Lambda", 0.00001f, 0.1f, 10, isLogScale: true)]
-            public Float Lambda = (Float)0.001;
+            public float Lambda = (float)0.001;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Batch size", ShortName = "batch", SortOrder = 190)]
             [TGUI(Label = "Batch Size")]
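
For orientation, a minimal usage sketch of the two settings this hunk touches. Hedged: the diff shows only the Lambda and BatchSize fields of Arguments; the construction below is illustrative, not the trainer's actual entry point.

    // Illustrative only: configure the regularizer and batch size used by the updates below.
    var args = new Arguments
    {
        Lambda = 0.001f,  // regularizer constant; the sweep explores [1e-5, 0.1] on a log scale
        BatchSize = 1     // batch size k for the accumulated update; 1 means per-instance updates
    };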
@@ -79,9 +77,9 @@ public sealed class Arguments : OnlineLinearArguments
         // weightsUpdate/weightsUpdateScale/biasUpdate are similar to weights/weightsScale/bias, in that
         // all elements of weightsUpdate are considered to be multiplied by weightsUpdateScale, and the
         // bias update term is not considered to be multiplied by the scale.
-        private VBuffer<Float> _weightsUpdate;
-        private Float _weightsUpdateScale;
-        private Float _biasUpdate;
+        private VBuffer<float> _weightsUpdate;
+        private float _weightsUpdateScale;
+        private float _biasUpdate;
 
         protected override bool NeedCalibration => true;
 
@@ -114,7 +112,7 @@ protected override void CheckLabel(RoleMappedData data)
         /// <summary>
         /// Return the raw margin from the decision hyperplane
         /// </summary>
-        protected override Float Margin(ref VBuffer<Float> feat)
+        protected override float Margin(ref VBuffer<float> feat)
         {
             return Bias + VectorUtils.DotProduct(ref feat, ref Weights) * WeightsScale;
         }
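
Note: the margin folds the running scalar WeightsScale back in at scoring time, so the multiplicative shrinkage in UpdateWeights below can touch one scalar instead of every weight. A standalone sketch of that lazy-scaling idea, using plain arrays and hypothetical names:

    // Shrinking every weight by c is O(1): fold it into one scalar...
    float weightsScale = 1f;
    void Shrink(float c) => weightsScale *= c;

    // ...and recover the true margin at scoring time: f(x) = bias + (w . x) * scale.
    float RawMargin(float[] w, float[] x, float bias)
    {
        float dot = 0;
        for (int i = 0; i < w.Length; i++)
            dot += w[i] * x[i];
        return bias + dot * weightsScale;
    }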
@@ -134,7 +132,7 @@ protected override void InitCore(IChannel ch, int numFeatures, LinearPredictor p
             if (predictor == null)
                 VBufferUtils.Densify(ref Weights);
 
-            _weightsUpdate = VBufferUtils.CreateEmpty<Float>(numFeatures);
+            _weightsUpdate = VBufferUtils.CreateEmpty<float>(numFeatures);
         }
 
         protected override void BeginIteration(IChannel ch)
@@ -148,10 +146,10 @@ private void BeginBatch()
             _batch++;
             _numBatchExamples = 0;
             _biasUpdate = 0;
-            _weightsUpdate = new VBuffer<Float>(_weightsUpdate.Length, 0, _weightsUpdate.Values, _weightsUpdate.Indices);
+            _weightsUpdate = new VBuffer<float>(_weightsUpdate.Length, 0, _weightsUpdate.Values, _weightsUpdate.Indices);
         }
 
-        private void FinishBatch(ref VBuffer<Float> weightsUpdate, Float weightsUpdateScale)
+        private void FinishBatch(ref VBuffer<float> weightsUpdate, float weightsUpdateScale)
         {
             if (_numBatchExamples > 0)
                 UpdateWeights(ref weightsUpdate, weightsUpdateScale);
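
Note: BeginBatch clears _weightsUpdate by rebuilding the VBuffer over its existing Values/Indices arrays with a count of zero, which empties it logically without reallocating. A sketch of the idiom with a simplified stand-in type (hypothetical; it mirrors only the fields used here):

    // Reuse the backing arrays; only the logical entry count changes.
    struct SparseVector
    {
        public int Length;      // logical dimensionality
        public int Count;       // number of explicit (index, value) pairs
        public float[] Values;  // backing values, capacity preserved across batches
        public int[] Indices;   // backing indices, capacity preserved across batches
    }

    static SparseVector Clear(SparseVector v) =>
        new SparseVector { Length = v.Length, Count = 0, Values = v.Values, Indices = v.Indices };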
@@ -161,19 +159,19 @@ private void FinishBatch(ref VBuffer<Float> weightsUpdate, Float weightsUpdateSc
         /// <summary>
         /// Observe an example and update weights if necessary
         /// </summary>
-        protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
+        protected override void ProcessDataInstance(IChannel ch, ref VBuffer<float> feat, float label, float weight)
         {
             base.ProcessDataInstance(ch, ref feat, label, weight);
 
             // compute the update and update if needed
-            Float output = Margin(ref feat);
-            Float trueOutput = (label > 0 ? 1 : -1);
-            Float loss = output * trueOutput - 1;
+            float output = Margin(ref feat);
+            float trueOutput = (label > 0 ? 1 : -1);
+            float loss = output * trueOutput - 1;
 
             // Accumulate the update if there is a loss and we have larger batches.
             if (Args.BatchSize > 1 && loss < 0)
             {
-                Float currentBiasUpdate = trueOutput * weight;
+                float currentBiasUpdate = trueOutput * weight;
                 _biasUpdate += currentBiasUpdate;
                 // Only aggregate in the case where we're handling multiple instances.
                 if (_weightsUpdate.Count == 0)
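
Note: loss here is the negated hinge slack. With trueOutput in {-1, +1}, loss < 0 holds exactly when y * f(x) < 1, i.e. when the example violates the margin, and only such examples contribute trueOutput * weight to the accumulated update. The test in isolation (hypothetical helper):

    // Hinge-loss margin check: an example triggers an update iff y * f(x) < 1.
    static bool ViolatesMargin(float output, float label)
    {
        float y = label > 0 ? 1 : -1;  // labels are mapped to {-1, +1}
        return output * y - 1 < 0;     // loss < 0  <=>  y * f(x) < 1
    }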
@@ -192,7 +190,7 @@ protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat
                 Contracts.Assert(_weightsUpdate.Count == 0);
                 // If we aren't aggregating multiple instances, just use the instance's
                 // vector directly.
-                Float currentBiasUpdate = trueOutput * weight;
+                float currentBiasUpdate = trueOutput * weight;
                 _biasUpdate += currentBiasUpdate;
                 FinishBatch(ref feat, currentBiasUpdate);
             }
@@ -206,13 +204,13 @@ protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat
         /// Updates the weights at the end of the batch. Since weightsUpdate can be an instance
         /// feature vector, this function should not change the contents of weightsUpdate.
         /// </summary>
-        private void UpdateWeights(ref VBuffer<Float> weightsUpdate, Float weightsUpdateScale)
+        private void UpdateWeights(ref VBuffer<float> weightsUpdate, float weightsUpdateScale)
         {
             Contracts.Assert(_batch > 0);
 
             // REVIEW: This is really odd - normally lambda is small, so the learning rate is initially huge!?!?!
             // Changed from the paper's recommended rate = 1 / (lambda * t) to rate = 1 / (1 + lambda * t).
-            Float rate = 1 / (1 + Args.Lambda * _batch);
+            float rate = 1 / (1 + Args.Lambda * _batch);
 
             // w_{t+1/2} = (1 - eta*lambda) w_t + eta/k * totalUpdate
             WeightsScale *= 1 - rate * Args.Lambda;
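
Note: this is the Pegasos-style step: shrink the weights by (1 - rate * lambda), applied lazily through WeightsScale, then add rate / k times the batch's accumulated update. The modified rate schedule that the REVIEW comment discusses, evaluated at the default Lambda = 0.001 (illustrative helper):

    // rate = 1 / (1 + lambda * t) starts near 1; the paper's 1 / (lambda * t)
    // would start at 1000 for lambda = 0.001, which is the anomaly noted above.
    static float Rate(float lambda, int t) => 1 / (1 + lambda * t);
    // Rate(0.001f, 1)    ~= 0.999
    // Rate(0.001f, 1000) ~= 0.5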
@@ -226,7 +224,7 @@ private void UpdateWeights(ref VBuffer<Float> weightsUpdate, Float weightsUpdate
             // w_{t+1} = min{1, 1/sqrt(lambda)/|w_{t+1/2}|} * w_{t+1/2}
             if (Args.PerformProjection)
             {
-                Float normalizer = 1 / (MathUtils.Sqrt(Args.Lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale));
+                float normalizer = 1 / (MathUtils.Sqrt(Args.Lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale));
                 if (normalizer < 1)
                 {
                     // REVIEW: Why would we not scale _bias if we're scaling the weights?
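
Note: the projection rescales the weights into the ball of radius 1 / sqrt(lambda), per the Pegasos analysis; when the scaled norm exceeds that radius, normalizer < 1 and the weights shrink by it. A sketch of the step under that reading; the body of the if is cut off in this hunk, so the final line is an assumption, as is leaving the bias unscaled per the REVIEW question:

    // Project onto { w : ||w|| <= 1/sqrt(lambda) } by shrinking the lazy scale.
    static void Project(float lambda, float weightsNorm, ref float weightsScale)
    {
        float normalizer = 1 / ((float)Math.Sqrt(lambda) * weightsNorm * Math.Abs(weightsScale));
        if (normalizer < 1)
            weightsScale *= normalizer;  // assumed continuation; bias left unscaled
    }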