From ad6ea9f80ddeeaf6da1b58f237d4c5794c7c6102 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 5 Jun 2018 13:46:17 -0700 Subject: [PATCH 1/3] Respect normalization in OVA. --- .../Standard/MultiClass/Ova.cs | 9 ++- .../UnitTests/TestCSharpApi.cs | 63 +++++++++++++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs index 895dcea7cf..3d6e1e67b2 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs @@ -200,10 +200,13 @@ public static ModelOperations.PredictorModelOutput CombineOvaModels(IHostEnviron host.CheckValue(input, nameof(input)); EntryPointUtils.CheckInputArgs(host, input); host.CheckNonEmpty(input.ModelArray, nameof(input.ModelArray)); - + // Something tells me we should put normalization as part of macro expansion, but since I get + // subgraph instead of learner it's a bit tricky to get learner and decide whether we should add + // normalization node or not, plus everywhere in code we leave that responsibility to TransformModel. 
+ var normalizedView = input.ModelArray[0].TransformModel.Apply(host, input.TrainingData); using (var ch = host.Start("CombineOvaModels")) { - ISchema schema = input.TrainingData.Schema; + ISchema schema = normalizedView.Schema; var label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(input.LabelColumn), input.LabelColumn, DefaultColumnNames.Label); @@ -211,7 +214,7 @@ public static ModelOperations.PredictorModelOutput CombineOvaModels(IHostEnviron input.FeatureColumn, DefaultColumnNames.Features); var weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(input.WeightColumn), input.WeightColumn, DefaultColumnNames.Weight); - var data = TrainUtils.CreateExamples(input.TrainingData, label, feature, null, weight); + var data = TrainUtils.CreateExamples(normalizedView, label, feature, null, weight); return new ModelOperations.PredictorModelOutput { diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index 4c1969c2d9..007d421a6f 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -593,5 +593,68 @@ public void TestCrossValidationMacroWithStratification() } } } + + [Fact] + public void TestOvaMacro() + { + // Get datasets + var dataPath = GetDataPath(@"iris.txt"); + using (var env = new TlcEnvironment(42)) + { + + var experiment = env.CreateExperiment(); + var importInput = new ML.Data.TextLoader(dataPath); + importInput.Arguments.Column = new ML.Data.TextLoaderColumn[] + { + new ML.Data.TextLoaderColumn { Name = "Label", Source = new[] { new ML.Data.TextLoaderRange(0) } }, + new ML.Data.TextLoaderColumn { Name = "Features", Source = new[] { new ML.Data.TextLoaderRange(1,4) } } + }; + var importOutput = experiment.Add(importInput); + var subGraph = env.CreateExperiment(); + var learnerInput = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier + { + TrainingData = importOutput.Data, + 
NumThreads = 1 + }; + var learnerOutput = subGraph.Add(learnerInput); + var oneVersusAll = new ML.Models.OneVersusAll + { + TrainingData = importOutput.Data, + Nodes = subGraph, + UseProbabilities = true, + }; + var ovaOutput = experiment.Add(oneVersusAll); + var scoreInput = new ML.Transforms.DatasetScorer + { + Data = importOutput.Data, + PredictorModel = ovaOutput.PredictorModel + }; + var scoreOutput = experiment.Add(scoreInput); + + var evalInput = new ML.Models.ClassificationEvaluator + { + Data = scoreOutput.ScoredData + }; + var evalOutput = experiment.Add(evalInput); + experiment.Compile(); + experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); + experiment.Run(); + var data = experiment.GetOutput(evalOutput.OverallMetrics); + var schema = data.Schema; + var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int aucCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == aucCol)) + { + var getter = cursor.GetGetter(aucCol); + b = cursor.MoveNext(); + Assert.True(b); + double auc = 0; + getter(ref auc); + Assert.Equal(0.96, auc, 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + } } } From b4174f7b64117d001976f878f53702e12dfa9b5c Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 5 Jun 2018 13:50:49 -0700 Subject: [PATCH 2/3] some cleanup --- .../UnitTests/TestCSharpApi.cs | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index 007d421a6f..120112dcac 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -597,27 +597,23 @@ public void TestCrossValidationMacroWithStratification() [Fact] public void TestOvaMacro() { - // Get datasets var dataPath = GetDataPath(@"iris.txt"); using (var env = new TlcEnvironment(42)) { - + // Specify 
subgraph for OVA + var subGraph = env.CreateExperiment(); + var learnerInput = new Trainers.StochasticDualCoordinateAscentBinaryClassifier { NumThreads = 1 }; + var learnerOutput = subGraph.Add(learnerInput); + // Create pipeline with OVA and multiclass scoring. var experiment = env.CreateExperiment(); var importInput = new ML.Data.TextLoader(dataPath); - importInput.Arguments.Column = new ML.Data.TextLoaderColumn[] + importInput.Arguments.Column = new TextLoaderColumn[] { - new ML.Data.TextLoaderColumn { Name = "Label", Source = new[] { new ML.Data.TextLoaderRange(0) } }, - new ML.Data.TextLoaderColumn { Name = "Features", Source = new[] { new ML.Data.TextLoaderRange(1,4) } } + new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } }, + new TextLoaderColumn { Name = "Features", Source = new[] { new TextLoaderRange(1,4) } } }; var importOutput = experiment.Add(importInput); - var subGraph = env.CreateExperiment(); - var learnerInput = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier - { - TrainingData = importOutput.Data, - NumThreads = 1 - }; - var learnerOutput = subGraph.Add(learnerInput); - var oneVersusAll = new ML.Models.OneVersusAll + var oneVersusAll = new Models.OneVersusAll { TrainingData = importOutput.Data, Nodes = subGraph, @@ -630,7 +626,6 @@ public void TestOvaMacro() PredictorModel = ovaOutput.PredictorModel }; var scoreOutput = experiment.Add(scoreInput); - var evalInput = new ML.Models.ClassificationEvaluator { Data = scoreOutput.ScoredData @@ -639,6 +634,7 @@ public void TestOvaMacro() experiment.Compile(); experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); experiment.Run(); + var data = experiment.GetOutput(evalOutput.OverallMetrics); var schema = data.Schema; var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int aucCol); From 7897dce2664ff50183fb0e5e3b9cbfdd86a26e98 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 5 Jun 
2018 16:14:11 -0700 Subject: [PATCH 3/3] fix copypaste issues --- .../UnitTests/TestCSharpApi.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index 984d8d661b..b42dee2d52 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -783,16 +783,16 @@ public void TestOvaMacro() var data = experiment.GetOutput(evalOutput.OverallMetrics); var schema = data.Schema; - var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int aucCol); + var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol); Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == aucCol)) + using (var cursor = data.GetRowCursor(col => col == accCol)) { - var getter = cursor.GetGetter(aucCol); + var getter = cursor.GetGetter(accCol); b = cursor.MoveNext(); Assert.True(b); - double auc = 0; - getter(ref auc); - Assert.Equal(0.96, auc, 2); + double acc = 0; + getter(ref acc); + Assert.Equal(0.96, acc, 2); b = cursor.MoveNext(); Assert.False(b); }