From b38dac284abad7a76ade9e783adee2b15ea3d041 Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Thu, 3 Jan 2019 19:27:56 +0000 Subject: [PATCH 1/4] Update Iris tests to use new API --- .../Microsoft.ML.Tests.csproj | 1 - .../Scenarios/IrisPlantClassificationTests.cs | 98 +++++------- ...PlantClassificationWithStringLabelTests.cs | 142 ++++++++---------- 3 files changed, 100 insertions(+), 141 deletions(-) diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 716138ce8f..12c5547921 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -22,7 +22,6 @@ - diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index ad634680cc..500682635d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -3,33 +3,45 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; +using Microsoft.ML.RunTests; using Xunit; -using TextLoader = Microsoft.ML.Legacy.Data.TextLoader; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612, 618 public partial class ScenariosTests { [Fact] - public void TrainAndPredictIrisModelTest() + public void New_TrainAndPredictIrisModelTest() { - string dataPath = GetDataPath("iris.txt"); - - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - - Legacy.PredictionModel model = pipeline.Train(); - - IrisPrediction prediction = model.Predict(new IrisData() + var mlContext = new MLContext(seed: 1, conc: 1); + + var reader = mlContext.Data.CreateTextReader(columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4) + } + ); + + var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") + .Append(mlContext.Transforms.Normalize("Features")) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); + + // Read training and test data sets + string dataPath = GetDataPath(TestDatasets.iris.trainFilename); + string testDataPath = dataPath; + var trainData = reader.Read(dataPath); + var testData = reader.Read(testDataPath); + + // Train the pipeline + var trainedModel = pipe.Fit(trainData); + + // Make predictions + var predictFunction = trainedModel.CreatePredictionEngine(mlContext); + IrisPrediction prediction = predictFunction.Predict(new IrisData() { SepalLength = 5.1f, SepalWidth = 3.3f, @@ -41,7 +53,7 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(0, prediction.PredictedLabels[1], 2); Assert.Equal(0, prediction.PredictedLabels[2], 2); - prediction = model.Predict(new IrisData() + prediction = predictFunction.Predict(new IrisData() { SepalLength = 6.4f, SepalWidth = 3.1f, @@ -53,7 +65,7 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(0, prediction.PredictedLabels[1], 2); Assert.Equal(1, prediction.PredictedLabels[2], 2); - prediction = model.Predict(new IrisData() + prediction = predictFunction.Predict(new IrisData() { SepalLength = 4.4f, SepalWidth = 3.1f, @@ -65,53 +77,20 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(.8, prediction.PredictedLabels[1], 1); Assert.Equal(0, prediction.PredictedLabels[2], 2); - // Note: Testing against the same data set as a simple way to test evaluation. - // This isn't appropriate in real-world scenarios. - string testDataPath = GetDataPath("iris.txt"); - var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false); - - var evaluator = new ClassificationEvaluator(); - evaluator.OutputTopKAcc = 3; - ClassificationMetrics metrics = evaluator.Evaluate(model, testData); + // Evaluate the trained pipeline + var predicted = trainedModel.Transform(testData); + var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK:3); Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); - Assert.Equal(.06, metrics.LogLoss, 2); + Assert.InRange(metrics.LogLoss, .05, .06); Assert.InRange(metrics.LogLossReduction, 94, 96); - Assert.Equal(1, metrics.TopKAccuracy); Assert.Equal(3, metrics.PerClassLogLoss.Length); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - - ConfusionMatrix matrix = metrics.ConfusionMatrix; - Assert.Equal(3, matrix.Order); - Assert.Equal(3, matrix.ClassNames.Count); - Assert.Equal("0", matrix.ClassNames[0]); - Assert.Equal("1", matrix.ClassNames[1]); - Assert.Equal("2", matrix.ClassNames[2]); - - Assert.Equal(50, matrix[0, 0]); - Assert.Equal(50, matrix["0", "0"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["0", "1"]); - Assert.Equal(0, matrix[0, 2]); - Assert.Equal(0, matrix["0", "2"]); - - Assert.Equal(0, matrix[1, 0]); - Assert.Equal(0, matrix["1", "0"]); - Assert.Equal(48, matrix[1, 1]); - Assert.Equal(48, matrix["1", "1"]); - Assert.Equal(2, matrix[1, 2]); - Assert.Equal(2, matrix["1", "2"]); - - Assert.Equal(0, matrix[2, 0]); - Assert.Equal(0, matrix["2", "0"]); - Assert.Equal(1, matrix[2, 1]); - Assert.Equal(1, matrix["2", "1"]); - Assert.Equal(49, matrix[2, 2]); - Assert.Equal(49, matrix["2", "2"]); + Assert.Equal(1, metrics.TopKAccuracy); } public class IrisData @@ -138,6 +117,5 @@ public class IrisPrediction public float[] PredictedLabels; } } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 6ad0059032..eee7202951 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -3,44 +3,48 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; using Xunit; -using TextLoader = Microsoft.ML.Legacy.Data.TextLoader; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612, 618 public partial class ScenariosTests { [Fact] - public void TrainAndPredictIrisModelWithStringLabelTest() + public void New_TrainAndPredictIrisModelWithStringLabelTest() { + var mlContext = new MLContext(seed: 1, conc: 1); + + var reader = mlContext.Data.CreateTextReader(columns: new[] + { + new TextLoader.Column("SepalLength", DataKind.R4, 0), + new TextLoader.Column("SepalWidth", DataKind.R4, 1), + new TextLoader.Column("PetalLength", DataKind.R4, 2), + new TextLoader.Column("PetalWidth", DataKind.R4, 3), + new TextLoader.Column("IrisPlantType", DataKind.TX, 4), + }, + separatorChar: ',' + ); + + // Read training and test data sets string dataPath = GetDataPath("iris.data"); - - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, separator: ',')); - - pipeline.Add(new Dictionarizer("Label")); // "IrisPlantType" is used as "Label" because of column attribute name on the field. - - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - - var model = pipeline.Train(); - string[] scoreLabels; - model.TryGetScoreLabelNames(out scoreLabels); - - Assert.NotNull(scoreLabels); - Assert.Equal(3, scoreLabels.Length); - Assert.Equal("Iris-setosa", scoreLabels[0]); - Assert.Equal("Iris-versicolor", scoreLabels[1]); - Assert.Equal("Iris-virginica", scoreLabels[2]); - - IrisPrediction prediction = model.Predict(new IrisDataWithStringLabel() + string testDataPath = dataPath; + var trainData = reader.Read(dataPath); + var testData = reader.Read(testDataPath); + + // Create Estimator + var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") + .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.Conversion.MapValueToKey("IrisPlantType", "Label"), TransformerScope.TrainTest) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Plant"))); + + // Train the pipeline + var trainedModel = pipe.Fit(trainData); + + // Make predictions + var predictFunction = trainedModel.CreatePredictionEngine(mlContext); + IrisPredictionWithStringLabel prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 5.1f, SepalWidth = 3.3f, @@ -48,11 +52,12 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 0.2f, }); - Assert.Equal(1, prediction.PredictedLabels[0], 2); - Assert.Equal(0, prediction.PredictedLabels[1], 2); - Assert.Equal(0, prediction.PredictedLabels[2], 2); + Assert.Equal(1, prediction.PredictedScores[0], 2); + Assert.Equal(0, prediction.PredictedScores[1], 2); + Assert.Equal(0, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-setosa"); - prediction = model.Predict(new IrisDataWithStringLabel() + prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 6.4f, SepalWidth = 3.1f, @@ -60,11 +65,12 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 2.2f, }); - Assert.Equal(0, prediction.PredictedLabels[0], 2); - Assert.Equal(0, prediction.PredictedLabels[1], 2); - Assert.Equal(1, prediction.PredictedLabels[2], 2); + Assert.Equal(0, prediction.PredictedScores[0], 2); + Assert.Equal(0, prediction.PredictedScores[1], 2); + Assert.Equal(1, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-virginica"); - prediction = model.Predict(new IrisDataWithStringLabel() + prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 4.4f, SepalWidth = 3.1f, @@ -72,57 +78,25 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 1.2f, }); - Assert.Equal(.2, prediction.PredictedLabels[0], 1); - Assert.Equal(.8, prediction.PredictedLabels[1], 1); - Assert.Equal(0, prediction.PredictedLabels[2], 2); + Assert.Equal(.2, prediction.PredictedScores[0], 1); + Assert.Equal(.8, prediction.PredictedScores[1], 1); + Assert.Equal(0, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-versicolor"); - // Note: Testing against the same data set as a simple way to test evaluation. - // This isn't appropriate in real-world scenarios. - string testDataPath = GetDataPath("iris.data"); - var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false, separator: ','); - - var evaluator = new ClassificationEvaluator(); - evaluator.OutputTopKAcc = 3; - ClassificationMetrics metrics = evaluator.Evaluate(model, testData); + // Evaluate the trained pipeline + var predicted = trainedModel.Transform(testData); + var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK: 3); Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); - Assert.Equal(.06, metrics.LogLoss, 2); + Assert.InRange(metrics.LogLoss, .05, .06); Assert.InRange(metrics.LogLossReduction, 94, 96); - Assert.Equal(1, metrics.TopKAccuracy); Assert.Equal(3, metrics.PerClassLogLoss.Length); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - - ConfusionMatrix matrix = metrics.ConfusionMatrix; - Assert.Equal(3, matrix.Order); - Assert.Equal(3, matrix.ClassNames.Count); - Assert.Equal("Iris-setosa", matrix.ClassNames[0]); - Assert.Equal("Iris-versicolor", matrix.ClassNames[1]); - Assert.Equal("Iris-virginica", matrix.ClassNames[2]); - - Assert.Equal(50, matrix[0, 0]); - Assert.Equal(50, matrix["Iris-setosa", "Iris-setosa"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["Iris-setosa", "Iris-versicolor"]); - Assert.Equal(0, matrix[0, 2]); - Assert.Equal(0, matrix["Iris-setosa", "Iris-virginica"]); - - Assert.Equal(0, matrix[1, 0]); - Assert.Equal(0, matrix["Iris-versicolor", "Iris-setosa"]); - Assert.Equal(48, matrix[1, 1]); - Assert.Equal(48, matrix["Iris-versicolor", "Iris-versicolor"]); - Assert.Equal(2, matrix[1, 2]); - Assert.Equal(2, matrix["Iris-versicolor", "Iris-virginica"]); - - Assert.Equal(0, matrix[2, 0]); - Assert.Equal(0, matrix["Iris-virginica", "Iris-setosa"]); - Assert.Equal(1, matrix[2, 1]); - Assert.Equal(1, matrix["Iris-virginica", "Iris-versicolor"]); - Assert.Equal(49, matrix[2, 2]); - Assert.Equal(49, matrix["Iris-virginica", "Iris-virginica"]); + Assert.Equal(1, metrics.TopKAccuracy); } public class IrisDataWithStringLabel @@ -139,9 +113,17 @@ public class IrisDataWithStringLabel [LoadColumn(3)] public float PetalWidth; - [LoadColumn(4), ColumnName("Label")] + [LoadColumn(4)] public string IrisPlantType; } + + public class IrisPredictionWithStringLabel + { + [ColumnName("Score")] + public float[] PredictedScores; + + [ColumnName("Plant")] + public string PredictedPlant; + } } -#pragma warning restore 612, 618 } From b38109d1ce2f257732f6daf4dcbaa2b84b8866d8 Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Thu, 3 Jan 2019 21:31:57 +0000 Subject: [PATCH 2/4] review comments --- .../Scenarios/IrisPlantClassificationTests.cs | 9 ++++----- .../IrisPlantClassificationWithStringLabelTests.cs | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index 500682635d..73f6c03e8f 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.Scenarios public partial class ScenariosTests { [Fact] - public void New_TrainAndPredictIrisModelTest() + public void TrainAndPredictIrisModelTest() { var mlContext = new MLContext(seed: 1, conc: 1); @@ -79,18 +79,17 @@ public void New_TrainAndPredictIrisModelTest() // Evaluate the trained pipeline var predicted = trainedModel.Transform(testData); - var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK:3); + var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK: 3); Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); - Assert.InRange(metrics.LogLoss, .05, .06); - Assert.InRange(metrics.LogLossReduction, 94, 96); + Assert.Equal(.06, metrics.LogLoss, 2); + Assert.Equal(1, metrics.TopKAccuracy); Assert.Equal(3, metrics.PerClassLogLoss.Length); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - Assert.Equal(1, metrics.TopKAccuracy); } public class IrisData diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index eee7202951..493f5803b5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Scenarios public partial class ScenariosTests { [Fact] - public void New_TrainAndPredictIrisModelWithStringLabelTest() + public void TrainAndPredictIrisModelWithStringLabelTest() { var mlContext = new MLContext(seed: 1, conc: 1); @@ -89,14 +89,14 @@ public void New_TrainAndPredictIrisModelWithStringLabelTest() Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); - Assert.InRange(metrics.LogLoss, .05, .06); + Assert.Equal(.06, metrics.LogLoss, 2); Assert.InRange(metrics.LogLossReduction, 94, 96); + Assert.Equal(1, metrics.TopKAccuracy); Assert.Equal(3, metrics.PerClassLogLoss.Length); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - Assert.Equal(1, metrics.TopKAccuracy); } public class IrisDataWithStringLabel From 9418f5daed3fc9799cdd79e55521ffc39be5420a Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Thu, 3 Jan 2019 22:43:15 +0000 Subject: [PATCH 3/4] review comments -2 . made couple of classes private --- .../IrisPlantClassificationWithStringLabelTests.cs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 493f5803b5..69fe4f699b 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -97,9 +97,14 @@ public void TrainAndPredictIrisModelWithStringLabelTest() Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); + + // Dummy Initialization + var dummy1 = new IrisDataWithStringLabel() { IrisPlantType = default }; + var dummy2 = new IrisPredictionWithStringLabel() { PredictedScores = default, PredictedPlant = default }; + } - public class IrisDataWithStringLabel + private class IrisDataWithStringLabel { [LoadColumn(0)] public float SepalLength; @@ -117,7 +122,7 @@ public class IrisDataWithStringLabel public string IrisPlantType; } - public class IrisPredictionWithStringLabel + private class IrisPredictionWithStringLabel { [ColumnName("Score")] public float[] PredictedScores; From f1a05aa9b0369e77d82db9c202e392380be29b4c Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Thu, 3 Jan 2019 22:59:17 +0000 Subject: [PATCH 4/4] removed dummy initialization, and using properties instead --- .../IrisPlantClassificationWithStringLabelTests.cs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 69fe4f699b..ff38fbebe5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -97,11 +97,6 @@ public void TrainAndPredictIrisModelWithStringLabelTest() Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - - // Dummy Initialization - var dummy1 = new IrisDataWithStringLabel() { IrisPlantType = default }; - var dummy2 = new IrisPredictionWithStringLabel() { PredictedScores = default, PredictedPlant = default }; - } private class IrisDataWithStringLabel @@ -119,16 +114,16 @@ private class IrisDataWithStringLabel public float PetalWidth; [LoadColumn(4)] - public string IrisPlantType; + public string IrisPlantType { get; set; } } private class IrisPredictionWithStringLabel { [ColumnName("Score")] - public float[] PredictedScores; + public float[] PredictedScores { get; set; } [ColumnName("Plant")] - public string PredictedPlant; + public string PredictedPlant { get; set; } } } }