From 41f8598de2232159c15121a3100179ccc0122ec7 Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Mon, 16 Jul 2018 15:25:36 +0000 Subject: [PATCH 1/5] failing test case for multiclass --- .../TestAutoInference.cs | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index 4d8ec880d1..e36112bd8d 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -362,6 +362,73 @@ public void EntryPointPipelineSweepRoles() Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); } + [Fact] + [TestCategory("EntryPoints")] + public void EntryPointPipelineSweepMultiClass() + { + // Get datasets + var pathData = GetDataPath("adult.train"); + var pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 100; + const string schema = + "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'LabelColumns': ['IsOver50K_'], + 
'WeightColumns': ['education_num'], + 'NameColumns': ['education'], + 'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'], + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Accuracy(micro-avg)', + 'Engine': { + 'Name': 'Defaults' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 2 + } + }, + 'TrainerKind': 'SignatureMultiClassClassifierTrainer', + } + }, + 'BatchSize': 1 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graphJson = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + } + [Fact] public void TestRocketPipelineEngine() { From b7653de2e614794d24d40e4d6e507b6bfe17bf02 Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Tue, 17 Jul 2018 17:58:04 +0000 Subject: [PATCH 2/5] Refactored PipelineSweeperSupportedMetrics Class; added unit test for MultiClassClassification; refactored out unit tests for the PipelineSweeper --- .../AutoInference.cs | 101 +-- .../PipelinePattern.cs | 2 +- .../PipelineSweeperSupportedMetrics.cs | 120 ++++ src/Microsoft.ML/CSharpApi.cs | 55 +- .../Common/EntryPoints/core_manifest.json | 1 + .../TestAutoInference.cs | 663 ++---------------- .../TestPipelineSweeper.cs | 564 +++++++++++++++ 7 files changed, 784 insertions(+), 722 deletions(-) create mode 100644 src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs create mode 100644 test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index 73a358865f..9f3027e9b8 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ 
-51,67 +51,6 @@ public class LevelDependencyMap : Dictionary public class DependencyMap : Dictionary { } - /// - /// AutoInference will support metrics as they are added here. - /// - public sealed class SupportedMetric - { - public static readonly SupportedMetric Auc = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Auc, true); - public static readonly SupportedMetric AccuracyMicro = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMicro, true); - public static readonly SupportedMetric AccuracyMacro = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMacro, true); - public static readonly SupportedMetric L1 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L1, false); - public static readonly SupportedMetric L2 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L2, false); - public static readonly SupportedMetric F1 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.F1, true); - public static readonly SupportedMetric AuPrc = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AuPrc, true); - public static readonly SupportedMetric TopKAccuracy = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.TopKAccuracy, true); - public static readonly SupportedMetric Rms = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Rms, false); - public static readonly SupportedMetric LossFn = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LossFn, false); - public static readonly SupportedMetric RSquared = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.RSquared, false); - public static readonly SupportedMetric LogLoss = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLoss, false); - public static readonly SupportedMetric LogLossReduction = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLossReduction, true); - public static readonly SupportedMetric Ndcg = new 
SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Ndcg, true); - public static readonly SupportedMetric Dcg = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dcg, true); - public static readonly SupportedMetric PositivePrecision = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositivePrecision, true); - public static readonly SupportedMetric PositiveRecall = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositiveRecall, true); - public static readonly SupportedMetric NegativePrecision = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativePrecision, true); - public static readonly SupportedMetric NegativeRecall = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativeRecall, true); - public static readonly SupportedMetric DrAtK = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtK, true); - public static readonly SupportedMetric DrAtPFpr = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtPFpr, true); - public static readonly SupportedMetric DrAtNumPos = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtNumPos, true); - public static readonly SupportedMetric NumAnomalies = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NumAnomalies, true); - public static readonly SupportedMetric ThreshAtK = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtK, false); - public static readonly SupportedMetric ThreshAtP = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtP, false); - public static readonly SupportedMetric ThreshAtNumPos = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtNumPos, false); - public static readonly SupportedMetric Nmi = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Nmi, true); - public static readonly SupportedMetric AvgMinScore = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AvgMinScore, false); - 
public static readonly SupportedMetric Dbi = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dbi, false); - - public string Name { get; } - public bool IsMaximizing { get; } - - private SupportedMetric(string name, bool isMaximizing) - { - Name = name; - IsMaximizing = isMaximizing; - } - - public static SupportedMetric ByName(string name) - { - var fields = - typeof(SupportedMetric).GetFields(BindingFlags.Static | BindingFlags.Public); - - foreach (var field in fields) - { - var metric = (SupportedMetric)field.GetValue(Auc); - if (name.Equals(metric.Name, StringComparison.OrdinalIgnoreCase)) - return metric; - } - throw new NotSupportedException($"Metric '{name}' not supported."); - } - - public override string ToString() => Name; - } - /// /// Class for encapsulating an entrypoint experiment graph /// and keeping track of the input and output nodes. @@ -218,42 +157,8 @@ public sealed class AutoMlMlState : IMlState Desc = "State of an AutoML search and search space.")] public sealed class Arguments : ISupportAutoMlStateFactory { - // REVIEW: These should be the same as SupportedMetrics above. Not sure how to reference that class, - // without the C# API generator trying to create a version of that class in the API as well. 
- public enum Metrics - { - Auc, - AccuracyMicro, - AccuracyMacro, - L2, - F1, - AuPrc, - TopKAccuracy, - Rms, - LossFn, - RSquared, - LogLoss, - LogLossReduction, - Ndcg, - Dcg, - PositivePrecision, - PositiveRecall, - NegativePrecision, - NegativeRecall, - DrAtK, - DrAtPFpr, - DrAtNumPos, - NumAnomalies, - ThreshAtK, - ThreshAtP, - ThreshAtNumPos, - Nmi, - AvgMinScore, - Dbi - }; - [Argument(ArgumentType.Required, HelpText = "Supported metric for evaluator.", ShortName = "metric")] - public Metrics Metric; + public PipelineSweeperSupportedMetrics.Metrics Metric; [Argument(ArgumentType.Required, HelpText = "AutoML engine (pipeline optimizer) that generates next candidates.", ShortName = "engine")] public ISupportIPipelineOptimizerFactory Engine; @@ -271,7 +176,9 @@ public enum Metrics } public AutoMlMlState(IHostEnvironment env, Arguments args) - : this(env, SupportedMetric.ByName(Enum.GetName(typeof(Arguments.Metrics), args.Metric)), args.Engine.CreateComponent(env), + : this(env, + PipelineSweeperSupportedMetrics.GetSupportedMetric(env, Enum.GetName(typeof(PipelineSweeperSupportedMetrics.Metrics), args.Metric)), + args.Engine.CreateComponent(env), args.TerminatorArgs.CreateComponent(env), args.TrainerKind, requestedLearners: args.RequestedLearners) { } diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs index 56a48f654b..44e9a1adcb 100644 --- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs +++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs @@ -205,7 +205,7 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, /// Runs a train-test experiment on the current pipeline, through entrypoints. 
/// public void RunTrainTestExperiment(IDataView trainData, IDataView testData, - AutoInference.SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue, + SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue, out double trainMetricValue) { var experiment = CreateTrainTestExperiment(trainData, testData, trainerKind, true, out var trainTestOutput); diff --git a/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs new file mode 100644 index 0000000000..8b77b83b52 --- /dev/null +++ b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Reflection; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.PipelineInference; +using Microsoft.ML.Runtime.EntryPoints.JsonUtils; +using Newtonsoft.Json.Linq; + +namespace Microsoft.ML.Runtime.PipelineInference +{ + /// + /// PipelineSweeper will support metrics as they are added here. 
+ /// + public sealed class PipelineSweeperSupportedMetrics + { + public enum Metrics + { + Auc, + AccuracyMicro, + AccuracyMacro, + L1, + L2, + F1, + AuPrc, + TopKAccuracy, + Rms, + LossFn, + RSquared, + LogLoss, + LogLossReduction, + Ndcg, + Dcg, + PositivePrecision, + PositiveRecall, + NegativePrecision, + NegativeRecall, + DrAtK, + DrAtPFpr, + DrAtNumPos, + NumAnomalies, + ThreshAtK, + ThreshAtP, + ThreshAtNumPos, + Nmi, + AvgMinScore, + Dbi + }; + + /// + /// Map Enum Metrics to a SupportedMetric + /// + private static readonly Dictionary _map = new Dictionary + { + { Metrics.Auc.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Auc, true)}, + { Metrics.AccuracyMicro.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMicro, true)}, + { Metrics.AccuracyMacro.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMacro, true)}, + { Metrics.L1.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L1, false)}, + { Metrics.L2.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L2, false)}, + { Metrics.F1.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.F1, true)}, + { Metrics.AuPrc.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AuPrc, true)}, + { Metrics.TopKAccuracy.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.TopKAccuracy, true)}, + { Metrics.Rms.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Rms, false)}, + { Metrics.LossFn.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LossFn, false)}, + { Metrics.RSquared.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.RSquared, false)}, + { Metrics.LogLoss.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLoss, false)}, + { Metrics.LogLossReduction.ToString(), new 
SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLossReduction, true)}, + { Metrics.Ndcg.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Ndcg, true)}, + { Metrics.Dcg.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dcg, true)}, + { Metrics.PositivePrecision.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositivePrecision, true)}, + { Metrics.PositiveRecall.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositiveRecall, true)}, + { Metrics.NegativePrecision.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativePrecision, true)}, + { Metrics.NegativeRecall.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativeRecall, true)}, + { Metrics.DrAtK.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtK, true)}, + { Metrics.DrAtPFpr.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtPFpr, true)}, + { Metrics.DrAtNumPos.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtNumPos, true)}, + { Metrics.NumAnomalies.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NumAnomalies, true)}, + { Metrics.ThreshAtK.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtK, false)}, + { Metrics.ThreshAtP.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtP, false)}, + { Metrics.ThreshAtNumPos.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtNumPos, false)}, + { Metrics.Nmi.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Nmi, false)}, + { Metrics.AvgMinScore.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AvgMinScore, false)}, + { Metrics.Dbi.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dbi, false)} + }; + + public static 
SupportedMetric GetSupportedMetric(IHostEnvironment env, string metricName) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckNonEmpty(metricName, nameof(metricName)); + + if (_map.ContainsKey(metricName)) + { + return _map[metricName]; + } + + throw new NotSupportedException($"Metric '{metricName}' not supported."); + } + } + + public sealed class SupportedMetric + { + public string Name { get; } + public bool IsMaximizing { get; } + + public SupportedMetric(string name, bool isMaximizing) + { + Name = name; + IsMaximizing = isMaximizing; + } + + public override string ToString() => Name; + } +} diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 83723638e1..e35e2913c3 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -15626,36 +15626,37 @@ public sealed class UniformRandomAutoMlEngine : AutoMlEngine public abstract class AutoMlStateBase : ComponentKind {} - public enum AutoInferenceAutoMlMlStateArgumentsMetrics + public enum PipelineSweeperSupportedMetricsMetrics { Auc = 0, AccuracyMicro = 1, AccuracyMacro = 2, - L2 = 3, - F1 = 4, - AuPrc = 5, - TopKAccuracy = 6, - Rms = 7, - LossFn = 8, - RSquared = 9, - LogLoss = 10, - LogLossReduction = 11, - Ndcg = 12, - Dcg = 13, - PositivePrecision = 14, - PositiveRecall = 15, - NegativePrecision = 16, - NegativeRecall = 17, - DrAtK = 18, - DrAtPFpr = 19, - DrAtNumPos = 20, - NumAnomalies = 21, - ThreshAtK = 22, - ThreshAtP = 23, - ThreshAtNumPos = 24, - Nmi = 25, - AvgMinScore = 26, - Dbi = 27 + L1 = 3, + L2 = 4, + F1 = 5, + AuPrc = 6, + TopKAccuracy = 7, + Rms = 8, + LossFn = 9, + RSquared = 10, + LogLoss = 11, + LogLossReduction = 12, + Ndcg = 13, + Dcg = 14, + PositivePrecision = 15, + PositiveRecall = 16, + NegativePrecision = 17, + NegativeRecall = 18, + DrAtK = 19, + DrAtPFpr = 20, + DrAtNumPos = 21, + NumAnomalies = 22, + ThreshAtK = 23, + ThreshAtP = 24, + ThreshAtNumPos = 25, + Nmi = 26, + AvgMinScore = 27, + Dbi = 28 } @@ -15668,7 +15669,7 @@ 
public sealed class AutoMlStateAutoMlStateBase : AutoMlStateBase /// /// Supported metric for evaluator. /// - public AutoInferenceAutoMlMlStateArgumentsMetrics Metric { get; set; } = AutoInferenceAutoMlMlStateArgumentsMetrics.Auc; + public PipelineSweeperSupportedMetricsMetrics Metric { get; set; } = PipelineSweeperSupportedMetricsMetrics.Auc; /// /// AutoML engine (pipeline optimizer) that generates next candidates. diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index f7d73f54b4..11ff811a7d 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -21644,6 +21644,7 @@ "Auc", "AccuracyMicro", "AccuracyMacro", + "L1", "L2", "F1", "AuPrc", diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index e36112bd8d..5a11b05be7 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -34,7 +34,7 @@ public void TestLearn() int batchSize = 5; int numIterations = 10; int numTransformLevels = 3; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); // Using the simple, uniform random sampling (with replacement) engine PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env); @@ -69,446 +69,6 @@ public void TestLearn() Done(); } - [Fact] - [TestCategory("EntryPoints")] - public void TestPipelineSweeperMacroNoTransforms() - { - // Set up inputs for experiment - string pathData = GetDataPath("adult.train"); - string pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 1000; - const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+"; - - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); 
-#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - const int batchSize = 5; - const int numIterations = 20; - const int numTransformLevels = 2; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; - - // Using the simple, uniform random sampling (with replacement) engine - PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env); - - // Create search object - var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine, new IterationTerminator(numIterations), - MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest); - - // Infer search space - amls.InferSearchSpace(numTransformLevels); - - // Create macro object - var pipelineSweepInput = new Microsoft.ML.Models.PipelineSweeper() - { - BatchSize = batchSize, - }; - - var exp = new Experiment(Env); - var output = exp.Add(pipelineSweepInput); - exp.Compile(); - exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain); - exp.SetInput(pipelineSweepInput.TestingData, datasetTest); - exp.SetInput(pipelineSweepInput.State, amls); - exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]); - exp.Run(); - - // Make sure you get back an AutoMlState, and that it ran for correct number of iterations - // with at least minimal performance values (i.e., best should have AUC better than 0.1 on this dataset). 
- AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State); - Assert.NotNull(amlsOut); - Assert.Equal(amlsOut.GetAllEvaluatedPipelines().Length, numIterations); - Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.1); - } - - [Fact] - [TestCategory("EntryPoints")] - public void EntryPointPipelineSweepSerialization() - { - // Get datasets - var pathData = GetDataPath("adult.train"); - var pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 1000; - int numIterations = 10; - const string schema = - "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Auc', - 'Engine': { - 'Name': 'UniformRandom' - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 'FinalHistoryLength': 10 - } - }, - 'TrainerKind': 'SignatureBinaryClassifierTrainer' - } - }, - 'BatchSize': 5 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': '$ResultsOut' - } - }, - ] - }"; - - JObject graphJson = JObject.Parse(inputGraph); 
- var catalog = ModuleCatalog.CreateInstance(Env); - var graph = new EntryPointGraph(Env, catalog, graphJson[FieldNames.Nodes] as JArray); - // Test if ToJson() works properly. - var nodes = new JArray(graph.AllNodes.Select(node => node.ToJson())); - var runner = new GraphRunner(Env, catalog, nodes); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - - var results = runner.GetOutput("ResultsOut"); - Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, - "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); - Assert.True(rows.Length == numIterations); - } - - [Fact] - public void EntryPointPipelineSweep() - { - // Get datasets - var pathData = GetDataPath("adult.tiny.with-schema.txt"); - var pathDataTest = GetDataPath("adult.tiny.with-schema.txt"); - const int numOfSampleRows = 1000; - int numIterations = 4; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Auc', - 'Engine': { - 'Name': 'UniformRandom' - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 'FinalHistoryLength': 4 - } - }, - 'TrainerKind': 'SignatureBinaryClassifierTrainer' - } - }, - 'BatchSize': 2 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': 
'$ResultsOut' - } - }, - ] - }"; - - JObject graph = JObject.Parse(inputGraph); - var catalog = ModuleCatalog.CreateInstance(Env); - - var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - - var autoMlState = runner.GetOutput("StateOut"); - Assert.NotNull(autoMlState); - var allPipelines = autoMlState.GetAllEvaluatedPipelines(); - var bestPipeline = autoMlState.GetBestPipeline(); - Assert.Equal(allPipelines.Length, numIterations); - Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); - - var results = runner.GetOutput("ResultsOut"); - Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, - "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); - Assert.True(rows.Length == numIterations); - Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); - } - - [Fact] - [TestCategory("EntryPoints")] - public void EntryPointPipelineSweepRoles() - { - // Get datasets - var pathData = GetDataPath("adult.train"); - var pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 100; - int numIterations = 2; - const string schema = - "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " + - "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+"; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = 
inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'LabelColumns': ['IsOver50K_'], - 'WeightColumns': ['education_num'], - 'NameColumns': ['education'], - 'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'], - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Auc', - 'Engine': { - 'Name': 'Defaults' - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 'FinalHistoryLength': 2 - } - }, - 'TrainerKind': 'SignatureBinaryClassifierTrainer', - 'RequestedLearners' : [ - 'LogisticRegressionBinaryClassifier', - 'FastTreeBinaryClassifier' - ] - } - }, - 'BatchSize': 1 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': '$ResultsOut' - } - }, - ] - }"; - - JObject graphJson = JObject.Parse(inputGraph); - var catalog = ModuleCatalog.CreateInstance(Env); - var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - - var autoMlState = runner.GetOutput("StateOut"); - Assert.NotNull(autoMlState); - var allPipelines = autoMlState.GetAllEvaluatedPipelines(); - var bestPipeline = autoMlState.GetBestPipeline(); - Assert.Equal(allPipelines.Length, numIterations); - - var trainAuc = bestPipeline.PerformanceSummary.TrainingMetricValue; - var testAuc = bestPipeline.PerformanceSummary.MetricValue; - Assert.True((0.94 < trainAuc) && (trainAuc < 0.95)); - Assert.True((0.83 < testAuc) && (testAuc < 0.84)); - - var results = runner.GetOutput("ResultsOut"); - Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, - "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); - 
Assert.True(rows.Length == numIterations); - Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); - } - - [Fact] - [TestCategory("EntryPoints")] - public void EntryPointPipelineSweepMultiClass() - { - // Get datasets - var pathData = GetDataPath("adult.train"); - var pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 100; - const string schema = - "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " + - "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+"; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'LabelColumns': ['IsOver50K_'], - 'WeightColumns': ['education_num'], - 'NameColumns': ['education'], - 'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'], - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Accuracy(micro-avg)', - 'Engine': { - 'Name': 'Defaults' - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 'FinalHistoryLength': 2 - } - }, - 'TrainerKind': 'SignatureMultiClassClassifierTrainer', - } - }, - 'BatchSize': 1 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': '$ResultsOut' - } - }, - ] - }"; - - JObject graphJson = 
JObject.Parse(inputGraph); - var catalog = ModuleCatalog.CreateInstance(Env); - var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - } - - [Fact] - public void TestRocketPipelineEngine() - { - // Get datasets - var pathData = GetDataPath("adult.train"); - var pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 1000; - int numIterations = 35; - const string schema = - "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); -#pragma warning restore 0618 - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Auc', - 'Engine': { - 'Name': 'Rocket', - 'Settings' : { - 'TopKLearners' : 2, - 'SecondRoundTrialsPerLearner' : 5 - }, - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 'FinalHistoryLength': 35 - } - }, - 'TrainerKind': 'SignatureBinaryClassifierTrainer' - } - }, - 'BatchSize': 5 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': '$ResultsOut' - } - }, - ] - }"; - - JObject graph = 
JObject.Parse(inputGraph); - var catalog = ModuleCatalog.CreateInstance(Env); - - var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - - var autoMlState = runner.GetOutput("StateOut"); - Assert.NotNull(autoMlState); - var allPipelines = autoMlState.GetAllEvaluatedPipelines(); - var bestPipeline = autoMlState.GetBestPipeline(); - Assert.Equal(allPipelines.Length, numIterations); - Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); - - var results = runner.GetOutput("ResultsOut"); - Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, - "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); - Assert.True(rows.Length == numIterations); - } - [Fact(Skip = "Need CoreTLC specific baseline update")] public void TestTextDatasetLearn() { @@ -519,7 +79,7 @@ public void TestTextDatasetLearn() int numIterations = 35; int numTransformLevels = 1; int numSampleRows = 100; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.AccuracyMicro; + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "AccuracyMicro"); // Using the simple, uniform random sampling (with replacement) engine PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env); @@ -570,27 +130,6 @@ public void TestPipelineNodeCloning() } } - [Fact] - public void TestSupportedMetricsByName() - { - var names = new List() - { - AutoInference.SupportedMetric.AccuracyMacro.Name, - AutoInference.SupportedMetric.AccuracyMicro.Name, - AutoInference.SupportedMetric.Auc.Name, - AutoInference.SupportedMetric.AuPrc.Name, - AutoInference.SupportedMetric.Dbi.Name, - AutoInference.SupportedMetric.F1.Name, - AutoInference.SupportedMetric.LogLossReduction.Name - }; - - foreach (var name in names) - { - var metric = AutoInference.SupportedMetric.ByName(name); - 
Assert.Equal(metric.Name, name); - } - } - [Fact] public void TestHyperparameterFreezing() { @@ -599,41 +138,44 @@ public void TestHyperparameterFreezing() int batchSize = 1; int numIterations = 10; int numTransformLevels = 3; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + using (var env = new TlcEnvironment()) + { + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); - // Using the simple, uniform random sampling (with replacement) brain - PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + // Using the simple, uniform random sampling (with replacement) brain + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - // Run initial experiments - var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, - metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), - MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); + // Run initial experiments + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, + metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); - // Clear results - amls.ClearEvaluatedPipelines(); + // Clear results + amls.ClearEvaluatedPipelines(); - // Get space, remove transforms and all but one learner, freeze hyperparameters on learner. - var space = amls.GetSearchSpace(); - var transforms = space.Item1.Where(t => - t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray(); - var learners = new[] { space.Item2.First() }; - var hyperParam = learners[0].PipelineNode.SweepParams.First(); - var frozenParamValue = hyperParam.RawValue; - hyperParam.Frozen = true; - amls.UpdateSearchSpace(learners, transforms); + // Get space, remove transforms and all but one learner, freeze hyperparameters on learner. 
+ var space = amls.GetSearchSpace(); + var transforms = space.Item1.Where(t => + t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray(); + var learners = new[] { space.Item2.First() }; + var hyperParam = learners[0].PipelineNode.SweepParams.First(); + var frozenParamValue = hyperParam.RawValue; + hyperParam.Frozen = true; + amls.UpdateSearchSpace(learners, transforms); - // Allow for one more iteration - amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); + // Allow for one more iteration + amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); - // Do learning. Only retained learner should be left in all pipelines. - bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); + // Do learning. Only retained learner should be left in all pipelines. + bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); - // Make sure all pipelines have retained learner - Assert.True(amls.GetAllEvaluatedPipelines().All(p => p.Learner.LearnerName == learners[0].LearnerName)); + // Make sure all pipelines have retained learner + Assert.True(amls.GetAllEvaluatedPipelines().All(p => p.Learner.LearnerName == learners[0].LearnerName)); - // Make sure hyperparameter value did not change - Assert.NotNull(bestPipeline); - Assert.Equal(bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue, frozenParamValue); + // Make sure hyperparameter value did not change + Assert.NotNull(bestPipeline); + Assert.Equal(bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue, frozenParamValue); + } } [Fact(Skip = "Dataset not available.")] @@ -644,26 +186,29 @@ public void TestRegressionPipelineWithMinimizingMetric() int batchSize = 5; int numIterations = 10; int numTransformLevels = 1; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.L1; + using (var env = new TlcEnvironment()) + { + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "L1"); 
- // Using the simple, uniform random sampling (with replacement) brain - PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + // Using the simple, uniform random sampling (with replacement) brain + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - // Run initial experiments - var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, - metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), - MacroUtils.TrainerKinds.SignatureRegressorTrainer); + // Run initial experiments + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, + metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureRegressorTrainer); - // Allow for one more iteration - amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); + // Allow for one more iteration + amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); - // Do learning. Only retained learner should be left in all pipelines. - bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); + // Do learning. Only retained learner should be left in all pipelines. 
+ bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); - // Make sure hyperparameter value did not change - Assert.NotNull(bestPipeline); - Assert.True(amls.GetAllEvaluatedPipelines().All( - p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue)); + // Make sure hyperparameter value did not change + Assert.NotNull(bestPipeline); + Assert.True(amls.GetAllEvaluatedPipelines().All( + p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue)); + } } [Fact] @@ -675,102 +220,26 @@ public void TestLearnerConstrainingByName() int numIterations = 1; int numTransformLevels = 2; var retainedLearnerNames = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; - AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + using (var env = new TlcEnvironment()) + { + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); - // Using the simple, uniform random sampling (with replacement) brain. - PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + // Using the simple, uniform random sampling (with replacement) brain. + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - // Run initial experiment. - var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, - numTransformLevels, batchSize, metric, out var _, numOfSampleRows, - new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); + // Run initial experiment. + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, + numTransformLevels, batchSize, metric, out var _, numOfSampleRows, + new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); - // Keep only logistic regression and FastTree. 
- amls.KeepSelectedLearners(retainedLearnerNames); - var space = amls.GetSearchSpace(); + // Keep only logistic regression and FastTree. + amls.KeepSelectedLearners(retainedLearnerNames); + var space = amls.GetSearchSpace(); - // Make sure only learners left are those retained. - Assert.Equal(retainedLearnerNames.Length, space.Item2.Length); - Assert.True(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName))); - } - - [Fact] - public void TestRequestedLearners() - { - // Get datasets - var pathData = GetDataPath("adult.train"); - var pathDataTest = GetDataPath("adult.test"); - const int numOfSampleRows = 100; - const string schema = - "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - "col=relationship:TX:7 col=race:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); -#pragma warning disable 0618 - var datasetTrain = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - var datasetTest = ImportTextData.ImportText(Env, - new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); - var requestedLearners = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; -#pragma warning restore 0618 - // Define entrypoint graph - string inputGraph = @" - { - 'Nodes': [ - { - 'Name': 'Models.PipelineSweeper', - 'Inputs': { - 'TrainingData': '$TrainingData', - 'TestingData': '$TestingData', - 'StateArguments': { - 'Name': 'AutoMlState', - 'Settings': { - 'Metric': 'Auc', - 'Engine': { - 'Name': 'Rocket', - 'Settings' : { - 'TopKLearners' : 2, - 'SecondRoundTrialsPerLearner' : 0 - }, - }, - 'TerminatorArgs': { - 'Name': 'IterationLimited', - 'Settings': { - 
'FinalHistoryLength': 35 - } - }, - 'TrainerKind': 'SignatureBinaryClassifierTrainer', - 'RequestedLearners' : [ - 'LogisticRegressionBinaryClassifier', - 'FastTreeBinaryClassifier' - ] - } - }, - 'BatchSize': 5 - }, - 'Outputs': { - 'State': '$StateOut', - 'Results': '$ResultsOut' - } - }, - ] - }"; - - JObject graph = JObject.Parse(inputGraph); - var catalog = ModuleCatalog.CreateInstance(Env); - - var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - runner.SetInput("TrainingData", datasetTrain); - runner.SetInput("TestingData", datasetTest); - runner.RunAll(); - - var autoMlState = runner.GetOutput("StateOut"); - Assert.NotNull(autoMlState); - var space = autoMlState.GetSearchSpace(); - - // Make sure only learners left are those retained. - Assert.Equal(requestedLearners.Length, space.Item2.Length); - Assert.True(space.Item2.All(l => requestedLearners.Any(r => r == l.LearnerName))); + // Make sure only learners left are those retained. + Assert.Equal(retainedLearnerNames.Length, space.Item2.Length); + Assert.True(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName))); + } } [Fact] diff --git a/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs b/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs new file mode 100644 index 0000000000..fb17a92eac --- /dev/null +++ b/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs @@ -0,0 +1,564 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.EntryPoints.JsonUtils; +using Microsoft.ML.Runtime.PipelineInference; +using Newtonsoft.Json.Linq; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Runtime.RunTests +{ + public sealed class TestPipelineSweeper : BaseTestBaseline + { + public TestPipelineSweeper(ITestOutputHelper helper) + : base(helper) + { + } + + [Fact] + public void PipelineSweeperBasic() + { + // Get datasets + var pathData = GetDataPath("adult.tiny.with-schema.txt"); + var pathDataTest = GetDataPath("adult.tiny.with-schema.txt"); + const int numOfSampleRows = 1000; + int numIterations = 4; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'UniformRandom' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 4 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer' + } + }, + 'BatchSize': 2 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + + var runner = new GraphRunner(Env, 
catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(allPipelines.Length, numIterations); + Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); + Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); + } + + [Fact] + [TestCategory("EntryPoints")] + public void PipelineSweeperNoTransforms() + { + // Set up inputs for experiment + string pathData = GetDataPath("adult.train"); + string pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 1000; + const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+"; + + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + const int batchSize = 5; + const int numIterations = 20; + const int numTransformLevels = 2; + using (var env = new TlcEnvironment()) + { + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); + + // Using the simple, uniform random sampling (with replacement) engine 
+ PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env); + + // Create search object + var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest); + + // Infer search space + amls.InferSearchSpace(numTransformLevels); + + // Create macro object + var pipelineSweepInput = new Microsoft.ML.Models.PipelineSweeper() + { + BatchSize = batchSize, + }; + + var exp = new Experiment(Env); + var output = exp.Add(pipelineSweepInput); + exp.Compile(); + exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain); + exp.SetInput(pipelineSweepInput.TestingData, datasetTest); + exp.SetInput(pipelineSweepInput.State, amls); + exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]); + exp.Run(); + + // Make sure you get back an AutoMlState, and that it ran for correct number of iterations + // with at least minimal performance values (i.e., best should have AUC better than 0.1 on this dataset). 
+ AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State); + Assert.NotNull(amlsOut); + Assert.Equal(amlsOut.GetAllEvaluatedPipelines().Length, numIterations); + Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.1); + } + } + + [Fact] + [TestCategory("EntryPoints")] + public void PipelineSweeperSerialization() + { + // Get datasets + var pathData = GetDataPath("adult.train"); + var pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 1000; + int numIterations = 10; + const string schema = + "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'UniformRandom' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 10 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer' + } + }, + 'BatchSize': 5 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graphJson = JObject.Parse(inputGraph); + 
var catalog = ModuleCatalog.CreateInstance(Env); + var graph = new EntryPointGraph(Env, catalog, graphJson[FieldNames.Nodes] as JArray); + // Test if ToJson() works properly. + var nodes = new JArray(graph.AllNodes.Select(node => node.ToJson())); + var runner = new GraphRunner(Env, catalog, nodes); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); + } + + [Fact] + [TestCategory("EntryPoints")] + public void PipelineSweeperRoles() + { + // Get datasets + var pathData = GetDataPath("adult.train"); + var pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 100; + int numIterations = 2; + const string schema = + "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 
'LabelColumns': ['IsOver50K_'], + 'WeightColumns': ['education_num'], + 'NameColumns': ['education'], + 'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'], + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'Defaults' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 2 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer', + 'RequestedLearners' : [ + 'LogisticRegressionBinaryClassifier', + 'FastTreeBinaryClassifier' + ] + } + }, + 'BatchSize': 1 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graphJson = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(allPipelines.Length, numIterations); + + var trainAuc = bestPipeline.PerformanceSummary.TrainingMetricValue; + var testAuc = bestPipeline.PerformanceSummary.MetricValue; + Assert.True((0.94 < trainAuc) && (trainAuc < 0.95)); + Assert.True((0.83 < testAuc) && (testAuc < 0.84)); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); + Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); + } + + [Fact] + [TestCategory("EntryPoints")] + public void PipelineSweeperMultiClassClassification() + { + // Get datasets + // TODO (agoswami) : For now we use the same dataset for train and test since 
the repo does not have a separate test file for the iris dataset. + // In the future the PipelineSweeper Macro will have an option to take just one dataset as input, and do the train-test split internally. + var pathData = GetDataPath(@"iris.txt"); + var pathDataTest = GetDataPath(@"iris.txt"); + int numIterations = 2; + const string schema = "col=Species:R4:0 col=SepalLength:R4:1 col=SepalWidth:R4:2 col=PetalLength:R4:3 col=PetalWidth:R4:4"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data; + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data; +#pragma warning restore 0618 + + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'LabelColumns': ['Species'], + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'AccuracyMicro', + 'Engine': { + 'Name': 'Defaults' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 2 + } + }, + 'TrainerKind': 'SignatureMultiClassClassifierTrainer', + 'RequestedLearners' : [ + 'LogisticRegressionClassifier', + 'StochasticDualCoordinateAscentClassifier' + ] + } + }, + 'BatchSize': 1 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graphJson = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = 
runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(allPipelines.Length, numIterations); + + var bestMicroAccuracyTrain = bestPipeline.PerformanceSummary.TrainingMetricValue; + var bestMicroAccuracyTest = bestPipeline.PerformanceSummary.MetricValue; + Assert.True((0.97 < bestMicroAccuracyTrain) && (bestMicroAccuracyTrain < 0.99)); + Assert.True((0.97 < bestMicroAccuracyTest) && (bestMicroAccuracyTest < 0.99)); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); + Assert.True(rows.All(r => r.MetricValue > 0.9)); + } + + [Fact] + public void PipelineSweeperRocketEngine() + { + // Get datasets + var pathData = GetDataPath("adult.train"); + var pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 1000; + int numIterations = 35; + const string schema = + "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 
'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'Rocket', + 'Settings' : { + 'TopKLearners' : 2, + 'SecondRoundTrialsPerLearner' : 5 + }, + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 35 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer' + } + }, + 'BatchSize': 5 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + + var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(allPipelines.Length, numIterations); + Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); + } + + [Fact] + public void PipelineSweeperRequestedLearners() + { + // Get datasets + var pathData = GetDataPath("adult.train"); + var pathDataTest = GetDataPath("adult.test"); + const int numOfSampleRows = 100; + const string schema = + "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=race:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, 
pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); + var requestedLearners = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; +#pragma warning restore 0618 + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'Rocket', + 'Settings' : { + 'TopKLearners' : 2, + 'SecondRoundTrialsPerLearner' : 0 + }, + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 35 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer', + 'RequestedLearners' : [ + 'LogisticRegressionBinaryClassifier', + 'FastTreeBinaryClassifier' + ] + } + }, + 'BatchSize': 5 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + + var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var space = autoMlState.GetSearchSpace(); + + // Make sure only learners left are those retained. 
+ Assert.Equal(requestedLearners.Length, space.Item2.Length); + Assert.True(space.Item2.All(l => requestedLearners.Any(r => r == l.LearnerName))); + } + } +} From 0ccfbab631db05f1d13ca0fabd13653ff40f7e62 Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Wed, 18 Jul 2018 00:53:51 +0000 Subject: [PATCH 3/5] take care of review comments; display transforms/learners + metrics in pipeline --- .../AutoInference.cs | 25 ++++++++++--------- .../Macros/PipelineSweeperMacro.cs | 3 +-- .../PipelineSweeperSupportedMetrics.cs | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index 9f3027e9b8..e60bd6c463 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ -431,6 +431,18 @@ public void AddEvaluated(PipelinePattern pipeline) d += 1e-3; _sortedSampledElements.Add(d, pipeline); _history.Add(pipeline); + + using (var ch = _host.Start("Suggested Pipeline")) + { + ch.Info($"PipelineSweeper Pipeline Id : {pipeline.UniqueId}"); + foreach (var transform in pipeline.Transforms) + { + ch.Info($"PipelineSweeper Transform : {transform.Transform}"); + } + ch.Info($"PipelineSweeper Learner : {pipeline.Learner}"); + ch.Info($"PipelineSweeper Train Metric Value: {pipeline.PerformanceSummary.TrainingMetricValue}"); + ch.Info($"PipelineSweeper Test Metric Value: {pipeline.PerformanceSummary.MetricValue}"); + } } public void AddEvaluated(PipelinePattern[] pipelines) @@ -448,18 +460,7 @@ public PipelinePattern[] GetNextCandidates(int numberOfCandidates) currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates); BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles); - using (var ch = _host.Start("Suggested Pipeline")) - { - foreach (var pipeline in BatchCandidates) - { - ch.Info($"AutoInference Pipeline 
Id : {pipeline.UniqueId}"); - foreach (var transform in pipeline.Transforms) - { - ch.Info($"AutoInference Transform : {transform.Transform}"); - } - ch.Info($"AutoInference Learner : {pipeline.Learner}"); - } - } + return BatchCandidates; } diff --git a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs index be51c1e695..c5c23ce675 100644 --- a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs +++ b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs @@ -239,8 +239,7 @@ public static CommonOutputs.MacroOutput PipelineSweep( if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v) && node.Context.TryGetVariable(AutoMlUtils.GenerateOverallTrainingMetricVarName(pipeline.UniqueId), out var v2)) { - pipeline.PerformanceSummary = - AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value); + pipeline.PerformanceSummary = AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value); autoMlState.AddEvaluated(pipeline); } } diff --git a/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs index 8b77b83b52..64609cb205 100644 --- a/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs +++ b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs @@ -55,7 +55,7 @@ public enum Metrics }; /// - /// Mapp Enum Metrics to a SupportedMetric + /// Map Enum Metrics to a SupportedMetric /// private static readonly Dictionary _map = new Dictionary { From 46af33f6c5a87dd6b376d53d9c00c99be324f03c Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Wed, 18 Jul 2018 17:27:10 +0000 Subject: [PATCH 4/5] taking care of PR comments + refactor PipelineSweeperRunSummary --- .../AutoInference.cs | 32 +++------------- .../AutoMlUtils.cs 
| 23 ++++++----- .../PipelinePattern.cs | 4 +- .../PipelineSweeperRunSummary.cs | 38 +++++++++++++++++++ 4 files changed, 60 insertions(+), 37 deletions(-) create mode 100644 src/Microsoft.ML.PipelineInference/PipelineSweeperRunSummary.cs diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index e60bd6c463..4f65ca42f7 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ -106,26 +106,6 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok } } - /// - /// Class containing some information about an exectuted pipeline. - /// These are analogous to IRunResult for smart sweepers. - /// - public sealed class RunSummary - { - public double MetricValue { get; } - public double TrainingMetricValue { get; } - public int NumRowsInTraining { get; } - public long RunTimeMilliseconds { get; } - - public RunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue) - { - MetricValue = metricValue; - TrainingMetricValue = trainingMetricValue; - NumRowsInTraining = numRows; - RunTimeMilliseconds = runTimeMilliseconds; - } - } - [TlcModule.ComponentKind("AutoMlStateBase")] public interface ISupportAutoMlStateFactory : IComponentFactory { } @@ -262,8 +242,7 @@ private void ProcessPipeline(Sweeper.Algorithms.SweeperProbabilityUtils utils, S testMetricVal += 1e-10; // Save performance score - candidate.PerformanceSummary = - new RunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal); + candidate.PerformanceSummary = new PipelineSweeperRunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal); _sortedSampledElements.Add(candidate.PerformanceSummary.MetricValue, candidate); _history.Add(candidate); } @@ -434,14 +413,17 @@ public void AddEvaluated(PipelinePattern pipeline) using (var ch = 
_host.Start("Suggested Pipeline")) { + ch.Info($"PipelineSweeper Iteration Number : {_history.Count}"); ch.Info($"PipelineSweeper Pipeline Id : {pipeline.UniqueId}"); + foreach (var transform in pipeline.Transforms) { ch.Info($"PipelineSweeper Transform : {transform.Transform}"); } + ch.Info($"PipelineSweeper Learner : {pipeline.Learner}"); - ch.Info($"PipelineSweeper Train Metric Value: {pipeline.PerformanceSummary.TrainingMetricValue}"); - ch.Info($"PipelineSweeper Test Metric Value: {pipeline.PerformanceSummary.MetricValue}"); + ch.Info($"PipelineSweeper Train Metric Value : {pipeline.PerformanceSummary.TrainingMetricValue}"); + ch.Info($"PipelineSweeper Test Metric Value : {pipeline.PerformanceSummary.MetricValue}"); } } @@ -460,8 +442,6 @@ public PipelinePattern[] GetNextCandidates(int numberOfCandidates) currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates); BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles); - - return BatchCandidates; } diff --git a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs index 5f028835e2..e0bf7dbcca 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs @@ -38,11 +38,15 @@ public static double ExtractValueFromIdv(IHostEnvironment env, IDataView result, return outputValue; } - public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null) + public static PipelineSweeperRunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null) { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(result, nameof(result)); + env.CheckNonEmpty(metricColumnName, nameof(metricColumnName)); + double testingMetricValue = ExtractValueFromIdv(env, result, metricColumnName); double 
trainingMetricValue = trainResult != null ? ExtractValueFromIdv(env, trainResult, metricColumnName) : double.MinValue; - return new AutoInference.RunSummary(testingMetricValue, 0, 0, trainingMetricValue); + return new PipelineSweeperRunSummary(testingMetricValue, 0, 0, trainingMetricValue); } public static CommonInputs.IEvaluatorInput CloneEvaluatorInstance(CommonInputs.IEvaluatorInput evalInput) => @@ -566,14 +570,15 @@ private static ParameterSet ConvertToParameterSet(TlcModule.SweepableParamAttrib return learner.PipelineNode.HyperSweeperParamSet; } - public static IRunResult ConvertToRunResult(RecipeInference.SuggestedRecipe.SuggestedLearner learner, - AutoInference.RunSummary rs, bool isMetricMaximizing) => - new RunResult(ConvertToParameterSet(learner.PipelineNode.SweepParams, learner), rs.MetricValue, isMetricMaximizing); - - public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool isMetricMaximizing) => - history.Select(h => - ConvertToRunResult(h.Learner, h.PerformanceSummary, isMetricMaximizing)).ToArray(); + public static IRunResult ConvertToRunResult(RecipeInference.SuggestedRecipe.SuggestedLearner learner, PipelineSweeperRunSummary rs, bool isMetricMaximizing) + { + return new RunResult(ConvertToParameterSet(learner.PipelineNode.SweepParams, learner), rs.MetricValue, isMetricMaximizing); + } + public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool isMetricMaximizing) + { + return history.Select(h => ConvertToRunResult(h.Learner, h.PerformanceSummary, isMetricMaximizing)).ToArray(); + } /// /// Method to convert set of sweepable hyperparameters into strings of a format understood /// by the current smart hyperparameter sweepers. 
diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs index 44e9a1adcb..fd22c4d624 100644 --- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs +++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs @@ -55,13 +55,13 @@ public PipelineResultRow(string graphJson, double metricValue, private readonly IHostEnvironment _env; public readonly TransformInference.SuggestedTransform[] Transforms; public readonly RecipeInference.SuggestedRecipe.SuggestedLearner Learner; - public AutoInference.RunSummary PerformanceSummary { get; set; } + public PipelineSweeperRunSummary PerformanceSummary { get; set; } public string LoaderSettings { get; set; } public Guid UniqueId { get; } public PipelinePattern(TransformInference.SuggestedTransform[] transforms, RecipeInference.SuggestedRecipe.SuggestedLearner learner, - string loaderSettings, IHostEnvironment env, AutoInference.RunSummary summary = null) + string loaderSettings, IHostEnvironment env, PipelineSweeperRunSummary summary = null) { // Make sure internal pipeline nodes and sweep params are cloned, not shared. // Cloning the transforms and learner rather than assigning outright diff --git a/src/Microsoft.ML.PipelineInference/PipelineSweeperRunSummary.cs b/src/Microsoft.ML.PipelineInference/PipelineSweeperRunSummary.cs new file mode 100644 index 0000000000..662e08df9f --- /dev/null +++ b/src/Microsoft.ML.PipelineInference/PipelineSweeperRunSummary.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Reflection; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.PipelineInference; +using Microsoft.ML.Runtime.EntryPoints.JsonUtils; +using Newtonsoft.Json.Linq; + +namespace Microsoft.ML.Runtime.PipelineInference +{ + /// + /// Class containing some information about an exectuted pipeline. + /// These are analogous to IRunResult for smart sweepers. + /// + public sealed class PipelineSweeperRunSummary + { + public double MetricValue { get; } + public double TrainingMetricValue { get; } + public int NumRowsInTraining { get; } + public long RunTimeMilliseconds { get; } + + public PipelineSweeperRunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue) + { + MetricValue = metricValue; + TrainingMetricValue = trainingMetricValue; + NumRowsInTraining = numRows; + RunTimeMilliseconds = runTimeMilliseconds; + } + } +} From d7e84024d1fe154290a12451c56a1bad7e6434cb Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Wed, 18 Jul 2018 22:02:17 +0000 Subject: [PATCH 5/5] taking care of review comments --- .../AutoInference.cs | 2 +- .../PipelineSweeperSupportedMetrics.cs | 148 +++++++++++------- .../TestAutoInference.cs | 10 +- .../TestPipelineSweeper.cs | 4 +- 4 files changed, 102 insertions(+), 62 deletions(-) diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index 4f65ca42f7..6ec2894895 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ -157,7 +157,7 @@ public sealed class Arguments : ISupportAutoMlStateFactory public AutoMlMlState(IHostEnvironment env, Arguments args) : this(env, - PipelineSweeperSupportedMetrics.GetSupportedMetric(env, 
Enum.GetName(typeof(PipelineSweeperSupportedMetrics.Metrics), args.Metric)), + PipelineSweeperSupportedMetrics.GetSupportedMetric(args.Metric), args.Engine.CreateComponent(env), args.TerminatorArgs.CreateComponent(env), args.TrainerKind, requestedLearners: args.RequestedLearners) { diff --git a/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs index 64609cb205..accfe9cd14 100644 --- a/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs +++ b/src/Microsoft.ML.PipelineInference/PipelineSweeperSupportedMetrics.cs @@ -2,17 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Reflection; -using Microsoft.ML.Runtime.CommandLine; -using Microsoft.ML.Runtime.EntryPoints; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.PipelineInference; using Microsoft.ML.Runtime.EntryPoints.JsonUtils; -using Newtonsoft.Json.Linq; +using System; namespace Microsoft.ML.Runtime.PipelineInference { @@ -54,53 +45,102 @@ public enum Metrics Dbi }; - /// - /// Map Enum Metrics to a SupportedMetric - /// - private static readonly Dictionary _map = new Dictionary + public static SupportedMetric GetSupportedMetric(Metrics metric) { - { Metrics.Auc.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Auc, true)}, - { Metrics.AccuracyMicro.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMicro, true)}, - { Metrics.AccuracyMacro.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMacro, true)}, - { Metrics.L1.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L1, false)}, - { Metrics.L2.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L2, 
false)}, - { Metrics.F1.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.F1, true)}, - { Metrics.AuPrc.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AuPrc, true)}, - { Metrics.TopKAccuracy.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.TopKAccuracy, true)}, - { Metrics.Rms.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Rms, false)}, - { Metrics.LossFn.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LossFn, false)}, - { Metrics.RSquared.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.RSquared, false)}, - { Metrics.LogLoss.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLoss, false)}, - { Metrics.LogLossReduction.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLossReduction, true)}, - { Metrics.Ndcg.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Ndcg, true)}, - { Metrics.Dcg.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dcg, true)}, - { Metrics.PositivePrecision.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositivePrecision, true)}, - { Metrics.PositiveRecall.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositiveRecall, true)}, - { Metrics.NegativePrecision.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativePrecision, true)}, - { Metrics.NegativeRecall.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativeRecall, true)}, - { Metrics.DrAtK.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtK, true)}, - { Metrics.DrAtPFpr.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtPFpr, true)}, - { Metrics.DrAtNumPos.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtNumPos, true)}, - { 
Metrics.NumAnomalies.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NumAnomalies, true)}, - { Metrics.ThreshAtK.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtK, false)}, - { Metrics.ThreshAtP.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtP, false)}, - { Metrics.ThreshAtNumPos.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtNumPos, false)}, - { Metrics.Nmi.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Nmi, false)}, - { Metrics.AvgMinScore.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AvgMinScore, false)}, - { Metrics.Dbi.ToString(), new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dbi, false)} - }; - - public static SupportedMetric GetSupportedMetric(IHostEnvironment env, string metricName) - { - Contracts.CheckValue(env, nameof(env)); - env.CheckNonEmpty(metricName, nameof(metricName)); - - if (_map.ContainsKey(metricName)) + SupportedMetric supportedMetric = null; + switch(metric) { - return _map[metricName]; + case Metrics.Auc: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Auc, true); + break; + case Metrics.AccuracyMicro: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMicro, true); + break; + case Metrics.AccuracyMacro: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMacro, true); + break; + case Metrics.L1: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L1, false); + break; + case Metrics.L2: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L2, false); + break; + case Metrics.F1: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.F1, true); + break; + case Metrics.AuPrc: + supportedMetric = new 
SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AuPrc, true); + break; + case Metrics.TopKAccuracy: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.TopKAccuracy, true); + break; + case Metrics.Rms: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Rms, false); + break; + case Metrics.LossFn: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LossFn, false); + break; + case Metrics.RSquared: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.RSquared, false); + break; + case Metrics.LogLoss: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLoss, false); + break; + case Metrics.LogLossReduction: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLossReduction, true); + break; + case Metrics.Ndcg: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Ndcg, true); + break; + case Metrics.Dcg: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dcg, true); + break; + case Metrics.PositivePrecision: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositivePrecision, true); + break; + case Metrics.PositiveRecall: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositiveRecall, true); + break; + case Metrics.NegativePrecision: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativePrecision, true); + break; + case Metrics.NegativeRecall: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativeRecall, true); + break; + case Metrics.DrAtK: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtK, true); + break; + case Metrics.DrAtPFpr: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtPFpr, true); + 
break; + case Metrics.DrAtNumPos: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtNumPos, true); + break; + case Metrics.NumAnomalies: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NumAnomalies, true); + break; + case Metrics.ThreshAtK: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtK, false); + break; + case Metrics.ThreshAtP: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtP, false); + break; + case Metrics.ThreshAtNumPos: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtNumPos, false); + break; + case Metrics.Nmi: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Nmi, true); + break; + case Metrics.AvgMinScore: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AvgMinScore, false); + break; + case Metrics.Dbi: + supportedMetric = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dbi, false); + break; + default: + throw new NotSupportedException($"Metric '{metric}' not supported."); } - - throw new NotSupportedException($"Metric '{metricName}' not supported."); + return supportedMetric; } } diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index 5a11b05be7..d63d83c733 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -34,7 +34,7 @@ public void TestLearn() int batchSize = 5; int numIterations = 10; int numTransformLevels = 3; - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc); // Using the simple, uniform random sampling (with replacement) engine PipelineOptimizerBase 
autoMlEngine = new UniformRandomEngine(env); @@ -79,7 +79,7 @@ public void TestTextDatasetLearn() int numIterations = 35; int numTransformLevels = 1; int numSampleRows = 100; - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "AccuracyMicro"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro); // Using the simple, uniform random sampling (with replacement) engine PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env); @@ -140,7 +140,7 @@ public void TestHyperparameterFreezing() int numTransformLevels = 3; using (var env = new TlcEnvironment()) { - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc); // Using the simple, uniform random sampling (with replacement) brain PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); @@ -188,7 +188,7 @@ public void TestRegressionPipelineWithMinimizingMetric() int numTransformLevels = 1; using (var env = new TlcEnvironment()) { - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "L1"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro); // Using the simple, uniform random sampling (with replacement) brain PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); @@ -222,7 +222,7 @@ public void TestLearnerConstrainingByName() var retainedLearnerNames = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; using (var env = new TlcEnvironment()) { - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc); // Using the simple, uniform random 
sampling (with replacement) brain. PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); diff --git a/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs b/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs index fb17a92eac..03fa8dfe29 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPipelineSweeper.cs @@ -119,7 +119,7 @@ public void PipelineSweeperNoTransforms() const int numTransformLevels = 2; using (var env = new TlcEnvironment()) { - SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(env, "Auc"); + SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc); // Using the simple, uniform random sampling (with replacement) engine PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env); @@ -151,7 +151,7 @@ public void PipelineSweeperNoTransforms() AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State); Assert.NotNull(amlsOut); Assert.Equal(amlsOut.GetAllEvaluatedPipelines().Length, numIterations); - Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.1); + Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.8); } }