Skip to content

PipelineSweeperMacro for Multi-Class Classification #539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 20 additions & 132 deletions src/Microsoft.ML.PipelineInference/AutoInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,67 +51,6 @@ public class LevelDependencyMap : Dictionary<ColumnInfo, List<TransformInference
/// </summary>
public class DependencyMap : Dictionary<int, LevelDependencyMap> { }

/// <summary>
/// AutoInference will support metrics as they are added here.
/// Each instance pairs a metric name with a flag saying whether the
/// metric is to be maximized (<c>true</c>) or minimized (<c>false</c>).
/// </summary>
public sealed class SupportedMetric
{
    public static readonly SupportedMetric Auc = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Auc, true);
    public static readonly SupportedMetric AccuracyMicro = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMicro, true);
    public static readonly SupportedMetric AccuracyMacro = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AccuracyMacro, true);
    public static readonly SupportedMetric L1 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L1, false);
    public static readonly SupportedMetric L2 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.L2, false);
    public static readonly SupportedMetric F1 = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.F1, true);
    public static readonly SupportedMetric AuPrc = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AuPrc, true);
    public static readonly SupportedMetric TopKAccuracy = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.TopKAccuracy, true);
    public static readonly SupportedMetric Rms = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Rms, false);
    public static readonly SupportedMetric LossFn = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LossFn, false);
    // NOTE(review): R-squared is normally a "higher is better" score; confirm that
    // isMaximizing == false is intended here before relying on it.
    public static readonly SupportedMetric RSquared = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.RSquared, false);
    public static readonly SupportedMetric LogLoss = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLoss, false);
    public static readonly SupportedMetric LogLossReduction = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.LogLossReduction, true);
    public static readonly SupportedMetric Ndcg = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Ndcg, true);
    public static readonly SupportedMetric Dcg = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dcg, true);
    public static readonly SupportedMetric PositivePrecision = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositivePrecision, true);
    public static readonly SupportedMetric PositiveRecall = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.PositiveRecall, true);
    public static readonly SupportedMetric NegativePrecision = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativePrecision, true);
    public static readonly SupportedMetric NegativeRecall = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NegativeRecall, true);
    public static readonly SupportedMetric DrAtK = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtK, true);
    public static readonly SupportedMetric DrAtPFpr = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtPFpr, true);
    public static readonly SupportedMetric DrAtNumPos = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.DrAtNumPos, true);
    public static readonly SupportedMetric NumAnomalies = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.NumAnomalies, true);
    public static readonly SupportedMetric ThreshAtK = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtK, false);
    public static readonly SupportedMetric ThreshAtP = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtP, false);
    public static readonly SupportedMetric ThreshAtNumPos = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.ThreshAtNumPos, false);
    public static readonly SupportedMetric Nmi = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Nmi, true);
    public static readonly SupportedMetric AvgMinScore = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.AvgMinScore, false);
    public static readonly SupportedMetric Dbi = new SupportedMetric(FieldNames.PipelineSweeperSupportedMetrics.Dbi, false);

    /// <summary>Name of the metric, as passed to the constructor.</summary>
    public string Name { get; }

    /// <summary>The maximizing flag passed to the constructor.</summary>
    public bool IsMaximizing { get; }

    // Private: the only instances are the public static fields declared above.
    private SupportedMetric(string name, bool isMaximizing)
    {
        Name = name;
        IsMaximizing = isMaximizing;
    }

    /// <summary>
    /// Looks up one of the public static metric instances of this class by name,
    /// ignoring case (ordinal comparison).
    /// </summary>
    /// <param name="name">Metric name to look up. Must not be null.</param>
    /// <returns>The metric whose <see cref="Name"/> matches <paramref name="name"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="NotSupportedException">No declared metric has that name.</exception>
    public static SupportedMetric ByName(string name)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // raised from inside the comparison below.
        if (name == null)
            throw new ArgumentNullException(nameof(name));

        var fields =
            typeof(SupportedMetric).GetFields(BindingFlags.Static | BindingFlags.Public);

        foreach (var field in fields)
        {
            // The instance argument is ignored for static fields, so pass null rather
            // than an unrelated instance. 'as' skips any public static field that is
            // not a SupportedMetric instead of throwing InvalidCastException.
            var metric = field.GetValue(null) as SupportedMetric;
            if (metric != null && string.Equals(name, metric.Name, StringComparison.OrdinalIgnoreCase))
                return metric;
        }
        throw new NotSupportedException($"Metric '{name}' not supported.");
    }

    /// <summary>Returns the metric's <see cref="Name"/>.</summary>
    public override string ToString() => Name;
}

/// <summary>
/// Class for encapsulating an entrypoint experiment graph
/// and keeping track of the input and output nodes.
Expand Down Expand Up @@ -167,26 +106,6 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok
}
}

/// <summary>
/// Immutable record of the figures collected for one executed pipeline.
/// These are analogous to IRunResult for smart sweepers.
/// </summary>
public sealed class RunSummary
{
    private readonly double _metricValue;
    private readonly double _trainingMetricValue;
    private readonly int _numRowsInTraining;
    private readonly long _runTimeMilliseconds;

    /// <summary>
    /// Creates a summary from the supplied figures; values are stored as given.
    /// </summary>
    /// <param name="metricValue">Value exposed through <see cref="MetricValue"/>.</param>
    /// <param name="numRows">Value exposed through <see cref="NumRowsInTraining"/>.</param>
    /// <param name="runTimeMilliseconds">Value exposed through <see cref="RunTimeMilliseconds"/>.</param>
    /// <param name="trainingMetricValue">Value exposed through <see cref="TrainingMetricValue"/>.</param>
    public RunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue)
    {
        _runTimeMilliseconds = runTimeMilliseconds;
        _numRowsInTraining = numRows;
        _trainingMetricValue = trainingMetricValue;
        _metricValue = metricValue;
    }

    /// <summary>Metric value recorded for the run.</summary>
    public double MetricValue => _metricValue;

    /// <summary>Metric value recorded for the training data.</summary>
    public double TrainingMetricValue => _trainingMetricValue;

    /// <summary>Number of rows recorded for training.</summary>
    public int NumRowsInTraining => _numRowsInTraining;

    /// <summary>Recorded run time, in milliseconds.</summary>
    public long RunTimeMilliseconds => _runTimeMilliseconds;
}

[TlcModule.ComponentKind("AutoMlStateBase")]
public interface ISupportAutoMlStateFactory : IComponentFactory<IMlState>
{ }
Expand Down Expand Up @@ -218,42 +137,8 @@ public sealed class AutoMlMlState : IMlState
Desc = "State of an AutoML search and search space.")]
public sealed class Arguments : ISupportAutoMlStateFactory
{
/// <summary>
/// Metric choices that can be supplied through the Metric argument of this
/// Arguments class.
/// </summary>
// REVIEW: These should be the same as SupportedMetrics above. Not sure how to reference that class,
// without the C# API generator trying to create a version of that class in the API as well.
// NOTE(review): SupportedMetric declares an L1 metric that has no member here;
// confirm whether that omission is intentional.
// Member names are resolved via Enum.GetName and SupportedMetric.ByName by the
// constructor taking Arguments, so each name must match a SupportedMetric name
// (case-insensitively).
public enum Metrics
{
Auc,
AccuracyMicro,
AccuracyMacro,
L2,
F1,
AuPrc,
TopKAccuracy,
Rms,
LossFn,
RSquared,
LogLoss,
LogLossReduction,
Ndcg,
Dcg,
PositivePrecision,
PositiveRecall,
NegativePrecision,
NegativeRecall,
DrAtK,
DrAtPFpr,
DrAtNumPos,
NumAnomalies,
ThreshAtK,
ThreshAtP,
ThreshAtNumPos,
Nmi,
AvgMinScore,
Dbi
};

[Argument(ArgumentType.Required, HelpText = "Supported metric for evaluator.", ShortName = "metric")]
public Metrics Metric;
public PipelineSweeperSupportedMetrics.Metrics Metric;

[Argument(ArgumentType.Required, HelpText = "AutoML engine (pipeline optimizer) that generates next candidates.", ShortName = "engine")]
public ISupportIPipelineOptimizerFactory Engine;
Expand All @@ -271,7 +156,9 @@ public enum Metrics
}

public AutoMlMlState(IHostEnvironment env, Arguments args)
: this(env, SupportedMetric.ByName(Enum.GetName(typeof(Arguments.Metrics), args.Metric)), args.Engine.CreateComponent(env),
: this(env,
PipelineSweeperSupportedMetrics.GetSupportedMetric(args.Metric),
args.Engine.CreateComponent(env),
args.TerminatorArgs.CreateComponent(env), args.TrainerKind, requestedLearners: args.RequestedLearners)
{
}
Expand Down Expand Up @@ -355,8 +242,7 @@ private void ProcessPipeline(Sweeper.Algorithms.SweeperProbabilityUtils utils, S
testMetricVal += 1e-10;

// Save performance score
candidate.PerformanceSummary =
new RunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal);
candidate.PerformanceSummary = new PipelineSweeperRunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal);
_sortedSampledElements.Add(candidate.PerformanceSummary.MetricValue, candidate);
_history.Add(candidate);
}
Expand Down Expand Up @@ -524,6 +410,21 @@ public void AddEvaluated(PipelinePattern pipeline)
d += 1e-3;
_sortedSampledElements.Add(d, pipeline);
_history.Add(pipeline);

using (var ch = _host.Start("Suggested Pipeline"))
{
ch.Info($"PipelineSweeper Iteration Number : {_history.Count}");
ch.Info($"PipelineSweeper Pipeline Id : {pipeline.UniqueId}");

foreach (var transform in pipeline.Transforms)
{
ch.Info($"PipelineSweeper Transform : {transform.Transform}");
}

ch.Info($"PipelineSweeper Learner : {pipeline.Learner}");
ch.Info($"PipelineSweeper Train Metric Value : {pipeline.PerformanceSummary.TrainingMetricValue}");
ch.Info($"PipelineSweeper Test Metric Value : {pipeline.PerformanceSummary.MetricValue}");
}
}

public void AddEvaluated(PipelinePattern[] pipelines)
Expand All @@ -541,19 +442,6 @@ public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);
BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles);

using (var ch = _host.Start("Suggested Pipeline"))
{
foreach (var pipeline in BatchCandidates)
{
ch.Info($"AutoInference Pipeline Id : {pipeline.UniqueId}");
foreach (var transform in pipeline.Transforms)
{
ch.Info($"AutoInference Transform : {transform.Transform}");
}
ch.Info($"AutoInference Learner : {pipeline.Learner}");
}
}

return BatchCandidates;
}

Expand Down
23 changes: 14 additions & 9 deletions src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ public static double ExtractValueFromIdv(IHostEnvironment env, IDataView result,
return outputValue;
}

/// <summary>
/// Builds a run summary from evaluation output: reads the column named
/// <paramref name="metricColumnName"/> from <paramref name="result"/> and, when supplied,
/// from <paramref name="trainResult"/> via <c>ExtractValueFromIdv</c>.
/// </summary>
/// <param name="env">Host environment; checked for null.</param>
/// <param name="result">Data view holding the test metric; checked for null.</param>
/// <param name="metricColumnName">Name of the metric column to read; checked for non-empty.</param>
/// <param name="trainResult">Optional data view holding the training metric.</param>
/// <returns>
/// A summary whose row count and run time are both 0, and whose training metric is
/// <c>double.MinValue</c> when <paramref name="trainResult"/> is null.
/// </returns>
public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null)
public static PipelineSweeperRunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null)
{
// Validate arguments up front so failures are reported against the right parameter.
Contracts.CheckValue(env, nameof(env));
env.CheckValue(result, nameof(result));
env.CheckNonEmpty(metricColumnName, nameof(metricColumnName));

double testingMetricValue = ExtractValueFromIdv(env, result, metricColumnName);
// double.MinValue serves as the placeholder when no training result was provided.
double trainingMetricValue = trainResult != null ? ExtractValueFromIdv(env, trainResult, metricColumnName) : double.MinValue;
// Row count and run time are not available here, so both are passed as 0.
return new AutoInference.RunSummary(testingMetricValue, 0, 0, trainingMetricValue);
return new PipelineSweeperRunSummary(testingMetricValue, 0, 0, trainingMetricValue);
}

public static CommonInputs.IEvaluatorInput CloneEvaluatorInstance(CommonInputs.IEvaluatorInput evalInput) =>
Expand Down Expand Up @@ -566,14 +570,15 @@ private static ParameterSet ConvertToParameterSet(TlcModule.SweepableParamAttrib
return learner.PipelineNode.HyperSweeperParamSet;
}

public static IRunResult ConvertToRunResult(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
AutoInference.RunSummary rs, bool isMetricMaximizing) =>
new RunResult(ConvertToParameterSet(learner.PipelineNode.SweepParams, learner), rs.MetricValue, isMetricMaximizing);

public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool isMetricMaximizing) =>
history.Select(h =>
ConvertToRunResult(h.Learner, h.PerformanceSummary, isMetricMaximizing)).ToArray();
/// <summary>
/// Wraps a learner's sweep parameters and the metric value of a run summary
/// into a <c>RunResult</c>.
/// </summary>
/// <param name="learner">Learner whose pipeline node supplies the sweep parameters.</param>
/// <param name="rs">Run summary supplying the metric value.</param>
/// <param name="isMetricMaximizing">Forwarded unchanged to the <c>RunResult</c> constructor.</param>
/// <returns>The newly created run result.</returns>
public static IRunResult ConvertToRunResult(RecipeInference.SuggestedRecipe.SuggestedLearner learner, PipelineSweeperRunSummary rs, bool isMetricMaximizing)
{
    // Same evaluation order as a single expression: parameter set first, metric second.
    var parameterSet = ConvertToParameterSet(learner.PipelineNode.SweepParams, learner);
    var metricValue = rs.MetricValue;
    return new RunResult(parameterSet, metricValue, isMetricMaximizing);
}

/// <summary>
/// Converts every pipeline in <paramref name="history"/> into a run result built from
/// its learner and performance summary, preserving order.
/// </summary>
/// <param name="history">Pipelines to convert.</param>
/// <param name="isMetricMaximizing">Forwarded unchanged to each conversion.</param>
/// <returns>One run result per entry of <paramref name="history"/>.</returns>
public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool isMetricMaximizing)
{
    var runResults =
        from pattern in history
        select ConvertToRunResult(pattern.Learner, pattern.PerformanceSummary, isMetricMaximizing);

    return runResults.ToArray();
}
/// <summary>
/// Method to convert set of sweepable hyperparameters into strings of a format understood
/// by the current smart hyperparameter sweepers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,7 @@ public static CommonOutputs.MacroOutput<Output> PipelineSweep(
if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v) &&
node.Context.TryGetVariable(AutoMlUtils.GenerateOverallTrainingMetricVarName(pipeline.UniqueId), out var v2))
{
pipeline.PerformanceSummary =
AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value);
pipeline.PerformanceSummary = AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value);
autoMlState.AddEvaluated(pipeline);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.PipelineInference/PipelinePattern.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ public PipelineResultRow(string graphJson, double metricValue,
private readonly IHostEnvironment _env;
public readonly TransformInference.SuggestedTransform[] Transforms;
public readonly RecipeInference.SuggestedRecipe.SuggestedLearner Learner;
public AutoInference.RunSummary PerformanceSummary { get; set; }
public PipelineSweeperRunSummary PerformanceSummary { get; set; }
public string LoaderSettings { get; set; }
public Guid UniqueId { get; }

public PipelinePattern(TransformInference.SuggestedTransform[] transforms,
RecipeInference.SuggestedRecipe.SuggestedLearner learner,
string loaderSettings, IHostEnvironment env, AutoInference.RunSummary summary = null)
string loaderSettings, IHostEnvironment env, PipelineSweeperRunSummary summary = null)
{
// Make sure internal pipeline nodes and sweep params are cloned, not shared.
// Cloning the transforms and learner rather than assigning outright
Expand Down Expand Up @@ -205,7 +205,7 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var<IDataView> trainData,
/// Runs a train-test experiment on the current pipeline, through entrypoints.
/// </summary>
public void RunTrainTestExperiment(IDataView trainData, IDataView testData,
AutoInference.SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue,
SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue,
out double trainMetricValue)
{
var experiment = CreateTrainTestExperiment(trainData, testData, trainerKind, true, out var trainTestOutput);
Expand Down
38 changes: 38 additions & 0 deletions src/Microsoft.ML.PipelineInference/PipelineSweeperRunSummary.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.PipelineInference;
using Microsoft.ML.Runtime.EntryPoints.JsonUtils;
using Newtonsoft.Json.Linq;

namespace Microsoft.ML.Runtime.PipelineInference
{
    /// <summary>
    /// Read-only holder for the figures recorded about one executed pipeline.
    /// These are analogous to IRunResult for smart sweepers.
    /// </summary>
    public sealed class PipelineSweeperRunSummary
    {
        /// <summary>
        /// Stores the supplied figures as given; no validation is performed.
        /// </summary>
        /// <param name="metricValue">Value exposed through <see cref="MetricValue"/>.</param>
        /// <param name="numRows">Value exposed through <see cref="NumRowsInTraining"/>.</param>
        /// <param name="runTimeMilliseconds">Value exposed through <see cref="RunTimeMilliseconds"/>.</param>
        /// <param name="trainingMetricValue">Value exposed through <see cref="TrainingMetricValue"/>.</param>
        public PipelineSweeperRunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue)
        {
            this.NumRowsInTraining = numRows;
            this.RunTimeMilliseconds = runTimeMilliseconds;
            this.MetricValue = metricValue;
            this.TrainingMetricValue = trainingMetricValue;
        }

        /// <summary>Metric value recorded for the run.</summary>
        public double MetricValue { get; }

        /// <summary>Metric value recorded for the training data.</summary>
        public double TrainingMetricValue { get; }

        /// <summary>Number of rows recorded for training.</summary>
        public int NumRowsInTraining { get; }

        /// <summary>Recorded run time, in milliseconds.</summary>
        public long RunTimeMilliseconds { get; }
    }
}
Loading