Skip to content

Convert ML.Sweeper usages of SubComponent to IComponentFactory. #734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 29, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -101,9 +101,7 @@ private void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLear
// If first time optimizing hyperparams, create new hyperparameter sweeper.
if (!_hyperSweepers.ContainsKey(learner.LearnerName))
{
var paramTups = AutoMlUtils.ConvertToSweepArgumentStrings(learner.PipelineNode.SweepParams);
var sps = paramTups.Select(tup =>
new SubComponent<IValueGenerator, SignatureSweeperParameter>(tup.Item1, tup.Item2)).ToArray();
var sps = AutoMlUtils.ConvertToComponentFactories(learner.PipelineNode.SweepParams);
if (sps.Length > 0)
{
_hyperSweepers[learner.LearnerName] = new KdoSweeper(Env,
91 changes: 53 additions & 38 deletions src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
Original file line number Diff line number Diff line change
@@ -579,58 +579,73 @@ public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool i
{
return history.Select(h => ConvertToRunResult(h.Learner, h.PerformanceSummary, isMetricMaximizing)).ToArray();
}

/// <summary>
/// Method to convert set of sweepable hyperparameters into strings of a format understood
/// Method to convert set of sweepable hyperparameters into <see cref="IComponentFactory"/> instances used
/// by the current smart hyperparameter sweepers.
/// </summary>
public static Tuple<string, string[]>[] ConvertToSweepArgumentStrings(TlcModule.SweepableParamAttribute[] hps)
public static IComponentFactory<IValueGenerator>[] ConvertToComponentFactories(TlcModule.SweepableParamAttribute[] hps)
{
var results = new Tuple<string, string[]>[hps.Length];
var results = new IComponentFactory<IValueGenerator>[hps.Length];

for (int i = 0; i < hps.Length; i++)
{
string logSetting;
string numStepsSetting;
string stepSizeSetting;
switch (hps[i])
{
case TlcModule.SweepableDiscreteParamAttribute dp:
results[i] = new Tuple<string, string[]>("dp",
new[] { $"name={dp.Name}", $"{string.Join(" ", dp.Options.Select(o => $"v={o}"))}" });
results[i] = ComponentFactoryUtils.CreateFromFunction(env =>
{
var dpArgs = new DiscreteParamArguments()
{
Name = dp.Name,
Values = dp.Options.Select(o => o.ToString()).ToArray()
};
return new DiscreteValueGenerator(dpArgs);
});
break;

case TlcModule.SweepableFloatParamAttribute fp:
logSetting = fp.IsLogScale ? "log+" : "";
numStepsSetting = fp.NumSteps != null ? $"numsteps={fp.NumSteps}" : "";
stepSizeSetting = fp.StepSize != null ? $"stepsize={fp.StepSize}" : "";

results[i] =
new Tuple<string, string[]>("fp",
new[]
{
$"name={fp.Name}",
$"min={fp.Min}",
$"max={fp.Max}",
logSetting,
numStepsSetting,
stepSizeSetting
});
results[i] = ComponentFactoryUtils.CreateFromFunction(env =>
{
var fpArgs = new FloatParamArguments()
{
Name = fp.Name,
Min = fp.Min,
Max = fp.Max,
LogBase = fp.IsLogScale,
};
if (fp.NumSteps.HasValue)
{
fpArgs.NumSteps = fp.NumSteps.Value;
}
if (fp.StepSize.HasValue)
{
fpArgs.StepSize = fp.StepSize.Value;
}
return new FloatValueGenerator(fpArgs);
});
break;

case TlcModule.SweepableLongParamAttribute lp:
logSetting = lp.IsLogScale ? "logbase+" : "";
numStepsSetting = lp.NumSteps != null ? $"numsteps={lp.NumSteps}" : "";
stepSizeSetting = lp.StepSize != null ? $"stepsize={lp.StepSize}" : "";

results[i] =
new Tuple<string, string[]>("lp",
new[]
{
$"name={lp.Name}",
$"min={lp.Min}",
$"max={lp.Max}",
logSetting,
numStepsSetting,
stepSizeSetting
});
results[i] = ComponentFactoryUtils.CreateFromFunction(env =>
{
var lpArgs = new LongParamArguments()
{
Name = lp.Name,
Min = lp.Min,
Max = lp.Max,
LogBase = lp.IsLogScale
};
if (lp.NumSteps.HasValue)
{
lpArgs.NumSteps = lp.NumSteps.Value;
}
if (lp.StepSize.HasValue)
{
lpArgs.StepSize = lp.StepSize.Value;
}
return new LongValueGenerator(lpArgs);
});
break;
}
}
7 changes: 4 additions & 3 deletions src/Microsoft.ML.Sweeper/Algorithms/Grid.cs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Sweeper;

@@ -28,8 +29,8 @@ public abstract class SweeperBase : ISweeper
{
public class ArgumentsBase
{
[Argument(ArgumentType.Multiple, HelpText = "Swept parameters", ShortName = "p")]
public SubComponent<IValueGenerator, SignatureSweeperParameter>[] SweptParameters;
[Argument(ArgumentType.Multiple, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))]
public IComponentFactory<IValueGenerator>[] SweptParameters;

[Argument(ArgumentType.LastOccurenceWins, HelpText = "Number of tries to generate distinct parameter sets.", ShortName = "r")]
public int Retries = 10;
@@ -49,7 +50,7 @@ protected SweeperBase(ArgumentsBase args, IHostEnvironment env, string name)

_args = args;

SweepParameters = args.SweptParameters.Select(p => p.CreateInstance(Host)).ToArray();
SweepParameters = args.SweptParameters.Select(p => p.CreateComponent(Host)).ToArray();
}

protected SweeperBase(ArgumentsBase args, IHostEnvironment env, IValueGenerator[] sweepParameters, string name)
11 changes: 6 additions & 5 deletions src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs
Original file line number Diff line number Diff line change
@@ -9,9 +9,10 @@
using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.FastTree.Internal;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Sweeper.Algorithms;
using Microsoft.ML.Runtime.FastTree.Internal;

[assembly: LoadableClass(typeof(KdoSweeper), typeof(KdoSweeper.Arguments), typeof(SignatureSweeper),
"KDO Sweeper", "KDOSweeper", "KDO")]
@@ -39,8 +40,8 @@ public sealed class KdoSweeper : ISweeper
{
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")]
public SubComponent<IValueGenerator, SignatureSweeperParameter>[] SweptParameters;
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))]
public IComponentFactory<IValueGenerator>[] SweptParameters;

[Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")]
public int RandomSeed;
@@ -99,7 +100,7 @@ public KdoSweeper(IHostEnvironment env, Arguments args)

_args = args;
_host.CheckUserArg(Utils.Size(args.SweptParameters) > 0, nameof(args.SweptParameters), "KDO sweeper needs at least one parameter to sweep over");
_sweepParameters = args.SweptParameters.Select(p => p.CreateInstance(_host)).ToArray();
_sweepParameters = args.SweptParameters.Select(p => p.CreateComponent(_host)).ToArray();
_randomSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase(), _sweepParameters);
_redundantSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase { Retries = 0 }, _sweepParameters);
_spu = new SweeperProbabilityUtils(_host);
@@ -144,7 +145,7 @@ public ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResult> previ
// I'm not sure if this is too much detail, but it might be.
string errorMessage = $"Error: Sweep run results are missing metric values. \n\n" +
$"NOTE: Default metric of 'AUC' only viable for binary classification problems. \n" +
$"Please include an evaluator (ev) subcomponent with an appropriate metric specified for your task type.\n\n" +
$"Please include an evaluator (ev) component with an appropriate metric specified for your task type.\n\n" +
"Example RSP using alternate metric (i.e., AccuracyMicro):\nrunner=Local{\n\tev=Tlc{m=AccuracyMicro}\n\tpattern={...etc...}\n}";
throw _host.Except(new Exception(errorMessage), errorMessage);
}
13 changes: 7 additions & 6 deletions src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Numeric;
using Microsoft.ML.Runtime.Sweeper;

@@ -21,11 +22,11 @@ public sealed class NelderMeadSweeper : ISweeper
{
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")]
public SubComponent[] SweptParameters;
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))]
public IComponentFactory<IValueGenerator>[] SweptParameters;

[Argument(ArgumentType.LastOccurenceWins, HelpText = "The sweeper used to get the initial results.", ShortName = "init")]
public SubComponent<ISweeper, SignatureSweeperFromParameterList> FirstBatchSweeper = new SubComponent<ISweeper, SignatureSweeperFromParameterList>("ldrandpl");
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The sweeper used to get the initial results.", ShortName = "init", SignatureType = typeof(SignatureSweeperFromParameterList))]
public IComponentFactory<IValueGenerator[], ISweeper> FirstBatchSweeper;

[Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")]
public int RandomSeed;
@@ -100,7 +101,7 @@ public NelderMeadSweeper(IHostEnvironment env, Arguments args)
_sweepParameters = new List<IValueGenerator>();
foreach (var sweptParameter in args.SweptParameters)
{
var parameter = ComponentCatalog.CreateInstance<IValueGenerator, SignatureSweeperParameter>(env, sweptParameter);
var parameter = sweptParameter.CreateComponent(env);
// REVIEW: ideas about how to support discrete values:
// 1. assign each discrete value a random number (1-n) to make mirroring possible
// 2. each time we need to mirror a discrete value, sample from the remaining value
@@ -112,7 +113,7 @@ public NelderMeadSweeper(IHostEnvironment env, Arguments args)
_sweepParameters.Add(parameterNumeric);
}

_initSweeper = args.FirstBatchSweeper.CreateInstance(env, new object[] { _sweepParameters.ToArray() });
_initSweeper = args.FirstBatchSweeper.CreateComponent(env, _sweepParameters.ToArray());
_dim = _sweepParameters.Count;
env.CheckUserArg(_dim > 1, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over.");

7 changes: 4 additions & 3 deletions src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
using Microsoft.ML.Runtime.Sweeper;

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.FastTree;
using Microsoft.ML.Runtime.FastTree.Internal;
using Microsoft.ML.Runtime.Internal.Utilities;
@@ -28,8 +29,8 @@ public sealed class SmacSweeper : ISweeper
{
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")]
public SubComponent<IValueGenerator, SignatureSweeperParameter>[] SweptParameters;
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))]
public IComponentFactory<IValueGenerator>[] SweptParameters;

[Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")]
public int RandomSeed;
@@ -83,7 +84,7 @@ public SmacSweeper(IHostEnvironment env, Arguments args)

_args = args;
_host.CheckUserArg(Utils.Size(args.SweptParameters) > 0, nameof(args.SweptParameters), "SMAC sweeper needs at least one parameter to sweep over");
_sweepParameters = args.SweptParameters.Select(p => p.CreateInstance(_host)).ToArray();
_sweepParameters = args.SweptParameters.Select(p => p.CreateComponent(_host)).ToArray();
_randomSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase(), _sweepParameters);
}

9 changes: 5 additions & 4 deletions src/Microsoft.ML.Sweeper/AsyncSweeper.cs
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Sweeper;

@@ -152,8 +153,8 @@ public sealed class DeterministicSweeperAsync : IAsyncSweeper, IDisposable
{
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Base sweeper", ShortName = "sweeper")]
public SubComponent<ISweeper, SignatureSweeper> Sweeper;
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Base sweeper", ShortName = "sweeper", SignatureType = typeof(SignatureSweeper))]
public IComponentFactory<ISweeper> Sweeper;

[Argument(ArgumentType.AtMostOnce, HelpText = "Sweep batch size", ShortName = "batchsize")]
public int BatchSize = 5;
@@ -193,13 +194,13 @@ public sealed class Arguments
public DeterministicSweeperAsync(IHostEnvironment env, Arguments args)
{
_host = env.Register("DeterministicSweeperAsync", args.RandomSeed);
_host.CheckUserArg(args.Sweeper.IsGood(), nameof(args.Sweeper), "Please specify a sweeper");
_host.CheckValue(args.Sweeper, nameof(args.Sweeper), "Please specify a sweeper");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CheckValue [](start = 18, length = 10)

Why was this changed from CheckUserArg to CheckValue?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SubComponents have an IsGood() extension method that checks both for null and for a non-empty "Kind" property. CheckUserArg only has an overload for bool, which IsGood() returns.

Now that this is an IComponentFactory, the only thing we can check for is null - which is what CheckValue does.

_host.CheckUserArg(args.BatchSize > 0, nameof(args.BatchSize), "Batch size must be positive");
_host.CheckUserArg(args.Relaxation >= 0, nameof(args.Relaxation), "Synchronization relaxation must be non-negative");
_host.CheckUserArg(args.Relaxation <= args.BatchSize, nameof(args.Relaxation),
"Synchronization relaxation cannot be larger than batch size");
_batchSize = args.BatchSize;
_baseSweeper = args.Sweeper.CreateInstance(_host);
_baseSweeper = args.Sweeper.CreateComponent(_host);
_host.CheckUserArg(!(_baseSweeper is NelderMeadSweeper) || args.Relaxation == 0, nameof(args.Relaxation),
"Nelder-Mead requires full synchronization (relaxation = 0)");

9 changes: 5 additions & 4 deletions src/Microsoft.ML.Sweeper/ConfigRunner.cs
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
using Microsoft.ML;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Sweeper;

@@ -46,9 +47,9 @@ public abstract class ArgumentsBase
[Argument(ArgumentType.AtMostOnce, HelpText = "The executable name, including the path (the default is MAML.exe)")]
public string Exe;

[Argument(ArgumentType.Multiple, HelpText = "Specify how to extract the metrics from the result file.", ShortName = "ev")]
public SubComponent<ISweepResultEvaluator<string>, SignatureSweepResultEvaluator> ResultProcessor
= new SubComponent<ISweepResultEvaluator<string>, SignatureSweepResultEvaluator>("Tlc");
[Argument(ArgumentType.Multiple, HelpText = "Specify how to extract the metrics from the result file.", ShortName = "ev", SignatureType = typeof(SignatureSweepResultEvaluator))]
public IComponentFactory<ISweepResultEvaluator<string>> ResultProcessor = ComponentFactoryUtils.CreateFromFunction(
env => new InternalSweepResultEvaluator(env, new InternalSweepResultEvaluator.Arguments()));

[Argument(ArgumentType.AtMostOnce, Hide = true)]
public bool CalledFromUnitTestSuite;
@@ -74,7 +75,7 @@ protected ExeConfigRunnerBase(ArgumentsBase args, IHostEnvironment env, string r
ArgsPattern = args.ArgsPattern;
OutputFolder = GetOutputFolderPath(args.OutputFolderName);
Prefix = string.IsNullOrEmpty(args.Prefix) ? "" : args.Prefix;
ResultProcessor = args.ResultProcessor.CreateInstance(Host);
ResultProcessor = args.ResultProcessor.CreateComponent(Host);
_calledFromUnitTestSuite = args.CalledFromUnitTestSuite;
RunNums = new List<int>();
}
18 changes: 10 additions & 8 deletions src/Microsoft.ML.Sweeper/SweepCommand.cs
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
using Microsoft.ML;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Sweeper;
using Microsoft.ML.Runtime.Data;
@@ -22,11 +23,12 @@ public sealed class SweepCommand : ICommand
{
public sealed class Arguments
{
[Argument(ArgumentType.Multiple, HelpText = "Config runner", ShortName = "run,ev,evaluator")]
public SubComponent<IConfigRunner, SignatureConfigRunner> Runner = new SubComponent<IConfigRunner, SignatureConfigRunner>("Local");
[Argument(ArgumentType.Multiple, HelpText = "Config runner", ShortName = "run,ev,evaluator", SignatureType = typeof(SignatureConfigRunner))]
public IComponentFactory<IConfigRunner> Runner = ComponentFactoryUtils.CreateFromFunction(
env => new LocalExeConfigRunner(env, new LocalExeConfigRunner.Arguments()));

[Argument(ArgumentType.Multiple, HelpText = "Sweeper", ShortName = "s")]
public SubComponent<ISweeper, SignatureSweeper> Sweeper;
[Argument(ArgumentType.Multiple, HelpText = "Sweeper", ShortName = "s", SignatureType = typeof(SignatureSweeper))]
public IComponentFactory<ISweeper> Sweeper;

[Argument(ArgumentType.AtMostOnce, HelpText = "Initial Sweep batch size (for instantiating sweep algorithm)", ShortName = "isbs")]
public int? InitialSweepBatchSize;
@@ -61,17 +63,17 @@ public SweepCommand(IHostEnvironment env, Arguments args)

_host = env.Register("SweepCommand", args.RandomSeed);

_host.CheckUserArg(args.Runner.IsGood(), nameof(args.Runner), "Please specify a runner");
_host.CheckUserArg(args.Sweeper.IsGood(), nameof(args.Sweeper), "Please specify a sweeper");
_host.CheckValue(args.Runner, nameof(args.Runner), "Please specify a runner");
_host.CheckValue(args.Sweeper, nameof(args.Sweeper), "Please specify a sweeper");
_host.CheckUserArg(args.SweepNumBatches > 0, nameof(args.SweepNumBatches), "Must be positive");
_host.CheckUserArg(!(args.InitialSweepBatchSize <= 0), nameof(args.InitialSweepBatchSize), "Must be positive if specified");
_host.CheckUserArg(args.SweepBatchSize > 0, nameof(args.SweepBatchSize), "Must be positive");

_numBatches = args.SweepNumBatches;
_initBatchSize = args.InitialSweepBatchSize ?? args.SweepBatchSize;
_batchSize = args.SweepBatchSize;
_runner = args.Runner.CreateInstance(_host);
_sweeper = args.Sweeper.CreateInstance(_host);
_runner = args.Runner.CreateComponent(_host);
_sweeper = args.Sweeper.CreateComponent(_host);
}

public void Run()