diff --git a/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs b/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs index 867c53053e..64ab03dfab 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs @@ -101,9 +101,7 @@ private void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLear // If first time optimizing hyperparams, create new hyperparameter sweeper. if (!_hyperSweepers.ContainsKey(learner.LearnerName)) { - var paramTups = AutoMlUtils.ConvertToSweepArgumentStrings(learner.PipelineNode.SweepParams); - var sps = paramTups.Select(tup => - new SubComponent(tup.Item1, tup.Item2)).ToArray(); + var sps = AutoMlUtils.ConvertToComponentFactories(learner.PipelineNode.SweepParams); if (sps.Length > 0) { _hyperSweepers[learner.LearnerName] = new KdoSweeper(Env, diff --git a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs index a8459c5b3c..ba4b1e3872 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs @@ -579,58 +579,73 @@ public static IRunResult[] ConvertToRunResults(PipelinePattern[] history, bool i { return history.Select(h => ConvertToRunResult(h.Learner, h.PerformanceSummary, isMetricMaximizing)).ToArray(); } + /// - /// Method to convert set of sweepable hyperparameters into strings of a format understood + /// Method to convert set of sweepable hyperparameters into instances used /// by the current smart hyperparameter sweepers. /// - public static Tuple[] ConvertToSweepArgumentStrings(TlcModule.SweepableParamAttribute[] hps) + public static IComponentFactory[] ConvertToComponentFactories(TlcModule.SweepableParamAttribute[] hps) { - var results = new Tuple[hps.Length]; + var results = new IComponentFactory[hps.Length]; for (int i = 0; i < hps.Length; i++) { - string logSetting; - string numStepsSetting; - string stepSizeSetting; switch (hps[i]) { case TlcModule.SweepableDiscreteParamAttribute dp: - results[i] = new Tuple("dp", - new[] { $"name={dp.Name}", $"{string.Join(" ", dp.Options.Select(o => $"v={o}"))}" }); + results[i] = ComponentFactoryUtils.CreateFromFunction(env => + { + var dpArgs = new DiscreteParamArguments() + { + Name = dp.Name, + Values = dp.Options.Select(o => o.ToString()).ToArray() + }; + return new DiscreteValueGenerator(dpArgs); + }); break; + case TlcModule.SweepableFloatParamAttribute fp: - logSetting = fp.IsLogScale ? "log+" : ""; - numStepsSetting = fp.NumSteps != null ? $"numsteps={fp.NumSteps}" : ""; - stepSizeSetting = fp.StepSize != null ? $"stepsize={fp.StepSize}" : ""; - - results[i] = - new Tuple("fp", - new[] - { - $"name={fp.Name}", - $"min={fp.Min}", - $"max={fp.Max}", - logSetting, - numStepsSetting, - stepSizeSetting - }); + results[i] = ComponentFactoryUtils.CreateFromFunction(env => + { + var fpArgs = new FloatParamArguments() + { + Name = fp.Name, + Min = fp.Min, + Max = fp.Max, + LogBase = fp.IsLogScale, + }; + if (fp.NumSteps.HasValue) + { + fpArgs.NumSteps = fp.NumSteps.Value; + } + if (fp.StepSize.HasValue) + { + fpArgs.StepSize = fp.StepSize.Value; + } + return new FloatValueGenerator(fpArgs); + }); break; + case TlcModule.SweepableLongParamAttribute lp: - logSetting = lp.IsLogScale ? "logbase+" : ""; - numStepsSetting = lp.NumSteps != null ? $"numsteps={lp.NumSteps}" : ""; - stepSizeSetting = lp.StepSize != null ? $"stepsize={lp.StepSize}" : ""; - - results[i] = - new Tuple("lp", - new[] - { - $"name={lp.Name}", - $"min={lp.Min}", - $"max={lp.Max}", - logSetting, - numStepsSetting, - stepSizeSetting - }); + results[i] = ComponentFactoryUtils.CreateFromFunction(env => + { + var lpArgs = new LongParamArguments() + { + Name = lp.Name, + Min = lp.Min, + Max = lp.Max, + LogBase = lp.IsLogScale + }; + if (lp.NumSteps.HasValue) + { + lpArgs.NumSteps = lp.NumSteps.Value; + } + if (lp.StepSize.HasValue) + { + lpArgs.StepSize = lp.StepSize.Value; + } + return new LongValueGenerator(lpArgs); + }); break; } } diff --git a/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs b/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs index 53e7046ebd..cac83a5eed 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs @@ -6,6 +6,7 @@ using System.Linq; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Sweeper; @@ -28,8 +29,8 @@ public abstract class SweeperBase : ISweeper { public class ArgumentsBase { - [Argument(ArgumentType.Multiple, HelpText = "Swept parameters", ShortName = "p")] - public SubComponent[] SweptParameters; + [Argument(ArgumentType.Multiple, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))] + public IComponentFactory[] SweptParameters; [Argument(ArgumentType.LastOccurenceWins, HelpText = "Number of tries to generate distinct parameter sets.", ShortName = "r")] public int Retries = 10; @@ -49,7 +50,7 @@ protected SweeperBase(ArgumentsBase args, IHostEnvironment env, string name) _args = args; - SweepParameters = args.SweptParameters.Select(p => p.CreateInstance(Host)).ToArray(); + SweepParameters = args.SweptParameters.Select(p => p.CreateComponent(Host)).ToArray(); } protected SweeperBase(ArgumentsBase args, IHostEnvironment env, IValueGenerator[] sweepParameters, string name) diff --git a/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs b/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs index 6479c51f7e..1cfedc394a 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs @@ -9,9 +9,10 @@ using System.Linq; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.FastTree.Internal; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Sweeper.Algorithms; -using Microsoft.ML.Runtime.FastTree.Internal; [assembly: LoadableClass(typeof(KdoSweeper), typeof(KdoSweeper.Arguments), typeof(SignatureSweeper), "KDO Sweeper", "KDOSweeper", "KDO")] @@ -39,8 +40,8 @@ public sealed class KdoSweeper : ISweeper { public sealed class Arguments { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")] - public SubComponent[] SweptParameters; + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))] + public IComponentFactory[] SweptParameters; [Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")] public int RandomSeed; @@ -99,7 +100,7 @@ public KdoSweeper(IHostEnvironment env, Arguments args) _args = args; _host.CheckUserArg(Utils.Size(args.SweptParameters) > 0, nameof(args.SweptParameters), "KDO sweeper needs at least one parameter to sweep over"); - _sweepParameters = args.SweptParameters.Select(p => p.CreateInstance(_host)).ToArray(); + _sweepParameters = args.SweptParameters.Select(p => p.CreateComponent(_host)).ToArray(); _randomSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase(), _sweepParameters); _redundantSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase { Retries = 0 }, _sweepParameters); _spu = new SweeperProbabilityUtils(_host); @@ -144,7 +145,7 @@ public ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable previ // I'm not sure if this is too much detail, but it might be. string errorMessage = $"Error: Sweep run results are missing metric values. \n\n" + $"NOTE: Default metric of 'AUC' only viable for binary classification problems. \n" + - $"Please include an evaluator (ev) subcomponent with an appropriate metric specified for your task type.\n\n" + + $"Please include an evaluator (ev) component with an appropriate metric specified for your task type.\n\n" + "Example RSP using alternate metric (i.e., AccuracyMicro):\nrunner=Local{\n\tev=Tlc{m=AccuracyMicro}\n\tpattern={...etc...}\n}"; throw _host.Except(new Exception(errorMessage), errorMessage); } diff --git a/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs b/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs index c982fc46ec..062047ab67 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs @@ -9,6 +9,7 @@ using System.Linq; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Numeric; using Microsoft.ML.Runtime.Sweeper; @@ -21,11 +22,11 @@ public sealed class NelderMeadSweeper : ISweeper { public sealed class Arguments { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")] - public SubComponent[] SweptParameters; + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))] + public IComponentFactory[] SweptParameters; - [Argument(ArgumentType.LastOccurenceWins, HelpText = "The sweeper used to get the initial results.", ShortName = "init")] - public SubComponent FirstBatchSweeper = new SubComponent("ldrandpl"); + [Argument(ArgumentType.LastOccurenceWins, HelpText = "The sweeper used to get the initial results.", ShortName = "init", SignatureType = typeof(SignatureSweeperFromParameterList))] + public IComponentFactory FirstBatchSweeper; [Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")] public int RandomSeed; @@ -100,7 +101,7 @@ public NelderMeadSweeper(IHostEnvironment env, Arguments args) _sweepParameters = new List(); foreach (var sweptParameter in args.SweptParameters) { - var parameter = ComponentCatalog.CreateInstance(env, sweptParameter); + var parameter = sweptParameter.CreateComponent(env); // REVIEW: ideas about how to support discrete values: // 1. assign each discrete value a random number (1-n) to make mirroring possible // 2. each time we need to mirror a discrete value, sample from the remaining value @@ -112,7 +113,7 @@ public NelderMeadSweeper(IHostEnvironment env, Arguments args) _sweepParameters.Add(parameterNumeric); } - _initSweeper = args.FirstBatchSweeper.CreateInstance(env, new object[] { _sweepParameters.ToArray() }); + _initSweeper = args.FirstBatchSweeper.CreateComponent(env, _sweepParameters.ToArray()); _dim = _sweepParameters.Count; env.CheckUserArg(_dim > 1, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over."); diff --git a/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs b/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs index cf08cc99c3..b5614e3cbf 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs @@ -12,6 +12,7 @@ using Microsoft.ML.Runtime.Sweeper; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.FastTree; using Microsoft.ML.Runtime.FastTree.Internal; using Microsoft.ML.Runtime.Internal.Utilities; @@ -28,8 +29,8 @@ public sealed class SmacSweeper : ISweeper { public sealed class Arguments { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p")] - public SubComponent[] SweptParameters; + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Swept parameters", ShortName = "p", SignatureType = typeof(SignatureSweeperParameter))] + public IComponentFactory[] SweptParameters; [Argument(ArgumentType.AtMostOnce, HelpText = "Seed for the random number generator for the first batch sweeper", ShortName = "seed")] public int RandomSeed; @@ -83,7 +84,7 @@ public SmacSweeper(IHostEnvironment env, Arguments args) _args = args; _host.CheckUserArg(Utils.Size(args.SweptParameters) > 0, nameof(args.SweptParameters), "SMAC sweeper needs at least one parameter to sweep over"); - _sweepParameters = args.SweptParameters.Select(p => p.CreateInstance(_host)).ToArray(); + _sweepParameters = args.SweptParameters.Select(p => p.CreateComponent(_host)).ToArray(); _randomSweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase(), _sweepParameters); } diff --git a/src/Microsoft.ML.Sweeper/AsyncSweeper.cs b/src/Microsoft.ML.Sweeper/AsyncSweeper.cs index a86a4755d8..fa537e793a 100644 --- a/src/Microsoft.ML.Sweeper/AsyncSweeper.cs +++ b/src/Microsoft.ML.Sweeper/AsyncSweeper.cs @@ -10,6 +10,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Sweeper; @@ -152,8 +153,8 @@ public sealed class DeterministicSweeperAsync : IAsyncSweeper, IDisposable { public sealed class Arguments { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Base sweeper", ShortName = "sweeper")] - public SubComponent Sweeper; + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Base sweeper", ShortName = "sweeper", SignatureType = typeof(SignatureSweeper))] + public IComponentFactory Sweeper; [Argument(ArgumentType.AtMostOnce, HelpText = "Sweep batch size", ShortName = "batchsize")] public int BatchSize = 5; @@ -193,13 +194,13 @@ public sealed class Arguments public DeterministicSweeperAsync(IHostEnvironment env, Arguments args) { _host = env.Register("DeterministicSweeperAsync", args.RandomSeed); - _host.CheckUserArg(args.Sweeper.IsGood(), nameof(args.Sweeper), "Please specify a sweeper"); + _host.CheckValue(args.Sweeper, nameof(args.Sweeper), "Please specify a sweeper"); _host.CheckUserArg(args.BatchSize > 0, nameof(args.BatchSize), "Batch size must be positive"); _host.CheckUserArg(args.Relaxation >= 0, nameof(args.Relaxation), "Synchronization relaxation must be non-negative"); _host.CheckUserArg(args.Relaxation <= args.BatchSize, nameof(args.Relaxation), "Synchronization relaxation cannot be larger than batch size"); _batchSize = args.BatchSize; - _baseSweeper = args.Sweeper.CreateInstance(_host); + _baseSweeper = args.Sweeper.CreateComponent(_host); _host.CheckUserArg(!(_baseSweeper is NelderMeadSweeper) || args.Relaxation == 0, nameof(args.Relaxation), "Nelder-Mead requires full synchronization (relaxation = 0)"); diff --git a/src/Microsoft.ML.Sweeper/ConfigRunner.cs b/src/Microsoft.ML.Sweeper/ConfigRunner.cs index 8754a1a59f..1cc52cf4bd 100644 --- a/src/Microsoft.ML.Sweeper/ConfigRunner.cs +++ b/src/Microsoft.ML.Sweeper/ConfigRunner.cs @@ -11,6 +11,7 @@ using Microsoft.ML; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Sweeper; @@ -46,9 +47,9 @@ public abstract class ArgumentsBase [Argument(ArgumentType.AtMostOnce, HelpText = "The executable name, including the path (the default is MAML.exe)")] public string Exe; - [Argument(ArgumentType.Multiple, HelpText = "Specify how to extract the metrics from the result file.", ShortName = "ev")] - public SubComponent, SignatureSweepResultEvaluator> ResultProcessor - = new SubComponent, SignatureSweepResultEvaluator>("Tlc"); + [Argument(ArgumentType.Multiple, HelpText = "Specify how to extract the metrics from the result file.", ShortName = "ev", SignatureType = typeof(SignatureSweepResultEvaluator))] + public IComponentFactory> ResultProcessor = ComponentFactoryUtils.CreateFromFunction( + env => new InternalSweepResultEvaluator(env, new InternalSweepResultEvaluator.Arguments())); [Argument(ArgumentType.AtMostOnce, Hide = true)] public bool CalledFromUnitTestSuite; @@ -74,7 +75,7 @@ protected ExeConfigRunnerBase(ArgumentsBase args, IHostEnvironment env, string r ArgsPattern = args.ArgsPattern; OutputFolder = GetOutputFolderPath(args.OutputFolderName); Prefix = string.IsNullOrEmpty(args.Prefix) ? "" : args.Prefix; - ResultProcessor = args.ResultProcessor.CreateInstance(Host); + ResultProcessor = args.ResultProcessor.CreateComponent(Host); _calledFromUnitTestSuite = args.CalledFromUnitTestSuite; RunNums = new List(); } diff --git a/src/Microsoft.ML.Sweeper/SweepCommand.cs b/src/Microsoft.ML.Sweeper/SweepCommand.cs index 6c0250d666..dfdfe6b2eb 100644 --- a/src/Microsoft.ML.Sweeper/SweepCommand.cs +++ b/src/Microsoft.ML.Sweeper/SweepCommand.cs @@ -8,6 +8,7 @@ using Microsoft.ML; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Sweeper; using Microsoft.ML.Runtime.Data; @@ -22,11 +23,12 @@ public sealed class SweepCommand : ICommand { public sealed class Arguments { - [Argument(ArgumentType.Multiple, HelpText = "Config runner", ShortName = "run,ev,evaluator")] - public SubComponent Runner = new SubComponent("Local"); + [Argument(ArgumentType.Multiple, HelpText = "Config runner", ShortName = "run,ev,evaluator", SignatureType = typeof(SignatureConfigRunner))] + public IComponentFactory Runner = ComponentFactoryUtils.CreateFromFunction( + env => new LocalExeConfigRunner(env, new LocalExeConfigRunner.Arguments())); - [Argument(ArgumentType.Multiple, HelpText = "Sweeper", ShortName = "s")] - public SubComponent Sweeper; + [Argument(ArgumentType.Multiple, HelpText = "Sweeper", ShortName = "s", SignatureType = typeof(SignatureSweeper))] + public IComponentFactory Sweeper; [Argument(ArgumentType.AtMostOnce, HelpText = "Initial Sweep batch size (for instantiating sweep algorithm)", ShortName = "isbs")] public int? InitialSweepBatchSize; @@ -61,8 +63,8 @@ public SweepCommand(IHostEnvironment env, Arguments args) _host = env.Register("SweepCommand", args.RandomSeed); - _host.CheckUserArg(args.Runner.IsGood(), nameof(args.Runner), "Please specify a runner"); - _host.CheckUserArg(args.Sweeper.IsGood(), nameof(args.Sweeper), "Please specify a sweeper"); + _host.CheckValue(args.Runner, nameof(args.Runner), "Please specify a runner"); + _host.CheckValue(args.Sweeper, nameof(args.Sweeper), "Please specify a sweeper"); _host.CheckUserArg(args.SweepNumBatches > 0, nameof(args.SweepNumBatches), "Must be positive"); _host.CheckUserArg(!(args.InitialSweepBatchSize <= 0), nameof(args.InitialSweepBatchSize), "Must be positive if specified"); _host.CheckUserArg(args.SweepBatchSize > 0, nameof(args.SweepBatchSize), "Must be positive"); @@ -70,8 +72,8 @@ public SweepCommand(IHostEnvironment env, Arguments args) _numBatches = args.SweepNumBatches; _initBatchSize = args.InitialSweepBatchSize ?? args.SweepBatchSize; _batchSize = args.SweepBatchSize; - _runner = args.Runner.CreateInstance(_host); - _sweeper = args.Sweeper.CreateInstance(_host); + _runner = args.Runner.CreateComponent(_host); + _sweeper = args.Sweeper.CreateComponent(_host); } public void Run()