From 3d2166a83ad2d9e616f1fc2a04580c6299d3fd1d Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Wed, 29 May 2019 17:20:23 -0700 Subject: [PATCH 01/10] v0 changes --- .../ColumnInference/ColumnInformationUtil.cs | 18 ++ .../TrainerExtensions/SweepableParams.cs | 29 +++ .../CodeGenerator/CodeGenerationHelper.cs | 9 + src/mlnet/Commands/New/NewCommandHandler.cs | 7 +- src/mlnet/Program.cs | 30 ++- .../FirstTimeUseNoticeSentinel.cs | 2 +- .../TelemetryCommonProperties.cs | 2 +- .../DotNetAppInsights/UserLevelCacheWriter.cs | 2 +- .../Events/ExperimentCompletedEvent.cs | 34 +++ .../ExperimentIterationCompletedEvent.cs | 46 ++++ .../Telemetry/Events/InferColumnsEvent.cs | 41 ++++ .../Telemetry/Events/MLNetCommandEndEvent.cs | 28 +++ .../Telemetry/Events/MLNetCommandEvent.cs | 59 +++++ src/mlnet/Telemetry/Events/SystemInfoEvent.cs | 24 ++ src/mlnet/Telemetry/MlTelemetry.cs | 97 -------- src/mlnet/Telemetry/ProductVersion.cs | 7 +- src/mlnet/Telemetry/Telemetry.cs | 223 +++++++++++++----- src/mlnet/Utilities/ProgressHandlers.cs | 4 + 18 files changed, 483 insertions(+), 179 deletions(-) create mode 100644 src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs create mode 100644 src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs create mode 100644 src/mlnet/Telemetry/Events/InferColumnsEvent.cs create mode 100644 src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs create mode 100644 src/mlnet/Telemetry/Events/MLNetCommandEvent.cs create mode 100644 src/mlnet/Telemetry/Events/SystemInfoEvent.cs delete mode 100644 src/mlnet/Telemetry/MlTelemetry.cs diff --git a/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs b/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs index def93cf202..be02d510be 100644 --- a/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs +++ b/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs @@ -106,6 +106,24 @@ public static IEnumerable GetColumnNames(ColumnInformation columnInforma AddStringsToListIfNotNull(columnNames, columnInformation.TextColumnNames); return columnNames; } + + public static IDictionary CountColumnsByPurpose(ColumnInformation columnInformation) + { + var result = new Dictionary(); + var columnNames = GetColumnNames(columnInformation); + foreach (var columnName in columnNames) + { + var purpose = columnInformation.GetColumnPurpose(columnName); + if (purpose == null) + { + continue; + } + + result.TryGetValue(purpose.Value, out int count); + result[purpose.Value] = ++count; + } + return result; + } private static void AddStringsToListIfNotNull(List list, IEnumerable strings) { diff --git a/src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs b/src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs index c689222c30..5f23729a77 100644 --- a/src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs +++ b/src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs @@ -61,6 +61,11 @@ private static IEnumerable BuildLbfgsArgsParams() }; } + /// + /// The names of every hyperparameter swept across all trainers. + /// + public static ISet AllHyperparameterNames = GetAllSweepableParameterNames(); + public static IEnumerable BuildAveragePerceptronParams() { return BuildAveragedLinearArgsParams().Concat(BuildOnlineLinearArgsParams()); @@ -172,5 +177,29 @@ public static IEnumerable BuildSymSgdLogisticRegressionParams() new SweepableDiscreteParam("UpdateFrequency", new object[] { "", 5, 20 }) }; } + + /// + /// Gets the name of every hyperparameter swept across all trainers. + /// + public static ISet GetAllSweepableParameterNames() + { + var sweepableParams = new List(); + sweepableParams.AddRange(BuildAveragePerceptronParams()); + sweepableParams.AddRange(BuildAveragePerceptronParams()); + sweepableParams.AddRange(BuildFastForestParams()); + sweepableParams.AddRange(BuildFastTreeParams()); + sweepableParams.AddRange(BuildFastTreeTweedieParams()); + sweepableParams.AddRange(BuildLightGbmParamsMulticlass()); + sweepableParams.AddRange(BuildLightGbmParams()); + sweepableParams.AddRange(BuildLinearSvmParams()); + sweepableParams.AddRange(BuildLbfgsLogisticRegressionParams()); + sweepableParams.AddRange(BuildOnlineGradientDescentParams()); + sweepableParams.AddRange(BuildLbfgsPoissonRegressionParams()); + sweepableParams.AddRange(BuildSdcaParams()); + sweepableParams.AddRange(BuildOlsParams()); + sweepableParams.AddRange(BuildSgdParams()); + sweepableParams.AddRange(BuildSymSgdLogisticRegressionParams()); + return new HashSet(sweepableParams.Select(p => p.Name)); + } } } diff --git a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs index a58088ad78..6f74c405c4 100644 --- a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs +++ b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs @@ -12,6 +12,7 @@ using Microsoft.ML.CLI.CodeGenerator.CSharp; using Microsoft.ML.CLI.Data; using Microsoft.ML.CLI.ShellProgressBar; +using Microsoft.ML.CLI.Telemetry.Events; using Microsoft.ML.CLI.Utilities; using Microsoft.ML.Data; using NLog; @@ -51,7 +52,9 @@ public void GenerateCode() { inputColumnInformation.IgnoredColumnNames.Add(value); } + var inferColumnsStopwatch = Stopwatch.StartNew(); columnInference = automlEngine.InferColumns(context, inputColumnInformation); + InferColumnsEvent.TrackEvent(columnInference.ColumnInformation, inferColumnsStopwatch.Elapsed); } catch (Exception) { @@ -74,6 +77,9 @@ public void GenerateCode() // The reason why we are doing this way of defining 3 different results is because of the AutoML API // i.e there is no common class/interface to handle all three tasks together. + // Start a timer for the experiment + var stopwatch = Stopwatch.StartNew(); + List> completedBinaryRuns = new List>(); List> completedMulticlassRuns = new List>(); List> completedRegressionRuns = new List>(); @@ -236,6 +242,7 @@ public void GenerateCode() { var binaryMetric = new BinaryExperimentSettings().OptimizingMetric; var bestBinaryIteration = BestResultUtil.GetBestRun(completedBinaryRuns, binaryMetric); + ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, stopwatch.Elapsed); bestPipeline = bestBinaryIteration.Pipeline; bestModel = bestBinaryIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedBinaryRuns.Count()); @@ -253,6 +260,7 @@ public void GenerateCode() { var regressionMetric = new RegressionExperimentSettings().OptimizingMetric; var bestRegressionIteration = BestResultUtil.GetBestRun(completedRegressionRuns, regressionMetric); + ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, stopwatch.Elapsed); bestPipeline = bestRegressionIteration.Pipeline; bestModel = bestRegressionIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedRegressionRuns.Count()); @@ -270,6 +278,7 @@ public void GenerateCode() { var muliclassMetric = new MulticlassExperimentSettings().OptimizingMetric; var bestMulticlassIteration = BestResultUtil.GetBestRun(completedMulticlassRuns, muliclassMetric); + ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, stopwatch.Elapsed); bestPipeline = bestMulticlassIteration.Pipeline; bestModel = bestMulticlassIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedMulticlassRuns.Count()); diff --git a/src/mlnet/Commands/New/NewCommandHandler.cs b/src/mlnet/Commands/New/NewCommandHandler.cs index 2b6091849b..2f615150d6 100644 --- a/src/mlnet/Commands/New/NewCommandHandler.cs +++ b/src/mlnet/Commands/New/NewCommandHandler.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.DotNet.Cli.Telemetry; using Microsoft.ML.CLI.CodeGenerator; using Microsoft.ML.CLI.Data; @@ -11,18 +10,14 @@ namespace Microsoft.ML.CLI.Commands.New internal class NewCommand : ICommand { private readonly NewCommandSettings settings; - private readonly MlTelemetry telemetry; - internal NewCommand(NewCommandSettings settings, MlTelemetry telemetry) + internal NewCommand(NewCommandSettings settings) { this.settings = settings; - this.telemetry = telemetry; } public void Execute() { - telemetry.LogAutoTrainMlCommand(settings.Dataset.Name, settings.MlTask.ToString(), settings.Dataset.Length); - CodeGenerationHelper codeGenerationHelper = new CodeGenerationHelper(new AutoMLEngine(settings), settings); // Needs to be improved. codeGenerationHelper.GenerateCode(); } diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index 568eaa4d4a..6e01406c4e 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -5,12 +5,13 @@ using System; using System.CommandLine.Builder; using System.CommandLine.Invocation; +using System.Diagnostics; using System.IO; using System.Linq; -using Microsoft.DotNet.Cli.Telemetry; using Microsoft.ML.CLI.Commands; using Microsoft.ML.CLI.Commands.New; using Microsoft.ML.CLI.Data; +using Microsoft.ML.CLI.Telemetry.Events; using Microsoft.ML.CLI.Utilities; using NLog; using NLog.Targets; @@ -20,16 +21,26 @@ namespace Microsoft.ML.CLI class Program { private static Logger logger = LogManager.GetCurrentClassLogger(); + public static void Main(string[] args) { - var telemetry = new MlTelemetry(); + Telemetry.Telemetry.Initialize(); int exitCode = 1; + Exception ex = null; + var stopwatch = Stopwatch.StartNew(); + + var mlNetCommandEvent = new MLNetCommandEvent(); + // Create handler outside so that commandline and the handler is decoupled and testable. var handler = CommandHandler.Create( (options) => { try { + // Send telemetry event for command issued + mlNetCommandEvent.AutoTrainCommandSettings = options; + mlNetCommandEvent.TrackEvent(); + // Map the verbosity to internal levels var verbosity = Utils.GetVerbosity(options.Verbosity); @@ -37,7 +48,6 @@ public static void Main(string[] args) string outputBaseDir = string.Empty; if (options.Name == null) { - options.Name = "Sample" + Utils.GetTaskKind(options.MlTask).ToString(); outputBaseDir = Path.Combine(options.OutputPath.FullName, options.Name); } @@ -50,7 +60,7 @@ public static void Main(string[] args) options.OutputPath = new DirectoryInfo(outputBaseDir); // Instantiate the command - var command = new NewCommand(options, telemetry); + var command = new NewCommand(options); // Override the Logger Configuration var logconsole = LogManager.Configuration.FindTargetByName("logconsole"); @@ -67,6 +77,7 @@ public static void Main(string[] args) } catch (Exception e) { + ex = e; logger.Log(LogLevel.Error, e.Message); logger.Log(LogLevel.Debug, e.ToString()); logger.Log(LogLevel.Info, Strings.LookIntoLogFile); @@ -95,13 +106,20 @@ public static void Main(string[] args) var explicitlySpecifiedOptions = options.Where(opt => !opt.IsImplicit).Select(opt => opt.Name); - telemetry.SetCommandAndParameters(command.Name, explicitlySpecifiedOptions); + mlNetCommandEvent.CommandLineParametersUsed = explicitlySpecifiedOptions; } } } + // Send system info telemetry + SystemInfoEvent.TrackEvent(); + parser.InvokeAsync(parseResult).Wait(); + // Send exit telemetry + MLNetCommandEndEvent.TrackEvent(exitCode, !parseResult.Errors.Any(), stopwatch.Elapsed, ex); + // Flush pending telemetry logs + Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(5)); Environment.Exit(exitCode); } } -} +} \ No newline at end of file diff --git a/src/mlnet/Telemetry/DotNetAppInsights/FirstTimeUseNoticeSentinel.cs b/src/mlnet/Telemetry/DotNetAppInsights/FirstTimeUseNoticeSentinel.cs index 0ea9dfd9dd..7deeb71460 100644 --- a/src/mlnet/Telemetry/DotNetAppInsights/FirstTimeUseNoticeSentinel.cs +++ b/src/mlnet/Telemetry/DotNetAppInsights/FirstTimeUseNoticeSentinel.cs @@ -2,8 +2,8 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System.IO; -using Microsoft.DotNet.AutoML; using Microsoft.Extensions.EnvironmentAbstractions; +using Microsoft.ML.CLI.Telemetry; namespace Microsoft.DotNet.Configurer { diff --git a/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs b/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs index 44fcf64ec9..9446379406 100644 --- a/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs +++ b/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs @@ -3,9 +3,9 @@ using System; using System.Collections.Generic; -using Microsoft.DotNet.AutoML; using System.IO; using Microsoft.DotNet.Configurer; +using Microsoft.ML.CLI.Telemetry; using RuntimeEnvironment = Microsoft.DotNet.PlatformAbstractions.RuntimeEnvironment; using RuntimeInformation = System.Runtime.InteropServices.RuntimeInformation; diff --git a/src/mlnet/Telemetry/DotNetAppInsights/UserLevelCacheWriter.cs b/src/mlnet/Telemetry/DotNetAppInsights/UserLevelCacheWriter.cs index b674859078..1e339fa52b 100644 --- a/src/mlnet/Telemetry/DotNetAppInsights/UserLevelCacheWriter.cs +++ b/src/mlnet/Telemetry/DotNetAppInsights/UserLevelCacheWriter.cs @@ -3,8 +3,8 @@ using System; using System.IO; -using Microsoft.DotNet.AutoML; using Microsoft.Extensions.EnvironmentAbstractions; +using Microsoft.ML.CLI.Telemetry; namespace Microsoft.DotNet.Configurer { diff --git a/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs new file mode 100644 index 0000000000..6321578eb9 --- /dev/null +++ b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using Microsoft.ML.AutoML; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// Telemetry event for AutoML experiment completion. + /// + internal static class ExperimentCompletedEvent + { + public static void TrackEvent(RunDetail bestRun, + List> allRuns, + TimeSpan duration) + { + Telemetry.TrackEvent("experiment-completed", + new Dictionary() + { + { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString() }, + { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) }, + { "BestTrainer", bestRun.TrainerName }, + { "NumIterations", allRuns.Count().ToString() }, + { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, + }, + duration); + } + } +} diff --git a/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs b/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs new file mode 100644 index 0000000000..679fe4a200 --- /dev/null +++ b/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.ML.AutoML; +using Newtonsoft.Json; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// Telemetry event for completion of experiment iteration. + /// + internal static class ExperimentIterationCompletedEvent + { + public static void TrackEvent(int iterationNum, + RunDetail runDetail, + double score) + { + Telemetry.TrackEvent("experiment-iteration-completed", + new Dictionary() + { + { "IterationNum", iterationNum.ToString() }, + { "Metrics", GetMetricsStr(runDetail.ValidationMetrics) }, + { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, + { "Pipeline", Telemetry.GetSanitizedPipelineStr(runDetail.Pipeline) }, + { "PipelineInferenceTimeInSeconds", runDetail.PipelineInferenceTimeInSeconds.ToString() }, + { "Score", score.ToString() }, + { "TrainerName", runDetail.TrainerName }, + }, + TimeSpan.FromSeconds(runDetail.RuntimeInSeconds), + runDetail.Exception); + } + + private static string GetMetricsStr(TMetrics metrics) + { + if (metrics == null) + { + return null; + } + return JsonConvert.SerializeObject(metrics); + } + } +} diff --git a/src/mlnet/Telemetry/Events/InferColumnsEvent.cs b/src/mlnet/Telemetry/Events/InferColumnsEvent.cs new file mode 100644 index 0000000000..6b6b8f2523 --- /dev/null +++ b/src/mlnet/Telemetry/Events/InferColumnsEvent.cs @@ -0,0 +1,41 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.ML.AutoML; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// Telemetry event for AutoML column inferencing. + /// + internal static class InferColumnsEvent + { + public static void TrackEvent(ColumnInformation inferredColumns, + TimeSpan duration) + { + var properties = new Dictionary(); + + // Include count of each column type present as a property + var columnsByPurpose = ColumnInformationUtil.CountColumnsByPurpose(inferredColumns); + var totalColumnCount = 0; + foreach (var kvp in columnsByPurpose) + { + totalColumnCount += kvp.Value; + if (kvp.Key == ColumnPurpose.Label) + { + continue; + } + properties[kvp.Key + "ColumnCount"] = kvp.Value.ToString(); + } + + properties["ColumnCount"] = totalColumnCount.ToString(); + properties["PeakMemory"] = Process.GetCurrentProcess().PeakWorkingSet64.ToString(); + + Telemetry.TrackEvent("infer-columns", properties, duration); + } + } +} diff --git a/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs b/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs new file mode 100644 index 0000000000..f6f263ef68 --- /dev/null +++ b/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// Telemetry event for end of the ML.NET command issued. + /// + internal class MLNetCommandEndEvent + { + public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex) + { + Telemetry.TrackEvent("mlnet-command-end", + new Dictionary + { + { "CommandParseSucceeded", commandParseSucceeded.ToString() }, + { "ExitCode", exitCode.ToString() }, + { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, + }, + duration, ex); + } + } +} diff --git a/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs b/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs new file mode 100644 index 0000000000..4969d8d0f4 --- /dev/null +++ b/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using Microsoft.DotNet.Cli.Telemetry; +using Microsoft.ML.CLI.Data; +using Microsoft.ML.CLI.Utilities; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + internal class MLNetCommandEvent + { + public NewCommandSettings AutoTrainCommandSettings { get; set; } + public IEnumerable CommandLineParametersUsed { get; set; } + + public void TrackEvent() + { + Telemetry.TrackEvent("mlnet-command", + new Dictionary + { + { "Cache", Utils.GetCacheSettings(AutoTrainCommandSettings.Cache).ToString() }, + { "Command", "auto-train" }, + { "CommandLineParametersUsed", string.Join(",", CommandLineParametersUsed) }, + { "FilenameHash", HashFilename(AutoTrainCommandSettings.Dataset.Name) }, + { "FileSizeBucket", GetFileSizeBucketStr(AutoTrainCommandSettings.Dataset) }, + { "HasHeader", AutoTrainCommandSettings.HasHeader.ToString() }, + { "IgnoredColumnsCount", AutoTrainCommandSettings.IgnoreColumns.Count.ToString() }, + { "LearningTaskType", AutoTrainCommandSettings.MlTask }, + { "MaxExplorationTime", AutoTrainCommandSettings.MaxExplorationTime.ToString() }, + { "ValidFilenameHash", HashFilename(AutoTrainCommandSettings.ValidationDataset?.Name) }, + { "ValidFileSizeBucket", GetFileSizeBucketStr(AutoTrainCommandSettings.ValidationDataset) }, + { "TestFilenameHash", HashFilename(AutoTrainCommandSettings.TestDataset?.Name) }, + { "TestFileSizeBucket", GetFileSizeBucketStr(AutoTrainCommandSettings.TestDataset) }, + }); + } + + private static string HashFilename(string filename) + { + return string.IsNullOrEmpty(filename) ? null : Sha256Hasher.Hash(filename); + } + + private static double CalcFileSizeBucket(FileInfo fileInfo) + { + return Math.Pow(2, Math.Ceiling(Math.Log(fileInfo.Length, 2))); + } + + private static string GetFileSizeBucketStr(FileInfo fileInfo) + { + if (fileInfo == null || !fileInfo.Exists) + { + return null; + } + return CalcFileSizeBucket(fileInfo).ToString(); + } + } +} diff --git a/src/mlnet/Telemetry/Events/SystemInfoEvent.cs b/src/mlnet/Telemetry/Events/SystemInfoEvent.cs new file mode 100644 index 0000000000..c79d4ddc1a --- /dev/null +++ b/src/mlnet/Telemetry/Events/SystemInfoEvent.cs @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// System info telemetry event. + /// + internal class SystemInfoEvent + { + public static void TrackEvent() + { + Telemetry.TrackEvent("system-info", + new Dictionary + { + { "LogicalCores", Environment.ProcessorCount.ToString() }, + }); + } + } +} diff --git a/src/mlnet/Telemetry/MlTelemetry.cs b/src/mlnet/Telemetry/MlTelemetry.cs deleted file mode 100644 index 3eeed79df2..0000000000 --- a/src/mlnet/Telemetry/MlTelemetry.cs +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) .NET Foundation and contributors. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. - -using System; -using System.Collections.Generic; -using Microsoft.DotNet.Configurer; - -namespace Microsoft.DotNet.Cli.Telemetry -{ - public class MlTelemetry - { - private bool _firstTimeUse = false; - private bool _enabled = false; - private List _parameters = new List(); - private string _command; - - public void SetCommandAndParameters(string command, IEnumerable parameters) - { - if (parameters != null) - { - _parameters.AddRange(parameters); - } - - _command = command; - } - - public void LogAutoTrainMlCommand(string dataFileName, string task, long dataFileSize) - { - CheckFistTimeUse(); - - if (!_enabled) - { - return; - } - - var telemetry = new Telemetry(); - - var fileSizeBucket = Math.Pow(2, Math.Ceiling(Math.Log(dataFileSize, 2))); - - var fileNameHash = string.IsNullOrEmpty(dataFileName) ? string.Empty : Sha256Hasher.Hash(dataFileName); - - var paramString = string.Join(",", _parameters); - - var propertiesToLog = new Dictionary - { - { "Command", _command }, - { "FileSizeBucket", fileSizeBucket.ToString() }, - { "FileNameHash", fileNameHash }, - { "CommandLineParametersUsed", paramString }, - { "LearningTaskType", task } - }; - - telemetry.TrackEvent("mlnet-command", propertiesToLog, new Dictionary()); - } - - private void CheckFistTimeUse() - { - using (IFirstTimeUseNoticeSentinel firstTimeUseNoticeSentinel = new FirstTimeUseNoticeSentinel()) - { - // if we're in first time use invocation and there are repeat telemetry calls, don't send telemetry - if (_firstTimeUse) - { - return; - } - - _firstTimeUse = !firstTimeUseNoticeSentinel.Exists(); - - if (_firstTimeUse) - { - Console.WriteLine( -@"Welcome to the ML.NET CLI! --------------------------- -Learn more about ML.NET CLI: https://aka.ms/mlnet-cli -Use 'mlnet --help' to see available commands or visit: https://aka.ms/mlnet-cli-docs - -Telemetry ---------- -The ML.NET CLI tool collects usage data in order to help us improve your experience. -The data is anonymous and doesn't include personal information or data from your datasets. -You can opt-out of telemetry by setting the MLDOTNET_CLI_TELEMETRY_OPTOUT environment variable to '1' or 'true' using your favorite shell. - -Read more about ML.NET CLI Tool telemetry: https://aka.ms/mlnet-cli-telemetry -"); - - firstTimeUseNoticeSentinel.CreateIfNotExists(); - - // since the user didn't yet have a chance to read the above message and decide to opt out, - // don't log any telemetry on the first invocation. - - return; - } - - _enabled = true; - } - } - } -} \ No newline at end of file diff --git a/src/mlnet/Telemetry/ProductVersion.cs b/src/mlnet/Telemetry/ProductVersion.cs index 1c3afa4130..1a3b21aa55 100644 --- a/src/mlnet/Telemetry/ProductVersion.cs +++ b/src/mlnet/Telemetry/ProductVersion.cs @@ -1,9 +1,10 @@ -// Copyright (c) .NET Foundation and contributors. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. using System.Reflection; -namespace Microsoft.DotNet.AutoML +namespace Microsoft.ML.CLI.Telemetry { public class Product { diff --git a/src/mlnet/Telemetry/Telemetry.cs b/src/mlnet/Telemetry/Telemetry.cs index 71d5fda541..b8682a37a6 100644 --- a/src/mlnet/Telemetry/Telemetry.cs +++ b/src/mlnet/Telemetry/Telemetry.cs @@ -1,144 +1,239 @@ -// Copyright (c) .NET Foundation and contributors. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading; using System.Threading.Tasks; using Microsoft.ApplicationInsights; +using Microsoft.DotNet.Cli.Telemetry; using Microsoft.DotNet.Cli.Utils; +using Microsoft.DotNet.Configurer; using Microsoft.DotNet.PlatformAbstractions; +using Microsoft.ML.AutoML; -namespace Microsoft.DotNet.Cli.Telemetry +namespace Microsoft.ML.CLI.Telemetry { - public class Telemetry : ITelemetry + /// + /// Houses CLI telemetry collection and utility methods. + /// + internal static class Telemetry { - private TelemetryClient _client = null; - private Dictionary _commonProperties = new Dictionary(); - private Task _trackEventTask = null; + private static TelemetryClient _client; + private static Dictionary _commonProperties; + private static bool _enabled; + private static Task _initializationTask; + private static int _outstandingTelemetryEventCount; private const string InstrumentationKey = "c059917c-818d-489a-bfcb-351eaab73f2a"; private const string MlTelemetryOptout = "MLDOTNET_CLI_TELEMETRY_OPTOUT"; private const string MachineId = "MachineId"; - - public bool Enabled { get; } - public Telemetry() + public static void Initialize() { var optedOut = Env.GetEnvironmentVariableAsBool(MlTelemetryOptout, false); - - Enabled = !optedOut; - - if (!Enabled) + _enabled = !optedOut; + if (!_enabled) { return; } - - //initialize in task to offload to parallel thread - _trackEventTask = Task.Factory.StartNew(() => InitializeTelemetry()); + + // Initialize in task to offload to parallel thread + _initializationTask = Task.Factory.StartNew(() => InitializeTask()); } - public void TrackEvent( - string eventName, + /// + /// Send telemetry event. + /// + public static void TrackEvent( + string eventName, IDictionary properties, - IDictionary measurements) + TimeSpan? duration = null, + Exception ex = null) { - if (!Enabled) + if (!_enabled) { return; } - - //continue task in existing parallel thread - _trackEventTask = _trackEventTask.ContinueWith( - x => TrackEventTask(eventName, properties, measurements) - ); + // Increment count of outstanding telemetry events. + Interlocked.Increment(ref _outstandingTelemetryEventCount); + _initializationTask.ContinueWith(x => TrackEventTask(eventName, properties, duration, ex)); } - public void ThreadBlockingTrackEvent(string eventName, IDictionary properties, IDictionary measurements) + /// + /// Wait for all outstanding telemetry tasks to complete. + /// + public static void Flush(TimeSpan timeout) { - if (!Enabled) + var sw = Stopwatch.StartNew(); + while (sw.Elapsed < timeout) { - return; + if (_outstandingTelemetryEventCount == 0) + { + break; + } + Task.Delay(200).Wait(); } + } - TrackEventTask(eventName, properties, measurements); + /// + /// Get serialized pipeline to log. Be careful to exclude PII. + /// + public static string GetSanitizedPipelineStr(Pipeline pipeline) + { + if (pipeline?.Nodes == null) + { + return null; + } + var transformNodes = pipeline.Nodes.Where(n => n.NodeType == PipelineNodeType.Transform); + var trainerNode = pipeline.Nodes.FirstOrDefault(n => n.NodeType == PipelineNodeType.Trainer); + var sb = new StringBuilder(); + foreach (var transformNode in transformNodes) + { + sb.Append(transformNode.Name); + sb.Append(","); + } + if (trainerNode != null) + { + sb.Append(trainerNode.Name); + sb.Append("{"); + var serializedHyperparams = trainerNode.Properties + .Where(p => SweepableParams.AllHyperparameterNames.Contains(p.Key)) + .Select(p => $"{p.Key}: {p.Value}"); + sb.Append(string.Join(", ", serializedHyperparams)); + sb.Append("}"); + } + return sb.ToString(); } - private void InitializeTelemetry() + private static void InitializeTask() { try { + // Since the user didn't yet have a chance to read the above message and decide to opt out, + // don't log any telemetry on the first invocation. + if (CheckFirstTimeUse()) + { + _enabled = false; + return; + } + _client = new TelemetryClient(); _client.InstrumentationKey = InstrumentationKey; _client.Context.Device.OperatingSystem = RuntimeEnvironment.OperatingSystem; - // we don't want hostname etc to be sent in plain text. - // these need to be set to some non-empty values to override default behavior. + // We don't want hostname etc to be sent in plaintext. + // These need to be set to some non-empty values to override default behavior. _client.Context.Cloud.RoleInstance = "-"; _client.Context.Cloud.RoleName = "-"; _commonProperties = new TelemetryCommonProperties().GetTelemetryCommonProperties(); + // Add a session ID to each log sent during the life of this process + _commonProperties["SessionId"] = Guid.NewGuid().ToString(); } catch (Exception e) { _client = null; - // we dont want to fail the tool if telemetry fails. + // We dont want to fail the tool if telemetry fails. Debug.Fail(e.ToString()); } } - private void TrackEventTask( + private static void TrackEventTask( string eventName, IDictionary properties, - IDictionary measurements) + TimeSpan? duration, + Exception ex) { - if (_client == null) + if (_client != null) { - return; + try + { + var eventProperties = GetEventProperties(properties, duration, ex); + _client.TrackEvent(eventName, eventProperties); + _client.Flush(); + } + catch (Exception e) + { + Debug.Fail(e.ToString()); + } } + Interlocked.Decrement(ref _outstandingTelemetryEventCount); + } - try + private static Dictionary GetEventProperties(IDictionary properties, + TimeSpan? duration, Exception ex) + { + var eventProperties = new Dictionary(_commonProperties); + + if (duration != null) { - var eventProperties = GetEventProperties(properties); - var eventMeasurements = GetEventMeasures(measurements); - - _client.TrackEvent(eventName, eventProperties, eventMeasurements); - _client.Flush(); + eventProperties["Duration"] = duration.Value.TotalMilliseconds.ToString(); } - catch (Exception e) + + if (ex != null) { - Debug.Fail(e.ToString()); + eventProperties["Exception"] = GetSanitizedExceptionStr(ex); } - } - private Dictionary GetEventMeasures(IDictionary measurements) - { - Dictionary eventMeasurements = new Dictionary(); - if (measurements != null) + if (properties != null) { - foreach (KeyValuePair measurement in measurements) + foreach (KeyValuePair property in properties) { - eventMeasurements[measurement.Key] = measurement.Value; + if (property.Value != null) + { + eventProperties[property.Key] = property.Value; + } } } - return eventMeasurements; + + return eventProperties; } - private Dictionary GetEventProperties(IDictionary properties) + private static bool CheckFirstTimeUse() { - if (properties != null) + using (IFirstTimeUseNoticeSentinel firstTimeUseNoticeSentinel = new FirstTimeUseNoticeSentinel()) { - var eventProperties = new Dictionary(_commonProperties); - foreach (KeyValuePair property in properties) + var firstTimeUse = !firstTimeUseNoticeSentinel.Exists(); + + if (firstTimeUse) { - eventProperties[property.Key] = property.Value; + Console.WriteLine( +@"Welcome to the ML.NET CLI! +-------------------------- +Learn more about ML.NET CLI: https://aka.ms/mlnet-cli +Use 'mlnet --help' to see available commands or visit: https://aka.ms/mlnet-cli-docs + +Telemetry +--------- +The ML.NET CLI tool collects usage data in order to help us improve your experience. +The data is anonymous and doesn't include personal information or data from your datasets. +You can opt-out of telemetry by setting the MLDOTNET_CLI_TELEMETRY_OPTOUT environment variable to '1' or 'true' using your favorite shell. + +Read more about ML.NET CLI Tool telemetry: https://aka.ms/mlnet-cli-telemetry +"); + + firstTimeUseNoticeSentinel.CreateIfNotExists(); + + return true; } - return eventProperties; - } - else - { - return _commonProperties; + + return false; } } + + /// + /// Get exception string to log. Exclude the exception message, as it + /// may contain PII. + /// + private static string GetSanitizedExceptionStr(Exception ex) + { + return $@"{ex.GetType()} +{ex.StackTrace}"; + } } } \ No newline at end of file diff --git a/src/mlnet/Utilities/ProgressHandlers.cs b/src/mlnet/Utilities/ProgressHandlers.cs index 1fc5996816..b963f0c078 100644 --- a/src/mlnet/Utilities/ProgressHandlers.cs +++ b/src/mlnet/Utilities/ProgressHandlers.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using Microsoft.ML.AutoML; using Microsoft.ML.CLI.ShellProgressBar; +using Microsoft.ML.CLI.Telemetry.Events; using Microsoft.ML.Data; using NLog; @@ -47,6 +48,7 @@ public void Report(RunDetail iterationResult) return; iterationIndex++; + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) @@ -103,6 +105,7 @@ public void Report(RunDetail iterationResult) if (this.isStopped) return; iterationIndex++; + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) @@ -173,6 +176,7 @@ public void Report(RunDetail iterationResult) } iterationIndex++; + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) From 0814b164760805cf525453d7135b6c4c487aab31 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Fri, 31 May 2019 12:20:04 -0700 Subject: [PATCH 02/10] add application exit event --- src/mlnet/Program.cs | 7 +++-- .../Telemetry/Events/ApplicationExitEvent.cs | 28 +++++++++++++++++++ .../Telemetry/Events/MLNetCommandEndEvent.cs | 4 +-- 3 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 src/mlnet/Telemetry/Events/ApplicationExitEvent.cs diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index 6e01406c4e..197f1c8bbc 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -83,6 +83,8 @@ public static void Main(string[] args) logger.Log(LogLevel.Info, Strings.LookIntoLogFile); logger.Log(LogLevel.Error, Strings.Exiting); } + + MLNetCommandEndEvent.TrackEvent(stopwatch.Elapsed, ex); }); var parser = new CommandLineBuilder() @@ -93,7 +95,8 @@ public static void Main(string[] args) var parseResult = parser.Parse(args); - if (parseResult.Errors.Count == 0) + var commandParseSucceeded = !parseResult.Errors.Any(); + if (commandParseSucceeded) { if (parseResult.RootCommandResult.Children.Count > 0) { @@ -116,7 +119,7 @@ public static void Main(string[] args) parser.InvokeAsync(parseResult).Wait(); // Send exit telemetry - MLNetCommandEndEvent.TrackEvent(exitCode, !parseResult.Errors.Any(), stopwatch.Elapsed, ex); + ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex); // Flush pending telemetry logs Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(5)); Environment.Exit(exitCode); diff --git a/src/mlnet/Telemetry/Events/ApplicationExitEvent.cs b/src/mlnet/Telemetry/Events/ApplicationExitEvent.cs new file mode 100644 index 0000000000..3406272fac --- /dev/null +++ b/src/mlnet/Telemetry/Events/ApplicationExitEvent.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Microsoft.ML.CLI.Telemetry.Events +{ + /// + /// Telemetry event for CLI application exit. + /// + internal class ApplicationExitEvent + { + public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex) + { + Telemetry.TrackEvent("application-exit", + new Dictionary + { + { "CommandParseSucceeded", commandParseSucceeded.ToString() }, + { "ExitCode", exitCode.ToString() }, + { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, + }, + duration, ex); + } + } +} diff --git a/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs b/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs index f6f263ef68..961773606d 100644 --- a/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs +++ b/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs @@ -13,13 +13,11 @@ namespace Microsoft.ML.CLI.Telemetry.Events /// internal class MLNetCommandEndEvent { - public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex) + public static void TrackEvent(TimeSpan duration, Exception ex) { Telemetry.TrackEvent("mlnet-command-end", new Dictionary { - { "CommandParseSucceeded", commandParseSucceeded.ToString() }, - { "ExitCode", exitCode.ToString() }, { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, }, duration, ex); From 6236345bd29c93276031fae223332432229ed2f2 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Fri, 31 May 2019 12:27:00 -0700 Subject: [PATCH 03/10] add machine learning task type to experiment & experiment iteration metrics --- src/mlnet/CodeGenerator/CodeGenerationHelper.cs | 6 +++--- src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs | 3 +++ .../Telemetry/Events/ExperimentIterationCompletedEvent.cs | 4 +++- src/mlnet/Utilities/ProgressHandlers.cs | 6 +++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs index 6f74c405c4..64ecdb49af 100644 --- a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs +++ b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs @@ -242,7 +242,7 @@ public void GenerateCode() { var binaryMetric = new BinaryExperimentSettings().OptimizingMetric; var bestBinaryIteration = BestResultUtil.GetBestRun(completedBinaryRuns, binaryMetric); - ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, stopwatch.Elapsed); + ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, TaskKind.BinaryClassification, stopwatch.Elapsed); bestPipeline = bestBinaryIteration.Pipeline; bestModel = bestBinaryIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedBinaryRuns.Count()); @@ -260,7 +260,7 @@ public void GenerateCode() { var regressionMetric = new RegressionExperimentSettings().OptimizingMetric; var bestRegressionIteration = BestResultUtil.GetBestRun(completedRegressionRuns, regressionMetric); - ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, stopwatch.Elapsed); + ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, TaskKind.Regression, stopwatch.Elapsed); bestPipeline = bestRegressionIteration.Pipeline; bestModel = bestRegressionIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedRegressionRuns.Count()); @@ -278,7 +278,7 @@ public void GenerateCode() { var muliclassMetric = new MulticlassExperimentSettings().OptimizingMetric; var bestMulticlassIteration = BestResultUtil.GetBestRun(completedMulticlassRuns, muliclassMetric); - ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, stopwatch.Elapsed); + ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, TaskKind.MulticlassClassification, stopwatch.Elapsed); bestPipeline = bestMulticlassIteration.Pipeline; bestModel = bestMulticlassIteration.Model; ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedMulticlassRuns.Count()); diff --git a/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs index 6321578eb9..68a7ef4a20 100644 --- a/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs +++ b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs @@ -17,6 +17,7 @@ internal static class ExperimentCompletedEvent { public static void TrackEvent(RunDetail bestRun, List> allRuns, + TaskKind machineLearningTask, TimeSpan duration) { Telemetry.TrackEvent("experiment-completed", @@ -25,6 +26,8 @@ public static void TrackEvent(RunDetail bestRun, { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString() }, { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) }, { "BestTrainer", bestRun.TrainerName }, + { "BestTrainer", bestRun.TrainerName }, + { "MachineLearningTask", machineLearningTask.ToString() }, { "NumIterations", allRuns.Count().ToString() }, { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, }, diff --git a/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs b/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs index 679fe4a200..02946df974 100644 --- a/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs +++ b/src/mlnet/Telemetry/Events/ExperimentIterationCompletedEvent.cs @@ -17,12 +17,14 @@ internal static class ExperimentIterationCompletedEvent { public static void TrackEvent(int iterationNum, RunDetail runDetail, - double score) + double score, + TaskKind machineLearningTask) { Telemetry.TrackEvent("experiment-iteration-completed", new Dictionary() { { "IterationNum", iterationNum.ToString() }, + { "MachineLearningTask", machineLearningTask.ToString() }, { "Metrics", GetMetricsStr(runDetail.ValidationMetrics) }, { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, { "Pipeline", Telemetry.GetSanitizedPipelineStr(runDetail.Pipeline) }, diff --git a/src/mlnet/Utilities/ProgressHandlers.cs b/src/mlnet/Utilities/ProgressHandlers.cs index b963f0c078..6e119a9c94 100644 --- a/src/mlnet/Utilities/ProgressHandlers.cs +++ b/src/mlnet/Utilities/ProgressHandlers.cs @@ -48,7 +48,7 @@ public void Report(RunDetail iterationResult) return; iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.Regression); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) @@ -105,7 +105,7 @@ public void Report(RunDetail iterationResult) if (this.isStopped) return; iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.BinaryClassification); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) @@ -176,7 +176,7 @@ public void Report(RunDetail iterationResult) } iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult)); + ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.MulticlassClassification); completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (progressBar != null) From b8211f1107773e3ca1dd9ebf487c9a0536d059c4 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Fri, 31 May 2019 12:34:08 -0700 Subject: [PATCH 04/10] remove duplicate property --- src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs index 68a7ef4a20..84b76cd8df 100644 --- a/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs +++ b/src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs @@ -26,7 +26,6 @@ public static void TrackEvent(RunDetail bestRun, { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString() }, { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) }, { "BestTrainer", bestRun.TrainerName }, - { "BestTrainer", bestRun.TrainerName }, { "MachineLearningTask", machineLearningTask.ToString() }, { "NumIterations", allRuns.Count().ToString() }, { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, From 535e6c26e410a2321bb1c339038c7abbd1306290 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Thu, 6 Jun 2019 17:40:27 -0700 Subject: [PATCH 05/10] merge --- .../ColumnInference/ColumnInformationUtil.cs | 4 +- src/mlnet/Program.cs | 2 +- .../TelemetryCommonProperties.cs | 1 - src/mlnet/Telemetry/MlTelemetry.cs | 102 ------------------ src/mlnet/Telemetry/Telemetry.cs | 6 +- src/mlnet/Utilities/ProgressHandlers.cs | 6 +- 6 files changed, 9 insertions(+), 112 deletions(-) delete mode 100644 src/mlnet/Telemetry/MlTelemetry.cs diff --git a/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs b/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs index be02d510be..1228fc2183 100644 --- a/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs +++ b/src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs @@ -106,7 +106,7 @@ public static IEnumerable GetColumnNames(ColumnInformation columnInforma AddStringsToListIfNotNull(columnNames, columnInformation.TextColumnNames); return columnNames; } - + public static IDictionary CountColumnsByPurpose(ColumnInformation columnInformation) { var result = new Dictionary(); @@ -118,7 +118,7 @@ public static IDictionary CountColumnsByPurpose(ColumnInform { continue; } - + result.TryGetValue(purpose.Value, out int count); result[purpose.Value] = ++count; } diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index 4dc5940104..3c216eec94 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -116,7 +116,7 @@ public static void Main(string[] args) // Send system info telemetry SystemInfoEvent.TrackEvent(); - + parser.InvokeAsync(parseResult).Wait(); // Send exit telemetry ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex); diff --git a/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs b/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs index 99bd362364..7449afd7a7 100644 --- a/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs +++ b/src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; using System.IO; -using Microsoft.DotNet.AutoML; using Microsoft.DotNet.Configurer; using Microsoft.ML.CLI.Telemetry; using RuntimeEnvironment = Microsoft.DotNet.PlatformAbstractions.RuntimeEnvironment; diff --git a/src/mlnet/Telemetry/MlTelemetry.cs b/src/mlnet/Telemetry/MlTelemetry.cs deleted file mode 100644 index a9ae092e04..0000000000 --- a/src/mlnet/Telemetry/MlTelemetry.cs +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) .NET Foundation and contributors. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. - -using System; -using System.Collections.Generic; -using Microsoft.DotNet.Configurer; - -namespace Microsoft.DotNet.Cli.Telemetry -{ - public class MlTelemetry - { - private bool _firstTimeUse; - private bool _enabled; - private List _parameters; - private string _command; - - public MlTelemetry() - { - _parameters = new List(); - } - - public void SetCommandAndParameters(string command, IEnumerable parameters) - { - if (parameters != null) - { - _parameters.AddRange(parameters); - } - - _command = command; - } - - public void LogAutoTrainMlCommand(string dataFileName, string task, long dataFileSize) - { - CheckFistTimeUse(); - - if (!_enabled) - { - return; - } - - var telemetry = new Telemetry(); - - var fileSizeBucket = Math.Pow(2, Math.Ceiling(Math.Log(dataFileSize, 2))); - - var fileNameHash = string.IsNullOrEmpty(dataFileName) ? string.Empty : Sha256Hasher.Hash(dataFileName); - - var paramString = string.Join(",", _parameters); - - var propertiesToLog = new Dictionary - { - { "Command", _command }, - { "FileSizeBucket", fileSizeBucket.ToString() }, - { "FileNameHash", fileNameHash }, - { "CommandLineParametersUsed", paramString }, - { "LearningTaskType", task } - }; - - telemetry.TrackEvent("mlnet-command", propertiesToLog, new Dictionary()); - } - - private void CheckFistTimeUse() - { - using (IFirstTimeUseNoticeSentinel firstTimeUseNoticeSentinel = new FirstTimeUseNoticeSentinel()) - { - // if we're in first time use invocation and there are repeat telemetry calls, don't send telemetry - if (_firstTimeUse) - { - return; - } - - _firstTimeUse = !firstTimeUseNoticeSentinel.Exists(); - - if (_firstTimeUse) - { - Console.WriteLine( -@"Welcome to the ML.NET CLI! --------------------------- -Learn more about ML.NET CLI: https://aka.ms/mlnet-cli -Use 'mlnet --help' to see available commands or visit: https://aka.ms/mlnet-cli-docs - -Telemetry ---------- -The ML.NET CLI tool collects usage data in order to help us improve your experience. -The data is anonymous and doesn't include personal information or data from your datasets. -You can opt-out of telemetry by setting the MLDOTNET_CLI_TELEMETRY_OPTOUT environment variable to '1' or 'true' using your favorite shell. - -Read more about ML.NET CLI Tool telemetry: https://aka.ms/mlnet-cli-telemetry -"); - - firstTimeUseNoticeSentinel.CreateIfNotExists(); - - // since the user didn't yet have a chance to read the above message and decide to opt out, - // don't log any telemetry on the first invocation. - - return; - } - - _enabled = true; - } - } - } -} \ No newline at end of file diff --git a/src/mlnet/Telemetry/Telemetry.cs b/src/mlnet/Telemetry/Telemetry.cs index b8682a37a6..a0f8ee258b 100644 --- a/src/mlnet/Telemetry/Telemetry.cs +++ b/src/mlnet/Telemetry/Telemetry.cs @@ -32,7 +32,7 @@ internal static class Telemetry private const string InstrumentationKey = "c059917c-818d-489a-bfcb-351eaab73f2a"; private const string MlTelemetryOptout = "MLDOTNET_CLI_TELEMETRY_OPTOUT"; private const string MachineId = "MachineId"; - + public static void Initialize() { var optedOut = Env.GetEnvironmentVariableAsBool(MlTelemetryOptout, false); @@ -126,9 +126,9 @@ private static void InitializeTask() _client.InstrumentationKey = InstrumentationKey; _client.Context.Device.OperatingSystem = RuntimeEnvironment.OperatingSystem; - // We don't want hostname etc to be sent in plaintext. + // We don't want hostname etc to be sent in plaintext. // These need to be set to some non-empty values to override default behavior. - _client.Context.Cloud.RoleInstance = "-"; + _client.Context.Cloud.RoleInstance = "-"; _client.Context.Cloud.RoleName = "-"; _commonProperties = new TelemetryCommonProperties().GetTelemetryCommonProperties(); diff --git a/src/mlnet/Utilities/ProgressHandlers.cs b/src/mlnet/Utilities/ProgressHandlers.cs index 2820a0c778..8f1e1f8d8d 100644 --- a/src/mlnet/Utilities/ProgressHandlers.cs +++ b/src/mlnet/Utilities/ProgressHandlers.cs @@ -48,7 +48,7 @@ public void Report(RunDetail iterationResult) return; _iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.Regression); + ExperimentIterationCompletedEvent.TrackEvent(_iterationIndex, iterationResult, _getScore(iterationResult), TaskKind.Regression); _completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (_progressBar != null) @@ -105,7 +105,7 @@ public void Report(RunDetail iterationResult) if (_isStopped) return; _iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.BinaryClassification); + ExperimentIterationCompletedEvent.TrackEvent(_iterationIndex, iterationResult, _getScore(iterationResult), TaskKind.BinaryClassification); _completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (_progressBar != null) @@ -176,7 +176,7 @@ public void Report(RunDetail iterationResult) } _iterationIndex++; - ExperimentIterationCompletedEvent.TrackEvent(iterationIndex, iterationResult, GetScore(iterationResult), TaskKind.MulticlassClassification); + ExperimentIterationCompletedEvent.TrackEvent(_iterationIndex, iterationResult, _getScore(iterationResult), TaskKind.MulticlassClassification); _completedIterations.Add(iterationResult); UpdateBestResult(iterationResult); if (_progressBar != null) From c10191d958b968a80b0231e2846d8f24c04e29bf Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Wed, 12 Jun 2019 14:06:49 -0700 Subject: [PATCH 06/10] only flush telemetry once at end of application exit; lower flush timeout to 1 second --- src/mlnet/Program.cs | 2 +- src/mlnet/Telemetry/Telemetry.cs | 40 +++++++++++++------------------- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index 3c216eec94..ff26a0f3cc 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -121,7 +121,7 @@ public static void Main(string[] args) // Send exit telemetry ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex); // Flush pending telemetry logs - Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(5)); + Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(1)); Environment.Exit(exitCode); } } diff --git a/src/mlnet/Telemetry/Telemetry.cs b/src/mlnet/Telemetry/Telemetry.cs index a0f8ee258b..9b0cd62912 100644 --- a/src/mlnet/Telemetry/Telemetry.cs +++ b/src/mlnet/Telemetry/Telemetry.cs @@ -7,7 +7,6 @@ using System.Diagnostics; using System.Linq; using System.Text; -using System.Threading; using System.Threading.Tasks; using Microsoft.ApplicationInsights; using Microsoft.DotNet.Cli.Telemetry; @@ -27,7 +26,6 @@ internal static class Telemetry private static Dictionary _commonProperties; private static bool _enabled; private static Task _initializationTask; - private static int _outstandingTelemetryEventCount; private const string InstrumentationKey = "c059917c-818d-489a-bfcb-351eaab73f2a"; private const string MlTelemetryOptout = "MLDOTNET_CLI_TELEMETRY_OPTOUT"; @@ -59,25 +57,19 @@ public static void TrackEvent( { return; } - // Increment count of outstanding telemetry events. - Interlocked.Increment(ref _outstandingTelemetryEventCount); _initializationTask.ContinueWith(x => TrackEventTask(eventName, properties, duration, ex)); } /// - /// Wait for all outstanding telemetry tasks to complete. + /// Flush outstanding telemetry, and wait for the specified timeout for this to complete. /// public static void Flush(TimeSpan timeout) { - var sw = Stopwatch.StartNew(); - while (sw.Elapsed < timeout) + if (!_enabled || _client == null) { - if (_outstandingTelemetryEventCount == 0) - { - break; - } - Task.Delay(200).Wait(); + return; } + Task.Run(() => _client.Flush()).Wait(timeout); } /// @@ -149,20 +141,20 @@ private static void TrackEventTask( TimeSpan? duration, Exception ex) { - if (_client != null) + if (_client == null) { - try - { - var eventProperties = GetEventProperties(properties, duration, ex); - _client.TrackEvent(eventName, eventProperties); - _client.Flush(); - } - catch (Exception e) - { - Debug.Fail(e.ToString()); - } + return; + } + + try + { + var eventProperties = GetEventProperties(properties, duration, ex); + _client.TrackEvent(eventName, eventProperties); + } + catch (Exception e) + { + Debug.Fail(e.ToString()); } - Interlocked.Decrement(ref _outstandingTelemetryEventCount); } private static Dictionary GetEventProperties(IDictionary properties, From d1873209abcf5cfdf166f36a3c5a74152419e604 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Wed, 12 Jun 2019 15:49:45 -0700 Subject: [PATCH 07/10] add command attribute to every telemetry event --- src/mlnet/Commands/CommandDefinitions.cs | 4 +++- src/mlnet/Telemetry/Telemetry.cs | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mlnet/Commands/CommandDefinitions.cs b/src/mlnet/Commands/CommandDefinitions.cs index 953b10859b..2dc3461328 100644 --- a/src/mlnet/Commands/CommandDefinitions.cs +++ b/src/mlnet/Commands/CommandDefinitions.cs @@ -14,9 +14,11 @@ namespace Microsoft.ML.CLI.Commands { internal static class CommandDefinitions { + public const string AutoTrainCommandName = "auto-train"; + internal static System.CommandLine.Command AutoTrain(ICommandHandler handler) { - var newCommand = new System.CommandLine.Command("auto-train", "Create a new .NET project using ML.NET to train and run a model", handler: handler) + var newCommand = new System.CommandLine.Command(AutoTrainCommandName, "Create a new .NET project using ML.NET to train and run a model", handler: handler) { MlTask(), Dataset(), diff --git a/src/mlnet/Telemetry/Telemetry.cs b/src/mlnet/Telemetry/Telemetry.cs index 9b0cd62912..589f6227fe 100644 --- a/src/mlnet/Telemetry/Telemetry.cs +++ b/src/mlnet/Telemetry/Telemetry.cs @@ -14,6 +14,7 @@ using Microsoft.DotNet.Configurer; using Microsoft.DotNet.PlatformAbstractions; using Microsoft.ML.AutoML; +using Microsoft.ML.CLI.Commands; namespace Microsoft.ML.CLI.Telemetry { @@ -183,6 +184,8 @@ private static Dictionary GetEventProperties(IDictionary Date: Wed, 12 Jun 2019 15:54:47 -0700 Subject: [PATCH 08/10] remove explicit command property add from MLNetCommandEvent, since it's now logged across all events --- src/mlnet/Telemetry/Events/MLNetCommandEvent.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs b/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs index 4969d8d0f4..b1cdf0ef0b 100644 --- a/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs +++ b/src/mlnet/Telemetry/Events/MLNetCommandEvent.cs @@ -22,7 +22,6 @@ public void TrackEvent() new Dictionary { { "Cache", Utils.GetCacheSettings(AutoTrainCommandSettings.Cache).ToString() }, - { "Command", "auto-train" }, { "CommandLineParametersUsed", string.Join(",", CommandLineParametersUsed) }, { "FilenameHash", HashFilename(AutoTrainCommandSettings.Dataset.Name) }, { "FileSizeBucket", GetFileSizeBucketStr(AutoTrainCommandSettings.Dataset) }, From 1b518ff826b75e1d2b2fb3789ee1b3d508ca4881 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Wed, 12 Jun 2019 16:19:12 -0700 Subject: [PATCH 09/10] increase telemetry flush timeout from 1 to 3 seconds --- src/mlnet/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index ff26a0f3cc..838f689d02 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -121,7 +121,7 @@ public static void Main(string[] args) // Send exit telemetry ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex); // Flush pending telemetry logs - Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(1)); + Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(3)); Environment.Exit(exitCode); } } From d4069cc1eb6476a154a1648f727d474562bb7346 Mon Sep 17 00:00:00 2001 From: Daniel Holstein Date: Sun, 16 Jun 2019 23:34:00 -0700 Subject: [PATCH 10/10] delete not useful MLNet command end event --- src/mlnet/Program.cs | 2 -- .../Telemetry/Events/MLNetCommandEndEvent.cs | 26 ------------------- 2 files changed, 28 deletions(-) delete mode 100644 src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs diff --git a/src/mlnet/Program.cs b/src/mlnet/Program.cs index 838f689d02..fed0c026c8 100644 --- a/src/mlnet/Program.cs +++ b/src/mlnet/Program.cs @@ -83,8 +83,6 @@ public static void Main(string[] args) _logger.Log(LogLevel.Info, Strings.LookIntoLogFile); _logger.Log(LogLevel.Error, Strings.Exiting); } - - MLNetCommandEndEvent.TrackEvent(stopwatch.Elapsed, ex); }); var parser = new CommandLineBuilder() diff --git a/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs b/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs deleted file mode 100644 index 961773606d..0000000000 --- a/src/mlnet/Telemetry/Events/MLNetCommandEndEvent.cs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; - -namespace Microsoft.ML.CLI.Telemetry.Events -{ - /// - /// Telemetry event for end of the ML.NET command issued. - /// - internal class MLNetCommandEndEvent - { - public static void TrackEvent(TimeSpan duration, Exception ex) - { - Telemetry.TrackEvent("mlnet-command-end", - new Dictionary - { - { "PeakMemory", Process.GetCurrentProcess().PeakWorkingSet64.ToString() }, - }, - duration, ex); - } - } -}