From c3aebc75022e1b01d93a6328917883359f28551a Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Thu, 21 Feb 2019 15:08:35 -0800 Subject: [PATCH 1/5] Added samples & docs for BinaryClassification.StochasticGradientDescent, plus a bunch of typo fixing. --- .../StochasticGradientDescent.cs | 47 +++++++++++++ .../StochasticGradientDescentWithOptions.cs | 56 +++++++++++++++ .../EntryPoints/InputBase.cs | 2 +- src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 12 ++++ .../Standard/Online/AveragedLinear.cs | 2 +- .../Standard/Online/AveragedPerceptron.cs | 2 +- .../Standard/Online/OnlineLinear.cs | 4 +- .../Standard/SdcaBinary.cs | 53 ++++++++++++++- .../StandardLearnersCatalog.cs | 68 +++++++++++-------- src/Microsoft.ML.StaticPipe/LbfgsStatic.cs | 4 +- src/Microsoft.ML.StaticPipe/SgdStatic.cs | 8 +-- .../TreeTrainersStatic.cs | 4 +- 12 files changed, 219 insertions(+), 43 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs new file mode 100644 index 0000000000..672968decf --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs @@ -0,0 +1,47 @@ +using Microsoft.ML; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class StochasticGradientDescent + { + // In this examples we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. 
+ public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset. + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing. + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Create data training pipeline. + var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent(); + + // Fit this pipeline to the training data. + var model = pipeline.Fit(trainTestData.TrainSet); + + // Evaluate how the model is doing on the test data. + var dataWithPredictions = model.Transform(trainTestData.TestSet); + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.67 + // Negative Precision: 0.90 + // Negative Recall: 0.91 + // Positive Precision: 0.68 + // Positive Recall: 0.65 + // LogLoss: 0.48 + // LogLossReduction: 38.31 + // Entropy: 0.78 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs new file mode 100644 index 0000000000..005b765fbf --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs @@ -0,0 +1,56 @@ +using Microsoft.ML; +using Microsoft.ML.Trainers; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class StochasticGradientDescentWithOptions + { 
+ // In this examples we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset. + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing. + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Define the trainer options. + var options = new SgdBinaryTrainer.Options() + { + MaxIterations = 30, + ConvergenceTolerance = 5e-5, + PositiveInstanceWeight = 1.2f, + }; + + // Create data training pipeline. + var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent(options); + + // Fit this pipeline to the training data. + var model = pipeline.Fit(trainTestData.TrainSet); + + // Evaluate how the model is doing on the test data. 
+ var dataWithPredictions = model.Transform(trainTestData.TestSet); + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.67 + // Negative Precision: 0.91 + // Negative Recall: 0.89 + // Positive Precision: 0.65 + // Positive Recall: 0.70 + // LogLoss: 0.48 + // LogLossReduction: 37.52 + // Entropy: 0.78 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs index d6c59cd862..cd6faf1547 100644 --- a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs +++ b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs @@ -95,7 +95,7 @@ public abstract class LearnerInputBaseWithLabel : LearnerInputBase public abstract class LearnerInputBaseWithWeight : LearnerInputBaseWithLabel { /// - /// Column to use for example weight. + /// The name of the example weight column. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Column to use for example weight", ShortName = "weight", SortOrder = 4, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] public Optional WeightColumn = Optional.Implicit(DefaultColumnNames.Weight); diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs index 93de658b1e..f6d1609ab0 100644 --- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -23,6 +23,18 @@ public static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); } + /// + /// Pretty-print CalibratedBinaryClassificationMetrics objects. + /// + /// Calibrated binary classification metrics. 
+ public static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics) + { + PrintMetrics(metrics as BinaryClassificationMetrics); + Console.WriteLine($"LogLoss: {metrics.LogLoss:F2}"); + Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction:F2}"); + Console.WriteLine($"Entropy: {metrics.Entropy:F2}"); + } + /// /// Pretty-print RegressionMetrics objects. /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index 688fd872ab..37abdfd88e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -60,7 +60,7 @@ public abstract class AveragedLinearOptions : OnlineLinearOptions public bool DoLazyUpdates = true; /// - /// L2 weight for regularization. + /// The L2 weight for regularization. /// [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)] [TGUI(Label = "L2 Regularization Weight")] diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 5349d24fd1..5d53083992 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -54,7 +54,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer - /// Options for the averaged perceptron trainer. + /// Options for the . 
/// public sealed class Options : AveragedLinearOptions { diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index 6347de391d..8848281e6f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -24,7 +24,7 @@ public abstract class OnlineLinearOptions : LearnerInputBaseWithLabel /// /// Number of passes through the training dataset. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter, numIterations", SortOrder = 50)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter,numIterations", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumberOfIterations = OnlineDefault.NumIterations; @@ -43,7 +43,7 @@ public abstract class OnlineLinearOptions : LearnerInputBaseWithLabel /// This property is only used if the provided value is positive and is not specified. /// The weights and bias will be randomly selected from InitialWeights * [-0.5,0.5] interval with uniform distribution. 
/// - [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts,initWtsDiameter", SortOrder = 140)] [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")] [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)] public float InitialWeightsDiameter = 0; diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index b1678fd7b8..c728055362 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -1723,36 +1723,77 @@ public abstract class SgdBinaryTrainerBase : { public class OptionsBase : LearnerInputBaseWithWeight { + /// + /// The L2 weight for regularization. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization constant", ShortName = "l2", SortOrder = 50)] [TGUI(Label = "L2 Regularization Constant", SuggestedSweeps = "1e-7,5e-7,1e-6,5e-6,1e-5")] [TlcModule.SweepableDiscreteParam("L2Const", new object[] { 1e-7f, 5e-7f, 1e-6f, 5e-6f, 1e-5f })] public float L2Weight = Defaults.L2Weight; + /// + /// The degree of lock-free parallelism used by SGD. + /// + /// + /// Defaults to automatic depending on data sparseness. Determinism is not guaranteed. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.", ShortName = "nt,t,threads", SortOrder = 50)] [TGUI(Label = "Number of threads", SuggestedSweeps = "1,2,4")] public int? NumThreads; + /// + /// The convergence tolerance. If the exponential moving average of loss reductions falls below this tolerance, + /// the algorithm is deemed to have converged and will stop. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Exponential moving averaged improvement tolerance for convergence", ShortName = "tol")] [TGUI(SuggestedSweeps = "1e-2,1e-3,1e-4,1e-5")] [TlcModule.SweepableDiscreteParam("ConvergenceTolerance", new object[] { 1e-2f, 1e-3f, 1e-4f, 1e-5f })] public double ConvergenceTolerance = 1e-4; + /// + /// The maximum number of passes through the training dataset. + /// + /// + /// Set to 1 to simulate online learning. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning.", ShortName = "iter")] [TGUI(Label = "Max number of iterations", SuggestedSweeps = "1,5,10,20")] [TlcModule.SweepableDiscreteParam("MaxIterations", new object[] { 1, 5, 10, 20 })] public int MaxIterations = Defaults.MaxIterations; + /// + /// The initial learning rate used by SGD. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate (only used by SGD)", ShortName = "ilr,lr")] [TGUI(Label = "Initial Learning Rate (for SGD)")] public double InitLearningRate = Defaults.InitLearningRate; + /// + /// Determines whether to shuffle data for each training iteration. + /// + /// + /// <see langword="true"/> to shuffle data for each training iteration; otherwise, <see langword="false"/>. + /// Default is <see langword="true"/>. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Shuffle data every epoch?", ShortName = "shuf")] [TlcModule.SweepableDiscreteParam("Shuffle", null, isBool: true)] public bool Shuffle = true; + /// + /// The weight to be applied to the positive class. This is useful for training with imbalanced data. + /// + /// + /// Default value is 1, which means no extra weight. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Apply weight to the positive class, for imbalanced data", ShortName = "piw")] public float PositiveInstanceWeight = 1; + /// + /// Determines the frequency of checking for convergence in terms of number of iterations. + /// + /// + /// Default equals <see cref="NumThreads"/>.
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Default equals number of threads", ShortName = "checkFreq")] public int? CheckFrequency; @@ -1802,7 +1843,7 @@ internal static class Defaults /// The environment to use. /// The name of the feature column. /// The name of the label column. - /// The name for the example weight column. + /// The name of the example weight column. /// The maximum number of iterations; set to 1 to simulate online learning. /// The initial learning rate used by SGD. /// The L2 regularizer constant. @@ -2077,13 +2118,19 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig } /// - /// Train logistic regression using a parallel stochastic gradient method. + /// The for training logistic regression using a parallel stochastic gradient method. /// + /// + /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated + /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements SGD for binary classification + /// that supports multi-threading without any locking. If the associated optimization problem is sparse, it achieves a nearly optimal + /// rate of convergence. For more details, please refer to http://arxiv.org/pdf/1106.5730v2.pdf. + /// public sealed class SgdBinaryTrainer : SgdBinaryTrainerBase> { /// - /// Options available to training logistic regression using the implemented stochastic gradient method. + /// Options for the . 
/// public sealed class Options : OptionsBase { diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 30ce7e8d8c..7e5dadfd4a 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -19,15 +19,22 @@ namespace Microsoft.ML public static class StandardLearnersCatalog { /// - /// Predict a target using logistic regression trained with the trainer. + /// Predict a target using a linear classification model trained with . /// - /// The binary classificaiton catalog trainer object. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. - /// The maximum number of iterations; set to 1 to simulate online learning. - /// The initial learning rate used by SGD. - /// The L2 regularization constant. + /// The binary classification catalog trainer object. + /// The name of the label column, or dependent variable. + /// The features, or independent variables. + /// The name of the example weight column. + /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. + /// The initial learning rate used by SGD. + /// The L2 weight for regularization. + /// + /// + /// + /// + /// public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, @@ -43,10 +50,17 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati } /// - /// Predict a target using logistic regression trained with the trainer. + /// Predict a target using a linear classification model trained with and advanced options. /// - /// The binary classificaiton catalog trainer object. - /// Advanced arguments to the algorithm. 
+ /// The binary classification catalog trainer object. + /// Trainer options. + /// + /// + /// + /// + /// public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, SgdBinaryTrainer.Options options) { @@ -58,16 +72,16 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati } /// - /// Predict a target using a linear classification model trained with the trainer. + /// Predict a target using a linear classification model trained with . /// - /// The binary classificaiton catalog trainer object. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The binary classification catalog trainer object. + /// The name of the label column, or dependent variable. + /// The features, or independent variables. + /// The name of the example weight column. /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. - /// The maximum number of iterations; set to 1 to simulate online learning. - /// The initial learning rate used by SGD. - /// The L2 regularization constant. + /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. + /// The initial learning rate used by SGD. + /// The L2 weight for regularization. public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrated(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, @@ -84,10 +98,10 @@ public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrat } /// - /// Predict a target using a linear classification model trained with the trainer. + /// Predict a target using a linear classification model trained with and advanced options. 
/// - /// The binary classificaiton catalog trainer object. - /// Advanced arguments to the algorithm. + /// The binary classification catalog trainer object. + /// Trainer options. public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrated(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, SgdNonCalibratedBinaryTrainer.Options options) { @@ -141,7 +155,7 @@ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this Regressi /// /// Predict a target using a logistic regression model trained with the SDCA trainer. - /// The trained model can produce probablity by feeding the output value of the linear + /// The trained model can produce probability by feeding the output value of the linear /// function to a . /// /// The binary classification catalog trainer object. @@ -173,7 +187,7 @@ public static SdcaBinaryTrainer StochasticDualCoordinateAscent( /// /// Predict a target using a logistic regression model trained with the SDCA trainer. - /// The trained model can produce probablity via feeding output value of the linear + /// The trained model can produce probability via feeding output value of the linear /// function to a . Compared with , /// this function allows more advanced settings by accepting . /// @@ -290,7 +304,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// to decrease the as iterations progress; otherwise, . /// Default is . /// - /// L2 weight for regularization. + /// The L2 weight for regularization. /// Number of passes through the training dataset. /// /// @@ -396,7 +410,7 @@ public static OnlineGradientDescentTrainer OnlineGradientDescent(this Regression /// /// Predict a target using a linear binary classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The label column name, or dependent variable. 
/// The features, or independent variables. /// The optional example weights. @@ -430,7 +444,7 @@ public static LogisticRegression LogisticRegression(this BinaryClassificationCat /// /// Predict a target using a linear binary classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// Advanced arguments to the algorithm. public static LogisticRegression LogisticRegression(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, LROptions options) { diff --git a/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs index 7e2ed821c7..8c84cbe3b9 100644 --- a/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs @@ -21,7 +21,7 @@ public static class LbfgsBinaryClassificationStaticExtensions /// /// Predict a target using a linear binary classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The label, or dependent variable. /// The features, or independent variables. /// The optional example weights. @@ -67,7 +67,7 @@ public static (Scalar score, Scalar probability, Scalar pred /// /// Predict a target using a linear binary classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The label, or dependent variable. /// The features, or independent variables. /// The optional example weights. diff --git a/src/Microsoft.ML.StaticPipe/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs index 6bf0373b7b..c81b3a9f11 100644 --- a/src/Microsoft.ML.StaticPipe/SgdStatic.cs +++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs @@ -19,7 +19,7 @@ public static class SgdStaticExtensions /// /// Predict a target using logistic regression trained with the trainer. 
/// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The name of the label column. /// The name of the feature column. /// The name for the example weight column. @@ -59,7 +59,7 @@ public static (Scalar score, Scalar probability, Scalar pred /// /// Predict a target using logistic regression trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The name of the label column. /// The name of the feature column. /// The name for the example weight column. @@ -99,7 +99,7 @@ public static (Scalar score, Scalar probability, Scalar pred /// /// Predict a target using a linear classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The name of the label column. /// The name of the feature column. /// The name for the example weight column. @@ -142,7 +142,7 @@ public static (Scalar score, Scalar predictedLabel) StochasticGradi /// /// Predict a target using a linear classification model trained with the trainer. /// - /// The binary classificaiton catalog trainer object. + /// The binary classification catalog trainer object. /// The name of the label column. /// The name of the feature column. /// The name for the example weight column. diff --git a/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs b/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs index bcc3570caf..13a1f82045 100644 --- a/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs @@ -110,7 +110,7 @@ public static Scalar FastTree(this RegressionCatalog.RegressionTrainers c /// /// FastTree extension method. - /// Predict a target using a decision tree binary classificaiton model trained with the . + /// Predict a target using a decision tree binary classification model trained with the . 
/// /// The . /// The label column. @@ -160,7 +160,7 @@ public static (Scalar score, Scalar probability, Scalar pred /// /// FastTree extension method. - /// Predict a target using a decision tree binary classificaiton model trained with the . + /// Predict a target using a decision tree binary classification model trained with the . /// /// The . /// The label column. From 16d136e9625303e990cae0257bacf2b5fcb44b6a Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 22 Feb 2019 12:00:06 -0800 Subject: [PATCH 2/5] Addressed PR comments. --- .../Trainers/BinaryClassification/AveragedPerceptron.cs | 2 +- .../BinaryClassification/AveragedPerceptronWithOptions.cs | 2 +- .../BinaryClassification/StochasticGradientDescent.cs | 2 +- .../StochasticGradientDescentWithOptions.cs | 7 +++++-- .../SymbolicStochasticGradientDescent.cs | 2 +- .../SymbolicStochasticGradientDescentWithOptions.cs | 2 +- src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 2 +- src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs | 2 ++ 8 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs index 767d398dc6..8da2982ecb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification public static class AveragedPerceptron { // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. + // if a person's income is above $50K or not, based on demographic information about that person. 
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs index 830b5981cc..1c58ee48aa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification public static class AveragedPerceptronWithOptions { // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. + // if a person's income is above $50K or not, based on demographic information about that person. // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs index 672968decf..bbebc47d9a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification public static class StochasticGradientDescent { // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. 
+ // if a person's income is above $50K or not, based on demographic information about that person. // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs index 005b765fbf..d28e0a19d1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification public static class StochasticGradientDescentWithOptions { // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. + // if a person's income is above $50K or not, based on demographic information about that person. // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { @@ -24,8 +24,11 @@ public static void Example() // Define the trainer options. var options = new SgdBinaryTrainer.Options() { - MaxIterations = 30, + // Make the convergence tolerance tighter. ConvergenceTolerance = 5e-5, + // Increase the maximum number of passes over training data. + MaxIterations = 30, + // Give the instances of the positive class slightly more weight. 
PositiveInstanceWeight = 1.2f, }; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs index 49b31342e0..c0687d6ee7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs @@ -4,7 +4,7 @@ public static class SymbolicStochasticGradientDescent { // This example requires installation of additional nuget package Microsoft.ML.HalLearners. // In this example we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. + // if a person's income is above $50K or not, based on demographic information about that person. // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs index d05d64454c..9dd4f50c87 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs @@ -4,7 +4,7 @@ public static class SymbolicStochasticGradientDescentWithOptions { // This example requires installation of additional nuget package Microsoft.ML.HalLearners. // In this example we will use the adult income dataset. 
The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. + // if a person's income is above $50K or not, based on demographic information about that person. // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult public static void Example() { diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs index f6d1609ab0..16f72e3392 100644 --- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -26,7 +26,7 @@ public static void PrintMetrics(BinaryClassificationMetrics metrics) /// /// Pretty-print CalibratedBinaryClassificationMetrics objects. /// - /// Calibrated binary classification metrics. + /// object. public static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics) { PrintMetrics(metrics as BinaryClassificationMetrics); diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index c728055362..7162ae40f1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -2119,6 +2119,8 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// /// The for training logistic regression using a parallel stochastic gradient method. + /// The trained model is calibrated and can produce probability by feeding the output value of the + /// linear function to a . 
/// /// /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated From 10a01c141aab4d728ac059f679179401d832a504 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 22 Feb 2019 13:03:46 -0800 Subject: [PATCH 3/5] Mentioned Hogwild --- src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index 7162ae40f1..0aa53d46f9 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -2124,9 +2124,9 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// /// /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements SGD for binary classification - /// that supports multi-threading without any locking. If the associated optimization problem is sparse, it achieves a nearly optimal - /// rate of convergence. For more details, please refer to http://arxiv.org/pdf/1106.5730v2.pdf. + /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification + /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal + /// rate of convergence. For more details about Hogwild SGD, please refer to http://arxiv.org/pdf/1106.5730v2.pdf. /// public sealed class SgdBinaryTrainer : SgdBinaryTrainerBase<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> From f2a42eb9bd660880441c2f2ae13d1864d14ad090 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Mon, 25 Feb 2019 11:18:45 -0800 Subject: [PATCH 4/5] Updates to exampleWeightColumnName.
--- src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 5c3272bd64..a4a7f89d9d 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -24,7 +24,7 @@ public static class StandardLearnersCatalog /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The name of the example weight column. + /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. /// The L2 weight for regularization. @@ -77,7 +77,7 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The name of the example weight column. + /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. From 6909bfc7a2b7ccaffca4367ef9dcebb33679926e Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Mon, 25 Feb 2019 11:57:22 -0800 Subject: [PATCH 5/5] Fixed trailing whitespaces.
--- src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index a4a7f89d9d..89e7eda99f 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -77,7 +77,7 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The name of the example weight column (optional). + /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD.