diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
index 767d398dc6..8da2982ecb 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
     public static class AveragedPerceptron
     {
         // In this examples we will use the adult income dataset. The goal is to predict
-        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // if a person's income is above $50K or not, based on demographic information about that person.
         // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
         public static void Example()
         {
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
index 830b5981cc..1c58ee48aa 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -6,7 +6,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
     public static class AveragedPerceptronWithOptions
     {
         // In this examples we will use the adult income dataset. The goal is to predict
-        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // if a person's income is above $50K or not, based on demographic information about that person.
         // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
         public static void Example()
         {
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs
new file mode 100644
index 0000000000..bbebc47d9a
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs
@@ -0,0 +1,47 @@
+using Microsoft.ML;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class StochasticGradientDescent
+    {
+        // In this example we will use the adult income dataset. The goal is to predict
+        // if a person's income is above $50K or not, based on demographic information about that person.
+        // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset.
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing.
+            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Create data training pipeline.
+            var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent();
+
+            // Fit this pipeline to the training data.
+            var model = pipeline.Fit(trainTestData.TrainSet);
+
+            // Evaluate how the model is doing on the test data.
+            var dataWithPredictions = model.Transform(trainTestData.TestSet);
+            var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Expected output:
+            //   Accuracy: 0.85
+            //   AUC: 0.90
+            //   F1 Score: 0.67
+            //   Negative Precision: 0.90
+            //   Negative Recall: 0.91
+            //   Positive Precision: 0.68
+            //   Positive Recall: 0.65
+            //   LogLoss: 0.48
+            //   LogLossReduction: 38.31
+            //   Entropy: 0.78
+        }
+    }
+}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs
new file mode 100644
index 0000000000..d28e0a19d1
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs
@@ -0,0 +1,59 @@
+using Microsoft.ML;
+using Microsoft.ML.Trainers;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class StochasticGradientDescentWithOptions
+    {
+        // In this example we will use the adult income dataset. The goal is to predict
+        // if a person's income is above $50K or not, based on demographic information about that person.
+        // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset.
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing.
+            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Define the trainer options.
+            var options = new SgdBinaryTrainer.Options()
+            {
+                // Make the convergence tolerance tighter.
+                ConvergenceTolerance = 5e-5,
+                // Increase the maximum number of passes over training data.
+                MaxIterations = 30,
+                // Give the instances of the positive class slightly more weight.
+                PositiveInstanceWeight = 1.2f,
+            };
+
+            // Create data training pipeline.
+            var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent(options);
+
+            // Fit this pipeline to the training data.
+            var model = pipeline.Fit(trainTestData.TrainSet);
+
+            // Evaluate how the model is doing on the test data.
+            var dataWithPredictions = model.Transform(trainTestData.TestSet);
+            var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Expected output:
+            //   Accuracy: 0.85
+            //   AUC: 0.90
+            //   F1 Score: 0.67
+            //   Negative Precision: 0.91
+            //   Negative Recall: 0.89
+            //   Positive Precision: 0.65
+            //   Positive Recall: 0.70
+            //   LogLoss: 0.48
+            //   LogLossReduction: 37.52
+            //   Entropy: 0.78
+        }
+    }
+}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs
index 49b31342e0..c0687d6ee7 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs
@@ -4,7 +4,7 @@ public static class SymbolicStochasticGradientDescent
     {
         // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
         // In this example we will use the adult income dataset. The goal is to predict
-        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // if a person's income is above $50K or not, based on demographic information about that person.
         // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
         public static void Example()
         {
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs
index d05d64454c..9dd4f50c87 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs
@@ -4,7 +4,7 @@ public static class SymbolicStochasticGradientDescentWithOptions
     {
         // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
         // In this example we will use the adult income dataset. The goal is to predict
-        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // if a person's income is above $50K or not, based on demographic information about that person.
         // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
         public static void Example()
         {
diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs
index 3e991829f4..61f9246c0f 100644
--- a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs
+++ b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs
@@ -95,7 +95,7 @@ public abstract class LearnerInputBaseWithLabel : LearnerInputBase
     public abstract class LearnerInputBaseWithWeight : LearnerInputBaseWithLabel
     {
         /// <summary>
-        /// Column to use for example weight.
+        /// The name of the example weight column.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Column to use for example weight", ShortName = "weight", SortOrder = 4, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
         public string WeightColumn = null;
diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
index 93de658b1e..16f72e3392 100644
--- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
@@ -23,6 +23,18 @@ public static void PrintMetrics(BinaryClassificationMetrics metrics)
             Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
         }
 
+        /// <summary>
+        /// Pretty-print CalibratedBinaryClassificationMetrics objects.
+        /// </summary>
+        /// <param name="metrics"><see cref="CalibratedBinaryClassificationMetrics"/> object.</param>
+        public static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics)
+        {
+            PrintMetrics(metrics as BinaryClassificationMetrics);
+            Console.WriteLine($"LogLoss: {metrics.LogLoss:F2}");
+            Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction:F2}");
+            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
+        }
+
         /// <summary>
         /// Pretty-print RegressionMetrics objects.
         /// </summary>
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index 688fd872ab..37abdfd88e 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -60,7 +60,7 @@ public abstract class AveragedLinearOptions : OnlineLinearOptions
         public bool DoLazyUpdates = true;
 
         /// <summary>
-        /// L2 weight for <a href='tmpurl_regularization'>regularization</a>.
+        /// The L2 weight for <a href='tmpurl_regularization'>regularization</a>.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)]
         [TGUI(Label = "L2 Regularization Weight")]
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index 5349d24fd1..5d53083992 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -54,7 +54,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
         private readonly Options _args;
 
         /// <summary>
-        /// Options for the averaged perceptron trainer.
+        /// Options for the <see cref="AveragedPerceptronTrainer"/>.
         /// </summary>
         public sealed class Options : AveragedLinearOptions
         {
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index 6347de391d..8848281e6f 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -24,7 +24,7 @@ public abstract class OnlineLinearOptions : LearnerInputBaseWithLabel
         /// <summary>
         /// Number of passes through the training dataset.
         /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter, numIterations", SortOrder = 50)]
+        [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter,numIterations", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
         public int NumberOfIterations = OnlineDefault.NumIterations;
@@ -43,7 +43,7 @@ public abstract class OnlineLinearOptions : LearnerInputBaseWithLabel
         /// This property is only used if the provided value is positive and <see cref="InitialWeights"/> is not specified.
         /// The weights and bias will be randomly selected from InitialWeights * [-0.5,0.5] interval with uniform distribution.
         /// </value>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)]
+        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts,initWtsDiameter", SortOrder = 140)]
         [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")]
         [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)]
         public float InitialWeightsDiameter = 0;
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
index 30f205eb93..58057b377a 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
@@ -1723,36 +1723,77 @@ public abstract class SgdBinaryTrainerBase<TModel> :
     {
         public class OptionsBase : LearnerInputBaseWithWeight
         {
+            /// <summary>
+            /// The L2 weight for <a href='tmpurl_regularization'>regularization</a>.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization constant", ShortName = "l2", SortOrder = 50)]
             [TGUI(Label = "L2 Regularization Constant", SuggestedSweeps = "1e-7,5e-7,1e-6,5e-6,1e-5")]
             [TlcModule.SweepableDiscreteParam("L2Const", new object[] { 1e-7f, 5e-7f, 1e-6f, 5e-6f, 1e-5f })]
             public float L2Weight = Defaults.L2Weight;
 
+            /// <summary>
+            /// The degree of lock-free parallelism used by SGD.
+            /// </summary>
+            /// <value>
+            /// Defaults to automatic depending on data sparseness. Determinism is not guaranteed.
+            /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.", ShortName = "nt,t,threads", SortOrder = 50)]
             [TGUI(Label = "Number of threads", SuggestedSweeps = "1,2,4")]
             public int? NumThreads;
 
+            /// <summary>
+            /// The convergence tolerance. If the exponential moving average of loss reductions falls below this tolerance,
+            /// the algorithm is deemed to have converged and will stop.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Exponential moving averaged improvement tolerance for convergence", ShortName = "tol")]
             [TGUI(SuggestedSweeps = "1e-2,1e-3,1e-4,1e-5")]
             [TlcModule.SweepableDiscreteParam("ConvergenceTolerance", new object[] { 1e-2f, 1e-3f, 1e-4f, 1e-5f })]
             public double ConvergenceTolerance = 1e-4;
 
+            /// <summary>
+            /// The maximum number of passes through the training dataset.
+            /// </summary>
+            /// <value>
+            /// Set to 1 to simulate online learning.
+            /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning.", ShortName = "iter")]
             [TGUI(Label = "Max number of iterations", SuggestedSweeps = "1,5,10,20")]
             [TlcModule.SweepableDiscreteParam("MaxIterations", new object[] { 1, 5, 10, 20 })]
             public int MaxIterations = Defaults.MaxIterations;
 
+            /// <summary>
+            /// The initial <a href="tmpurl_lr">learning rate</a> used by SGD.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate (only used by SGD)", ShortName = "ilr,lr")]
             [TGUI(Label = "Initial Learning Rate (for SGD)")]
             public double InitLearningRate = Defaults.InitLearningRate;
 
+            /// <summary>
+            /// Determines whether to shuffle data for each training iteration.
+            /// </summary>
+            /// <value>
+            /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
+            /// Default is <see langword="true" />.
+            /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Shuffle data every epoch?", ShortName = "shuf")]
             [TlcModule.SweepableDiscreteParam("Shuffle", null, isBool: true)]
             public bool Shuffle = true;
 
+            /// <summary>
+            /// The weight to be applied to the positive class. This is useful for training with imbalanced data.
+            /// </summary>
+            /// <value>
+            /// Default value is 1, which means no extra weight.
+            /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Apply weight to the positive class, for imbalanced data", ShortName = "piw")]
             public float PositiveInstanceWeight = 1;
 
+            /// <summary>
+            /// Determines the frequency of checking for convergence in terms of number of iterations.
+            /// </summary>
+            /// <value>
+            /// Default equals <see cref="NumThreads"/>.
+            /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Default equals number of threads", ShortName = "checkFreq")]
             public int? CheckFrequency;
 
@@ -1802,7 +1843,7 @@ internal static class Defaults
         /// <param name="env">The environment to use.</param>
         /// <param name="featureColumn">The name of the feature column.</param>
         /// <param name="labelColumn">The name of the label column.</param>
-        /// <param name="weightColumn">The name for the example weight column.</param>
+        /// <param name="weightColumn">The name of the example weight column.</param>
         /// <param name="maxIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
         /// <param name="initLearningRate">The initial learning rate used by SGD.</param>
         /// <param name="l2Weight">The L2 regularizer constant.</param>
@@ -2077,13 +2118,21 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig
     }
 
     /// <summary>
-    /// Train logistic regression using a parallel stochastic gradient method.
+    /// The <see cref="IEstimator{TTransformer}"/> for training logistic regression using a parallel stochastic gradient method.
+    /// The trained model is <a href='tmpurl_calib'>calibrated</a> and can produce probability by feeding the output value of the
+    /// linear function to a <see cref="PlattCalibrator"/>.
     /// </summary>
+    /// <remarks>
+    /// Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated
+    /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification
+    /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal
+    /// rate of convergence. For more details about Hogwild SGD, please refer to http://arxiv.org/pdf/1106.5730v2.pdf.
+    /// </remarks>
     public sealed class SgdBinaryTrainer :
         SgdBinaryTrainerBase<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>
     {
         /// <summary>
-        /// Options available to training logistic regression using the implemented stochastic gradient method.
+        /// Options for the <see cref="SgdBinaryTrainer"/>.
         /// </summary>
         public sealed class Options : OptionsBase
         {
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 3291aa4d83..89e7eda99f 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -19,15 +19,22 @@ namespace Microsoft.ML
     public static class StandardLearnersCatalog
     {
         /// <summary>
-        ///  Predict a target using logistic regression trained with the <see cref="SgdBinaryTrainer"/> trainer.
+        /// Predict a target using a linear classification model trained with <see cref="SgdBinaryTrainer"/>.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
-        /// <param name="labelColumnName">The name of the label column.</param>
-        /// <param name="featureColumnName">The name of the feature column.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
+        /// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
+        /// <param name="featureColumnName">The name of the feature column, or independent variables.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
-        /// <param name="maxIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial learning rate used by SGD.</param>
-        /// <param name="l2Weight">The L2 regularization constant.</param>
+        /// <param name="maxIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
+        /// <param name="initLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
+        /// <param name="l2Weight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[StochasticGradientDescent](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
@@ -43,10 +50,17 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati
         }
 
         /// <summary>
-        ///  Predict a target using logistic regression trained with the <see cref="SgdBinaryTrainer"/> trainer.
+        /// Predict a target using a linear classification model trained with <see cref="SgdBinaryTrainer"/> and advanced options.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
-        /// <param name="options">Advanced arguments to the algorithm.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
+        /// <param name="options">Trainer options.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[StochasticGradientDescentWithOptions](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             SgdBinaryTrainer.Options options)
         {
@@ -58,16 +72,16 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati
         }
 
         /// <summary>
-        ///  Predict a target using a linear classification model trained with the <see cref="SgdNonCalibratedBinaryTrainer"/> trainer.
+        /// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedBinaryTrainer"/>.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
-        /// <param name="labelColumnName">The name of the label column.</param>
-        /// <param name="featureColumnName">The name of the feature column.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
+        /// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
+        /// <param name="featureColumnName">The name of the feature column, or independent variables.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="loss">The loss function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer.</param>
-        /// <param name="maxIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial learning rate used by SGD.</param>
-        /// <param name="l2Weight">The L2 regularization constant.</param>
+        /// <param name="maxIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
+        /// <param name="initLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
+        /// <param name="l2Weight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrated(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
@@ -84,10 +98,10 @@ public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrat
         }
 
         /// <summary>
-        ///  Predict a target using a linear classification model trained with the <see cref="SgdNonCalibratedBinaryTrainer"/> trainer.
+        /// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedBinaryTrainer"/> and advanced options.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
-        /// <param name="options">Advanced arguments to the algorithm.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
+        /// <param name="options">Trainer options.</param>
         public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrated(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             SgdNonCalibratedBinaryTrainer.Options options)
         {
@@ -141,7 +155,7 @@ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this Regressi
 
         /// <summary>
         /// Predict a target using a logistic regression model trained with the SDCA trainer.
-        /// The trained model can produce probablity by feeding the output value of the linear
+        /// The trained model can produce probability by feeding the output value of the linear
         /// function to a <see cref="PlattCalibrator"/>.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
@@ -173,7 +187,7 @@ public static SdcaBinaryTrainer StochasticDualCoordinateAscent(
 
         /// <summary>
         /// Predict a target using a logistic regression model trained with the SDCA trainer.
-        /// The trained model can produce probablity via feeding output value of the linear
+        /// The trained model can produce probability via feeding output value of the linear
         /// function to a <see cref="PlattCalibrator"/>. Compared with <see cref="StochasticDualCoordinateAscent(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, float?, float?, int?)"/>,
         /// this function allows more advanced settings by accepting <see cref="SdcaBinaryTrainer.Options"/>.
         /// </summary>
@@ -290,7 +304,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// <see langword="true" /> to decrease the <paramref name="learningRate"/> as iterations progress; otherwise, <see langword="false" />.
         /// Default is <see langword="false" />.
         /// </param>
-        /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
+        /// <param name="l2RegularizerWeight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <param name="numIterations">Number of passes through the training dataset.</param>
         /// <example>
         /// <format type="text/markdown">
@@ -396,7 +410,7 @@ public static OnlineGradientDescentTrainer OnlineGradientDescent(this Regression
         /// <summary>
         ///  Predict a target using a linear binary classification model trained with the <see cref="Trainers.LogisticRegression"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumnName">The name of the label column.</param>
         /// <param name="featureColumnName">The name of the feature column.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
@@ -430,7 +444,7 @@ public static LogisticRegression LogisticRegression(this BinaryClassificationCat
         /// <summary>
         ///  Predict a target using a linear binary classification model trained with the <see cref="Trainers.LogisticRegression"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Advanced arguments to the algorithm.</param>
         public static LogisticRegression LogisticRegression(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, LROptions options)
         {
diff --git a/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs
index 273bbc3320..82d6df6fbd 100644
--- a/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs
@@ -21,7 +21,7 @@ public static class LbfgsBinaryClassificationStaticExtensions
         /// <summary>
         ///  Predict a target using a linear binary classification model trained with the <see cref="Microsoft.ML.Trainers.LogisticRegression"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The label, or dependent variable.</param>
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="weights">The optional example weights.</param>
@@ -67,7 +67,7 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <summary>
         ///  Predict a target using a linear binary classification model trained with the <see cref="Microsoft.ML.Trainers.LogisticRegression"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The label, or dependent variable.</param>
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="weights">The optional example weights.</param>
diff --git a/src/Microsoft.ML.StaticPipe/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
index 66b342c6a3..f893320366 100644
--- a/src/Microsoft.ML.StaticPipe/SgdStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
@@ -19,7 +19,7 @@ public static class SgdStaticExtensions
         /// <summary>
         ///  Predict a target using logistic regression trained with the <see cref="SgdBinaryTrainer"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
@@ -59,7 +59,7 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <summary>
         ///  Predict a target using logistic regression trained with the <see cref="SgdBinaryTrainer"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
@@ -99,7 +99,7 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <summary>
         ///  Predict a target using a linear classification model trained with the <see cref="SgdNonCalibratedBinaryTrainer"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
@@ -142,7 +142,7 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) StochasticGradi
         /// <summary>
         ///  Predict a target using a linear classification model trained with the <see cref="SgdNonCalibratedBinaryTrainer"/> trainer.
         /// </summary>
-        /// <param name="catalog">The binary classificaiton catalog trainer object.</param>
+        /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
diff --git a/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs b/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs
index c0bdbb2c3d..e5289e0d98 100644
--- a/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs
@@ -110,7 +110,7 @@ public static Scalar<float> FastTree(this RegressionCatalog.RegressionTrainers c
 
         /// <summary>
         /// FastTree <see cref="BinaryClassificationCatalog"/> extension method.
-        /// Predict a target using a decision tree binary classificaiton model trained with the <see cref="FastTreeBinaryClassificationTrainer"/>.
+        /// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryClassificationTrainer"/>.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="label">The label column.</param>
@@ -160,7 +160,7 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
 
         /// <summary>
         /// FastTree <see cref="BinaryClassificationCatalog"/> extension method.
-        /// Predict a target using a decision tree binary classificaiton model trained with the <see cref="FastTreeBinaryClassificationTrainer"/>.
+        /// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryClassificationTrainer"/>.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="label">The label column.</param>