diff --git a/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedLinear.cs
index 0bcd1b3c6e..8d8ef61f97 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedLinear.cs
@@ -21,7 +21,7 @@ namespace Microsoft.ML.Trainers
public abstract class AveragedLinearOptions : OnlineLinearOptions
{
/// <summary>
- /// Learning rate.
+ /// Learning rate.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)]
[TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")]
@@ -61,7 +61,7 @@ public abstract class AveragedLinearOptions : OnlineLinearOptions
public bool LazyUpdate = true;
/// <summary>
- /// The L2 weight for regularization.
+ /// The L2 weight for regularization.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg,L2RegularizerWeight", SortOrder = 50)]
[TGUI(Label = "L2 Regularization Weight")]
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedPerceptron.cs
index 50a4b1c61c..97f66ecdf8 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/Online/AveragedPerceptron.cs
@@ -79,18 +79,18 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer
- /// A custom loss.
+ /// A custom loss.
/// </summary>
[Argument(ArgumentType.Multiple, Name = "LossFunction", HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
internal ISupportClassificationLossFactory ClassificationLossFunctionFactory = new HingeLoss.Options();
/// <summary>
- /// A custom loss.
+ /// A custom loss.
/// </summary>
public IClassificationLoss LossFunction { get; set; }
/// <summary>
- /// The calibrator for producing probabilities. Default is exponential (aka Platt) calibration.
+ /// The calibrator for producing probabilities. Default is exponential (aka Platt) calibration.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
internal ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory();
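For readers of the options above, a minimal usage sketch of how they are typically consumed through the catalog (a sketch only, assuming the ML.NET 1.x API surface; the row type, values, and the SmoothedHingeLoss choice are illustrative assumptions, not part of this change):

using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

public class Row
{
    public bool Label { get; set; }
    [VectorType(2)] public float[] Features { get; set; }
}

public static class AveragedPerceptronSketch
{
    public static void Run()
    {
        var mlContext = new MLContext(seed: 0);

        // Tiny in-memory data set, purely for illustration.
        IDataView trainData = mlContext.Data.LoadFromEnumerable(new List<Row>
        {
            new Row { Label = true,  Features = new float[] {  1f,  2f } },
            new Row { Label = false, Features = new float[] { -1f, -2f } },
        });

        // Options documented in this file: LossFunction overrides the HingeLoss default;
        // LearningRate and LazyUpdate come from the AveragedLinearOptions base class.
        var options = new AveragedPerceptronTrainer.Options
        {
            LossFunction = new SmoothedHingeLoss(),
            LearningRate = 0.1f,
            LazyUpdate = false,
            NumberOfIterations = 10
        };

        var model = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options).Fit(trainData);
    }
}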
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardTrainers/Standard/Online/OnlineGradientDescent.cs
index a0f678ae95..53e6745243 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/Online/OnlineGradientDescent.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/Online/OnlineGradientDescent.cs
@@ -52,7 +52,7 @@ public sealed class Options : AveragedLinearOptions
internal ISupportRegressionLossFactory RegressionLossFunctionFactory = new SquaredLossFactory();
/// <summary>
- /// A custom loss.
+ /// A custom loss.
/// </summary>
public IRegressionLoss LossFunction { get; set; }
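A small usage sketch for the regression counterpart (assumed, not from this diff): the catalog call below passes an IRegressionLoss explicitly, mirroring the SquaredLossFactory default shown above; trainData is a hypothetical IDataView with "Label"/"Features" columns.

using Microsoft.ML;
using Microsoft.ML.Trainers;

public static class OgdSketch
{
    // trainData is a hypothetical IDataView with a float "Label" and a known-size "Features" vector.
    public static ITransformer Train(MLContext mlContext, IDataView trainData) =>
        mlContext.Regression.Trainers.OnlineGradientDescent(
            lossFunction: new SquaredLoss(),  // IRegressionLoss; same loss as the SquaredLossFactory default above
            learningRate: 0.1f,
            numberOfIterations: 10)
        .Fit(trainData);
}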
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs
index 5c763998c2..bbe982b6cb 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs
@@ -157,7 +157,7 @@ public abstract class SdcaTrainerBase : Stochast
public abstract class OptionsBase : TrainerInputBaseWithWeight
{
/// <summary>
- /// The L2 regularization hyperparameter.
+ /// The L2 regularization hyperparameter.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.", NullName = "", ShortName = "l2, L2Const", SortOrder = 1)]
[TGUI(Label = "L2 Regularizer Constant", SuggestedSweeps = ",1e-7,1e-6,1e-5,1e-4,1e-3,1e-2")]
@@ -166,7 +166,7 @@ public abstract class OptionsBase : TrainerInputBaseWithWeight
// REVIEW: make the default positive when we know how to consume a sparse model
/// <summary>
- /// The L1 regularization hyperparameter.
+ /// The L1 regularization hyperparameter.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
NullName = "", Name = "L1Threshold", ShortName = "l1", SortOrder = 2)]
@@ -1547,7 +1547,7 @@ private protected override BinaryPredictionTransformer MakeTra
/// <summary>
/// The for training a binary logistic regression classification model using the stochastic dual coordinate ascent method.
- /// The trained model is calibrated and can produce probability by feeding the output value of the
+ /// The trained model is calibrated and can produce probability by feeding the output value of the
/// linear function to a .
/// </summary>
///
@@ -1623,7 +1623,7 @@ public sealed class SdcaNonCalibratedBinaryTrainer : SdcaBinaryTrainerBase
- /// The custom loss.
+ /// The custom loss.
/// </summary>
///
/// If unspecified, will be used.
@@ -1632,7 +1632,7 @@ public sealed class Options : BinaryOptionsBase
internal ISupportSdcaClassificationLossFactory LossFunctionFactory = new LogLossFactory();
/// <summary>
- /// The custom loss.
+ /// The custom loss.
/// </summary>
///
/// If unspecified, will be used.
@@ -1776,7 +1776,7 @@ public abstract class SgdBinaryTrainerBase :
public class OptionsBase : TrainerInputBaseWithWeight
{
/// <summary>
- /// The L2 weight for regularization.
+ /// The L2 weight for regularization.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization constant", ShortName = "l2, L2Weight", SortOrder = 50)]
[TGUI(Label = "L2 Regularization Constant", SuggestedSweeps = "1e-7,5e-7,1e-6,5e-6,1e-5")]
@@ -1814,7 +1814,7 @@ public class OptionsBase : TrainerInputBaseWithWeight
public int NumberOfIterations = Defaults.NumberOfIterations;
/// <summary>
- /// The initial learning rate used by SGD.
+ /// The initial learning rate used by SGD.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate (only used by SGD)", Name = "InitialLearningRate", ShortName = "ilr,lr,InitLearningRate")]
[TGUI(Label = "Initial Learning Rate (for SGD)")]
@@ -2171,7 +2171,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig
/// <summary>
/// The for training logistic regression using a parallel stochastic gradient method.
- /// The trained model is calibrated and can produce probability by feeding the output value of the
+ /// The trained model is calibrated and can produce probability by feeding the output value of the
/// linear function to a .
/// </summary>
///
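To make the calibrated vs. non-calibrated distinction in these summaries concrete, a hedged sketch of the two SDCA binary entry points (trainData is a hypothetical IDataView; the regularization values are illustrative, not defaults from this PR):

using Microsoft.ML;
using Microsoft.ML.Trainers;

public static class SdcaBinarySketch
{
    // trainData is a hypothetical IDataView with a bool "Label" and a float-vector "Features".
    public static ITransformer Train(MLContext mlContext, IDataView trainData)
    {
        // Calibrated variant: adds a Probability column on top of Score.
        var calibrated = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
            l2Regularization: 1e-6f,          // if left null, inferred from the data set (see HelpText above)
            l1Regularization: 1e-6f,
            maximumNumberOfIterations: 30);

        // Non-calibrated variant: Score only; a hinge loss gives a linear-SVM-style trainer.
        var nonCalibrated = mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(
            lossFunction: new HingeLoss());
        nonCalibrated.Fit(trainData);

        return calibrated.Fit(trainData);
    }
}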
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaMulticlass.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaMulticlass.cs
index 2344651d59..bc4a578c3a 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaMulticlass.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaMulticlass.cs
@@ -42,7 +42,7 @@ public abstract class SdcaMulticlassTrainerBase : SdcaTrainerBase
- /// The custom loss.
+ /// The custom loss.
/// </summary>
///
/// If unspecified, will be used.
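A brief multiclass sketch under the same assumptions (not part of this change): the label is mapped to a key type before the SDCA trainer documented above.

using Microsoft.ML;

public static class SdcaMulticlassSketch
{
    // trainData is a hypothetical IDataView with a string "Label" and a float-vector "Features".
    public static ITransformer Train(MLContext mlContext, IDataView trainData)
    {
        var pipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
            .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                l2Regularization: 1e-6f,
                maximumNumberOfIterations: 30));
        return pipeline.Fit(trainData);
    }
}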
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
index 3b242f907c..1f27c19bcc 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
@@ -38,7 +38,7 @@ public sealed class SdcaRegressionTrainer : SdcaTrainerBase
- /// A custom loss.
+ /// A custom loss.
/// </summary>
///
/// Defaults to
@@ -47,7 +47,7 @@ public sealed class Options : OptionsBase
internal ISupportSdcaRegressionLossFactory LossFunctionFactory = new SquaredLossFactory();
/// <summary>
- /// A custom loss.
+ /// A custom loss.
/// </summary>
///
/// Defaults to
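And the regression form, again as an assumed sketch: passing SquaredLoss explicitly mirrors the SquaredLossFactory default shown above.

using Microsoft.ML;
using Microsoft.ML.Trainers;

public static class SdcaRegressionSketch
{
    // trainData is a hypothetical IDataView with a float "Label" and a float-vector "Features".
    public static ITransformer Train(MLContext mlContext, IDataView trainData) =>
        mlContext.Regression.Trainers.Sdca(
            lossFunction: new SquaredLoss(),  // ISupportSdcaRegressionLoss; mirrors the documented default
            l2Regularization: 1e-6f,
            l1Regularization: 1e-6f,
            maximumNumberOfIterations: 20)
        .Fit(trainData);
}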
diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
index 4eaeb9dc53..fd06f33389 100644
--- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
+++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
@@ -25,8 +25,8 @@ public static class StandardTrainersCatalog
/// The features, or independent variables.
/// The name of the example weight column (optional).
/// The maximum number of passes through the training dataset; set to 1 to simulate online learning.
- /// The initial learning rate used by SGD.
- /// The L2 weight for regularization.
+ /// The initial learning rate used by SGD.
+ /// The L2 weight for regularization.
///
///
/// The name of the label column, or dependent variable.
/// The features, or independent variables.
/// The name of the example weight column (optional).
- /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer.
+ /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer.
/// The maximum number of passes through the training dataset; set to 1 to simulate online learning.
- /// The initial learning rate used by SGD.
- /// The L2 weight for regularization.
+ /// The initial learning rate used by SGD.
+ /// The L2 weight for regularization.
///
///
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The loss function minimized in the training process. Using, for example, its default leads to a least square trainer.
- /// The L2 weight for regularization.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The loss function minimized in the training process. Using, for example, its default leads to a least square trainer.
+ /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// The maximum number of passes to perform over the data.
///
///
@@ -187,8 +187,8 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The L2 weight for regularization.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// The maximum number of passes to perform over the data.
///
///
@@ -239,9 +239,9 @@ public static SdcaLogisticRegressionBinaryTrainer SdcaLogisticRegression(
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The loss function minimized in the training process. Defaults to if not specified.
- /// The L2 weight for regularization.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The loss function minimized in the training process. Defaults to if not specified.
+ /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// The maximum number of passes to perform over the data.
///
///
@@ -287,8 +287,8 @@ public static SdcaNonCalibratedBinaryTrainer SdcaNonCalibrated(
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The L2 weight for regularization.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// The maximum number of passes to perform over the data.
///
///
@@ -337,9 +337,9 @@ public static SdcaMaximumEntropyMulticlassTrainer SdcaMaximumEntropy(this Multic
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The loss function to be minimized. Defaults to if not specified.
- /// The L2 weight for regularization.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The loss function to be minimized. Defaults to if not specified.
+ /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// The maximum number of passes to perform over the data.
///
///
@@ -388,13 +388,13 @@ public static SdcaNonCalibratedMulticlassTrainer SdcaNonCalibrated(this Multicla
/// The binary classification catalog trainer object.
/// The name of the label column. The column data must be .
/// The name of the feature column. The column data must be a known-sized vector of .
- /// The loss function minimized in the training process. If , would be used and lead to a max-margin averaged perceptron trainer.
- /// The initial learning rate used by SGD.
+ /// The loss function minimized in the training process. If , would be used and lead to a max-margin averaged perceptron trainer.
+ /// The initial learning rate used by SGD.
///
/// to decrease the as iterations progress; otherwise, .
/// Default is .
///
- /// The L2 weight for regularization.
+ /// The L2 weight for regularization.
/// Number of passes through the training dataset.
///
///
@@ -462,10 +462,10 @@ public IClassificationLoss CreateComponent(IHostEnvironment env)
/// The regression catalog trainer object.
/// The name of the label column.
/// The name of the feature column.
- /// The loss function minimized in the training process. Using, for example, leads to a least square trainer.
- /// The initial learning rate used by SGD.
+ /// The loss function minimized in the training process. Using, for example, leads to a least square trainer.
+ /// The initial learning rate used by SGD.
/// Decrease learning rate as iterations progress.
- /// The L2 weight for regularization.
+ /// The L2 weight for regularization.
/// The number of passes through the training dataset.
public static OnlineGradientDescentTrainer OnlineGradientDescent(this RegressionCatalog.RegressionTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
@@ -505,8 +505,8 @@ public static OnlineGradientDescentTrainer OnlineGradientDescent(this Regression
/// The name of the feature column.
/// The name of the example weight column (optional).
/// Enforce non-negative weights.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
- /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The L2 weight for regularization.
/// Memory size for . Low=faster, less accurate.
/// Threshold for optimizer convergence.
///
@@ -559,8 +559,8 @@ public static LbfgsLogisticRegressionBinaryTrainer LbfgsLogisticRegression(this
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
- /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The L2 weight for regularization.
/// Threshold for optimizer convergence.
/// Number of previous iterations to remember for estimating the Hessian. Lower values mean faster but less accurate estimates.
/// Enforce non-negative weights.
@@ -613,8 +613,8 @@ public static LbfgsPoissonRegressionTrainer LbfgsPoissonRegression(this Regressi
/// The name of the feature column.
/// The name of the example weight column (optional).
/// Enforce non-negative weights.
- /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
- /// The L2 weight for regularization.
+ /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
+ /// The L2 weight for regularization.
/// Memory size for . Low=faster, less accurate.
/// Threshold for optimizer convergence.
public static LbfgsMaximumEntropyMulticlassTrainer LbfgsMaximumEntropy(this MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog,
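Finally, a sketch mapping the L-BFGS parameters documented at the end of this file onto the binary catalog call (values are illustrative; historySize corresponds to the "memory size" parameter and optimizationTolerance to the convergence threshold):

using Microsoft.ML;

public static class LbfgsSketch
{
    // trainData is a hypothetical IDataView with a bool "Label" and a float-vector "Features".
    public static ITransformer Train(MLContext mlContext, IDataView trainData) =>
        mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
            l1Regularization: 0.05f,       // larger values push toward sparser weights
            l2Regularization: 0.05f,
            historySize: 50,               // "memory size": lower is faster but less accurate
            optimizationTolerance: 1e-7f,  // convergence threshold
            enforceNonNegativity: false)
        .Fit(trainData);
}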
diff --git a/src/Microsoft.ML.StaticPipe/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
index e228a064ef..c0c02cb7e9 100644
--- a/src/Microsoft.ML.StaticPipe/SgdStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
@@ -21,8 +21,8 @@ public static class SgdStaticExtensions
/// The name of the feature column.
/// The name for the example weight column.
/// The maximum number of iterations; set to 1 to simulate online learning.
- /// The initial learning rate used by SGD.
- /// The L2 weight for regularization.
+ /// The initial learning rate used by SGD.
+ /// The L2 weight for regularization.
/// A delegate that is called every time the
/// method is called on the
/// instance created out of this. This delegate will receive
@@ -101,8 +101,8 @@ public static (Scalar score, Scalar probability, Scalar pred
/// The name of the feature column.
/// The name for the example weight column.
/// The maximum number of iterations; set to 1 to simulate online learning.
- /// The initial learning rate used by SGD.
- /// The L2 weight for regularization.
+ /// The initial learning rate used by SGD.
+ /// The L2 weight for regularization.
/// The loss function to use.
/// A delegate that is called every time the
/// method is called on the