diff --git a/build/Dependencies.props b/build/Dependencies.props
index 7a79b3a087..47f34e0e1e 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -9,6 +9,7 @@
     4.3.0
     4.8.0
     4.5.0
+    4.6.0
diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
index 75517c587e..f479d0e970 100644
--- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
+++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
@@ -8,6 +8,7 @@
+
diff --git a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
new file mode 100644
index 0000000000..66868c1c9a
--- /dev/null
+++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
@@ -0,0 +1,92 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Trainers.HalLearners;
+using System;
+
+namespace Microsoft.ML.Runtime.Learners
+{
+    using Mkl = OlsLinearRegressionTrainer.Mkl;
+
+    public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// This implementation makes use of hardware acceleration (MKL) and is the faster alternative to the managed ComputeLRTrainingStdImpl.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        ///
+        ///
+        ///
+        /// The used for messaging.
+        /// The L2Weight used for training. (Supply the same one that got used during training.)
+        public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+        {
+            Contracts.AssertValue(ch);
+            Contracts.AssertValue(hessian, nameof(hessian));
+            Contracts.Assert(numSelectedParams > 0);
+            Contracts.Assert(currentWeightsCount > 0);
+            Contracts.Assert(l2Weight > 0);
+
+            // Apply Cholesky decomposition to find the inverse of the Hessian.
+            Double[] invHessian = null;
+            try
+            {
+                // First, find the Cholesky decomposition LL' of the Hessian.
+                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+                // Note that hessian is already modified at this point. It is no longer the original Hessian,
+                // but instead represents the Cholesky decomposition L.
+                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
+                // of the original information matrix.
+                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+                // At this point, hessian should contain the inverse of the original Hessian matrix.
+                // Swap hessian with invHessian to avoid confusion in the following context.
+                Utils.Swap(ref hessian, ref invHessian);
+                Contracts.Assert(hessian == null);
+            }
+            catch (DllNotFoundException)
+            {
+                throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
+            }
+
+            float[] stdErrorValues = new float[numSelectedParams];
+            stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);
+
+            for (int i = 1; i < numSelectedParams; i++)
+            {
+                // Initialize with inverse Hessian.
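+                // The inverse Hessian comes back from MKL in packed row-major lower-triangular
+                // storage, so the diagonal entry (i, i) sits at offset i * (i + 1) / 2 + i.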
+                stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i];
+            }
+
+            if (l2Weight > 0)
+            {
+                // Iterate through all entries of inverse Hessian to make adjustment to variance.
+                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+                // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+                int ioffset = 1;
+                for (int iRow = 1; iRow < numSelectedParams; iRow++)
+                {
+                    for (int iCol = 0; iCol <= iRow; iCol++)
+                    {
+                        var entry = (float)invHessian[ioffset++];
+                        AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+                    }
+                }
+
+                Contracts.Assert(ioffset == invHessian.Length);
+            }
+
+            for (int i = 1; i < numSelectedParams; i++)
+                stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+            // currentWeights vector size is Weights2 + the bias
+            return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+        }
+    }
+}
diff --git a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
index 415752aa8d..671913b203 100644
--- a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
+++ b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
@@ -6,5 +6,6 @@ using Microsoft.ML;
 
 [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)]
+[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)]
 
 [assembly: WantsToBeBestFriends]
diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
index b1624559cd..d1c2fba257 100644
--- a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
+++ b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
@@ -1,4 +1,4 @@
-
+
   netstandard2.0
@@ -6,6 +6,10 @@
   true
+
+
+
+
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
index 1f5f49fe40..60c81b0ed1 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
@@ -4,6 +4,7 @@
 using System;
 using System.Collections.Generic;
+using MathNet.Numerics.LinearAlgebra;
 using Microsoft.ML.Core.Data;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.CommandLine;
@@ -40,11 +41,27 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase
+            ///
+            /// If set to true, training statistics will be generated at the end of training.
+            /// If you have a large number of learned training parameters (more than 500),
+            /// generating the training statistics might take a few seconds.
+            /// More than 1000 weights might take a few minutes. For those cases consider using the instance of ComputeLRTrainingStdThroughHal
+            /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
+            ///
             [Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)]
             public bool ShowTrainingStats = false;
+
+            ///
+            /// The instance of ComputeLRTrainingStd that computes the training statistics at the end of training.
+            /// If you have a large number of learned training parameters (more than 500),
+            /// generating the training statistics might take a few seconds.
+            /// More than 1000 weights might take a few minutes. For those cases consider using the instance of ComputeLRTrainingStdThroughHal
+            /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
+            ///
+            public ComputeLRTrainingStd StdComputer;
         }
 
-        private Double _posWeight;
+        private double _posWeight;
         private LinearModelStatistics _stats;
 
         ///
@@ -78,6 +95,9 @@ public LogisticRegression(IHostEnvironment env,
             _posWeight = 0;
             ShowTrainingStats = Args.ShowTrainingStats;
+
+            if (ShowTrainingStats && Args.StdComputer == null)
+                Args.StdComputer = new ComputeLRTrainingStdImpl();
         }
 
         ///
@@ -88,6 +108,9 @@ internal LogisticRegression(IHostEnvironment env, Arguments args)
         {
             _posWeight = 0;
             ShowTrainingStats = Args.ShowTrainingStats;
+
+            if (ShowTrainingStats && Args.StdComputer == null)
+                Args.StdComputer = new ComputeLRTrainingStdImpl();
         }
 
         public override PredictionKind PredictionKind => PredictionKind.BinaryClassification;
@@ -330,7 +353,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
                 }
             }
 
-            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+            if (Args.StdComputer == null)
+                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+            else
+            {
+                var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Length, ch, L2Weight);
+                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, std);
+            }
         }
 
         protected override void ProcessPriorDistribution(float label, float weight)
@@ -397,4 +426,125 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm
                 () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn));
         }
     }
+
+    ///
+    /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+    /// p-value and z-Score.
+    /// If you need fast calculations, use the implementation in the Microsoft.ML.HalLearners package,
+    /// which makes use of hardware acceleration.
+    /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+    ///
+    public abstract class ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// If you need fast calculations, use the ComputeStd method from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        public abstract VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight);
+
+        ///
+        /// Adjust the variance for regularized cases.
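+        /// The correction subtracts l2Weight * entry^2 from the running variance of the row coefficient,
+        /// and of the column coefficient for strictly off-diagonal, non-bias entries (see the body below).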
+        ///
+        [BestFriend]
+        internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Weight, float[] stdErrorValues2)
+        {
+            var adjustment = l2Weight * inverseEntry * inverseEntry;
+            stdErrorValues2[iRow] -= adjustment;
+
+            if (0 < iCol && iCol < iRow)
+                stdErrorValues2[iCol] -= adjustment;
+        }
+    }
+
+    ///
+    /// Extends ComputeLRTrainingStd, making use of Math.Net Numerics.
+    /// If you need faster calculations (have non-sparse weight vectors of more than 300 features), use the instance of ComputeLRTrainingStd from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration
+    /// for those computations.
+    ///
+    public sealed class ComputeLRTrainingStdImpl : ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        ///
+        ///
+        ///
+        /// The used for messaging.
+        /// The L2Weight used for training. (Supply the same one that got used during training.)
+        public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+        {
+            Contracts.AssertValue(ch);
+            Contracts.AssertValue(hessian, nameof(hessian));
+            Contracts.Assert(numSelectedParams > 0);
+            Contracts.Assert(currentWeightsCount > 0);
+            Contracts.Assert(l2Weight > 0);
+
+            double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];
+
+            int hessianLength = 0;
+            int dimension = numSelectedParams - 1;
+
+            for (int row = dimension; row >= 0; row--)
+            {
+                for (int col = 0; col <= dimension; col++)
+                {
+                    if ((row + col) <= dimension)
+                    {
+                        if ((row + col) == dimension)
+                        {
+                            matrixHessian[row, col] = hessian[hessianLength];
+                        }
+                        else
+                        {
+                            matrixHessian[row, col] = hessian[hessianLength];
+                            matrixHessian[dimension - col, dimension - row] = hessian[hessianLength];
+                        }
+                        hessianLength++;
+                    }
+                    else
+                        continue;
+                }
+            }
+
+            var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
+            var invers = h.Inverse();
+
+            float[] stdErrorValues = new float[numSelectedParams];
+            stdErrorValues[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]);
+
+            for (int i = 1; i < numSelectedParams; i++)
+            {
+                // Initialize with inverse Hessian.
+                // The diagonal of the inverse Hessian.
+                stdErrorValues[i] = (float)invers[i, numSelectedParams - i - 1];
+            }
+
+            if (l2Weight > 0)
+            {
+                // Iterate through all entries of inverse Hessian to make adjustment to variance.
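+                // Note the flipped column index below: matrixHessian was filled along anti-diagonals above,
+                // so the logical entry (iRow, iCol) of the inverse is read from invers[iRow, numSelectedParams - iCol - 1].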
+                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+                // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+                for (int iRow = 1; iRow < numSelectedParams; iRow++)
+                {
+                    for (int iCol = 0; iCol <= iRow; iCol++)
+                    {
+                        float entry = (float)invers[iRow, numSelectedParams - iCol - 1];
+                        AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+                    }
+                }
+            }
+
+            for (int i = 1; i < numSelectedParams; i++)
+                stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+            return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+        }
+    }
 }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
index bd69453d5a..1eeb043c01 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -2,17 +2,16 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using System;
-using System.Collections.Generic;
-using System.ComponentModel;
-using System.IO;
-using System.Linq;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.Internal.CpuMath;
 using Microsoft.ML.Runtime.Internal.Utilities;
 using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Runtime.Model;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
 
 // This is for deserialization from a model repository.
 [assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -84,13 +83,13 @@ private static VersionInfo GetVersionInfo()
     // of the variance-covariance matrix.
     private readonly VBuffer<Single>? _coeffStdError;
 
-    public long TrainingExampleCount { get { return _trainingExampleCount; } }
+    public long TrainingExampleCount => _trainingExampleCount;
 
-    public Single Deviance { get { return _deviance; } }
+    public Single Deviance => _deviance;
 
-    public Single NullDeviance { get { return _nullDeviance; } }
+    public Single NullDeviance => _nullDeviance;
 
-    public int ParametersCount { get { return _paramCount; } }
+    public int ParametersCount => _paramCount;
 
     internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
     {
@@ -111,7 +110,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
         _coeffStdError = coeffStdError;
     }
 
-    public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
+    internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
     {
         Contracts.CheckValue(env, nameof(env));
         _env = env;
@@ -157,7 +156,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
         _coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
     }
 
-    public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
+    internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
     {
         Contracts.CheckValue(env, nameof(env));
         env.CheckValue(ctx, nameof(ctx));
@@ -209,6 +208,9 @@ private void SaveCore(ModelSaveContext ctx)
         ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.GetIndices());
     }
 
+    ///
+    /// Computes the standard deviation, Z-Score and p-Value.
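+    /// Returns false when the statistics do not include the coefficient standard errors,
+    /// i.e. when the model was trained without training statistics enabled.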
+    ///
     public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
     {
         if (!stats._coeffStdError.HasValue)
@@ -223,7 +225,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
         stdError = stats._coeffStdError.Value.Values[0];
         Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
         zScore = bias / stdError;
-        pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
+        pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
         return true;
     }
 
diff --git a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
index 6d58cb8d2d..4bd1c57233 100644
--- a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
+++ b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
@@ -13,3 +13,15 @@ Count of training examples: 32561
 Residual Deviance: 26705.74
 Null Deviance: 35948.08
 AIC: 26719.74
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -8.228298 0.1161297 -70.85435 0 ***
+education-num 5.066041 0.1048074 48.33666 0 ***
+capital-gain 18.58347 0.4694776 39.5833 0 ***
+age 3.86064 0.1061118 36.38277 0 ***
+hours-per-week 3.946534 0.1258723 31.35349 0 ***
+capital-loss 2.81616 0.13793 20.41732 0 ***
+fnlwgt 0.7489593 0.2048056 3.656927 0.0002553463 ***
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
index 5c5d36e4b6..057ef0ff87 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-521 98.29433 669.0935 118.294327
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+521 98.29433 669.0935 118.294327 -5.120674 0.699818552 -7.31714535 0 2.353567 1.78653753 1.9442488 1.38072 1.0831089 2.43588924 1.61141682 1.34575915 -0.7715381 0.4267568 0.42040658 0.41370967 0.482155383 0.456691444 0.451504 0.4605175 0.478413582 0.342069477 5.5150075 4.249547 4.69954872 2.86364126 2.37164259 5.395056 3.4991436 2.81296182 -2.255501 5.96046448E-08 2.14576721E-05 2.62260437E-06 0.00418818 0.0177091956 5.96046448E-08 0.000466823578 0.00490885973 0.0241017938
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
index 152e94f64d..dbb2224574 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-520 94.1969452 673.3445 114.196945
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+520 94.1969452 673.3445 114.196945 -4.860323 0.712811947 -6.81852055 0 2.143086 1.49418533 1.71121442 1.38318741 0.883200347 3.16845965 1.38684654 1.51904845 -0.8226236 0.430655479 0.4099987 0.4222687 0.4832917 0.457050323 0.457937717 0.445124656 0.4728626 0.338379949 4.976335 3.64436626 4.05243 2.86201358 1.93239188 6.918975 3.11563635 3.21245217 -2.43106484 6.556511E-07 0.0002681017 5.07235527E-05 0.00420969725 0.05331099 0 0.00183564425 0.00131618977 0.0150545239
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
index beeec64d77..d89d7a7619 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
@@ -14,6 +14,16 @@ Count of training examples: 521
 Residual Deviance: 98.29433
 Null Deviance: 669.0935
 AIC: 118.2943
+(Bias): System.Single[]
+Features.thickness: System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
 Partition model 1 summary:
 Per-feature gain summary for the boosted tree ensemble:
 	Features.uniform_size: 1
@@ -43,6 +53,16 @@ Count of training examples: 520
 Residual Deviance: 94.19695
 Null Deviance: 673.3445
 AIC: 114.1969
+(Bias): System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.thickness: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
 Partition model 3 summary:
 Per-feature gain summary for the boosted tree ensemble:
 	Features.uniform_size: 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
index 50abe9df54..fadb2e27c8 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
@@ -17,6 +17,21 @@ Count of training examples: 521
 Residual Deviance: 98.29433
 Null Deviance: 669.0935
 AIC: 118.2943
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -5.120674 0.6998186 -7.317145 0 ***
+Features.thickness 2.353567 0.4267568 5.515007 5.960464E-08 ***
+Features.bare_nuclei 2.435889 0.451504 5.395056 5.960464E-08 ***
+Features.uniform_shape 1.944249 0.4137097 4.699549 2.622604E-06 ***
+Features.uniform_size 1.786538 0.4204066 4.249547 2.145767E-05 ***
+Features.bland_chromatin 1.611417 0.4605175 3.499144 0.0004668236 ***
+Features.adhesion 1.38072 0.4821554 2.863641 0.00418818 **
+Features.normal_nucleoli 1.345759 0.4784136 2.812962 0.00490886 **
+Features.epit_size 1.083109 0.4566914 2.371643 0.0177092 *
+Cat.1 -0.7715381 0.3420695 -2.255501 0.02410179 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Partition model 1 summary:
 Per-feature gain summary for the boosted tree ensemble:
@@ -50,6 +65,21 @@ Count of training examples: 520
 Residual Deviance: 94.19695
 Null Deviance: 673.3445
 AIC: 114.1969
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -4.860323 0.7128119 -6.818521 0 ***
+Features.bare_nuclei 3.16846 0.4579377 6.918975 0 ***
+Features.thickness 2.143086 0.4306555 4.976335 6.556511E-07 ***
+Features.uniform_shape 1.711214 0.4222687 4.05243 5.072355E-05 ***
+Features.uniform_size 1.494185 0.4099987 3.644366 0.0002681017 ***
+Features.normal_nucleoli 1.519048 0.4728626 3.212452 0.00131619 **
+Features.bland_chromatin 1.386847 0.4451247 3.115636 0.001835644 **
+Features.adhesion 1.383187 0.4832917 2.862014 0.004209697 **
+Features.epit_size 0.8832003 0.4570503 1.932392 0.05331099 .
+Cat.1 -0.8226236 0.3383799 -2.431065 0.01505452 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Partition model 3 summary:
 Per-feature gain summary for the boosted tree ensemble:
diff --git a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
index 8e04238c73..c467f102be 100644
--- a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-683 126.83107 884.350159 146.83107
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses
+683 126.83107 884.350159 146.83107 -6.186806 0.459383339 -13.4676332 0 2.65800762 1.68089855 1.944068 1.42514718 0.8536965 2.9325006 1.74816787 1.58165014 0.595681 0.455618978 0.429146379 0.431570023 0.479817748 0.470442533 0.4381438 0.469593167 0.4714128 0.467883229 5.83383846 3.916842 4.504641 2.97018433 1.814667 6.69301 3.72272849 3.35512757 1.27314031 0 8.9764595E-05 6.67572E-06 0.002976358 0.06957501 0 0.00019711256 0.0007933974 0.202968419
diff --git a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
index 614166c904..273028ddaa 100644
--- a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
+++ b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
@@ -535,41 +535,52 @@ private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchColl
             double f1 = double.Parse(firstCollection[i].ToString());
             double f2 = double.Parse(secondCollection[i].ToString());
 
-            // this follows the IEEE recommendations for how to compare floating point numbers
-            double allowedVariance = Math.Pow(10, -digitsOfPrecision);
-            double delta = Round(f1, digitsOfPrecision) - Round(f2, digitsOfPrecision);
-            // limitting to the digits we care about.
-            delta = Math.Round(delta, digitsOfPrecision);
-
-            bool inRange = delta > -allowedVariance && delta < allowedVariance;
-
-            // for some cases, rounding up is not beneficial
-            // so checking on whether the difference is significant prior to rounding, and failing only then.
-            // example, for 5 digits of precision.
-            // F1 = 1.82844949 Rounds to 1.8284
-            // F2 = 1.8284502 Rounds to 1.8285
-            // would fail the inRange == true check, but would suceed the following, and we doconsider those two numbers
-            // (1.82844949 - 1.8284502) = -0.00000071
+            if (!CompareNumbersWithTolerance(f1, f2, i, digitsOfPrecision))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    public bool CompareNumbersWithTolerance(double expected, double actual, int? iterationOnCollection = null, int digitsOfPrecision = DigitsOfPrecision)
+    {
+        // This follows the IEEE recommendations for how to compare floating point numbers.
+        double allowedVariance = Math.Pow(10, -digitsOfPrecision);
+        double delta = Round(expected, digitsOfPrecision) - Round(actual, digitsOfPrecision);
+        // Limiting to the digits we care about.
+        delta = Math.Round(delta, digitsOfPrecision);
+
+        bool inRange = delta > -allowedVariance && delta < allowedVariance;
+
+        // For some cases, rounding up is not beneficial,
+        // so check whether the difference is significant prior to rounding, and fail only then.
+        // Example, for 5 digits of precision:
+        // F1 = 1.82844949 rounds to 1.8284
+        // F2 = 1.8284502 rounds to 1.8285
+        // would fail the inRange == true check, but would succeed the following, and we do consider
+        // those two numbers close enough: (1.82844949 - 1.8284502) = -0.00000071
         double delta2 = 0;
         if (!inRange)
         {
-            delta2 = Math.Round(f1 - f2, digitsOfPrecision);
+            delta2 = Math.Round(expected - actual, digitsOfPrecision);
             inRange = delta2 >= -allowedVariance && delta2 <= allowedVariance;
         }
 
         if (!inRange)
         {
-            Fail(_allowMismatch, $"Output and baseline mismatch at line {i}." + Environment.NewLine +
-                $"Values to compare are {firstCollection[i]} and {secondCollection[i]}" + Environment.NewLine +
-                $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
-                $"\t delta: {delta}" + Environment.NewLine +
-                $"\t delta2: {delta2}" + Environment.NewLine);
-            return false;
+            var message = iterationOnCollection != null ? $"Output and baseline mismatch at line {iterationOnCollection}." + Environment.NewLine : "";
+
+            Fail(_allowMismatch, message +
+                $"Values to compare are {expected} and {actual}" + Environment.NewLine +
+                $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
+                $"\t delta: {delta}" + Environment.NewLine +
+                $"\t delta2: {delta2}" + Environment.NewLine);
         }
-        }
 
-        return true;
+        return inRange;
     }
 
     private static double Round(double value, int digitsOfPrecision)
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index dc5950f8d8..30906c8940 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -4,6 +4,7 @@
 using Microsoft.ML.Core.Data;
 using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Calibration;
 using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Trainers;
 using Xunit;
@@ -38,5 +39,41 @@ public void TestEstimatorPoissonRegression()
             TestEstimatorCore(pipe, dataView);
             Done();
         }
+
+        [Fact]
+        public void TestLogisticRegressionStats()
+        {
+            (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+            pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => { s.ShowTrainingStats = true; }));
+            var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+            var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+            var stats = linearModel.Statistics;
+            LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+            CompareNumbersWithTolerance(stdError, 0.250672936);
+            CompareNumbersWithTolerance(zScore, 7.97852373);
+        }
+
+        [Fact]
+        public void TestLogisticRegressionStats_MKL()
+        {
+            (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+            pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => {
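+                // Swap in the MKL-backed implementation added in this PR; unlike the default
+                // managed ComputeLRTrainingStdImpl, it relies on the native MklImports library.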
+                s.ShowTrainingStats = true;
+                s.StdComputer = new ComputeLRTrainingStdThroughHal();
+            }));
+
+            var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+            var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+            var stats = linearModel.Statistics;
+            LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+            CompareNumbersWithTolerance(stdError, 0.250672936);
+            CompareNumbersWithTolerance(zScore, 7.97852373);
+        }
     }
 }
diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
index c7ee67537a..c4bd2fe38c 100644
--- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
+++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
@@ -18,7 +18,7 @@ public sealed class InstanceInitializerAnalyzer : DiagnosticAnalyzer
     internal const string DiagnosticId = "MSML_NoInstanceInitializers";
 
     private const string Title = "No initializers on instance fields or properties";
-    private const string Format = "Member {0} has a {1} initialier outside the constructor";
+    private const string Format = "Member {0} has a {1} initializer outside the constructor";
     private static DiagnosticDescriptor Rule = new DiagnosticDescriptor(DiagnosticId, Title, Format, Category,