87 changes: 87 additions & 0 deletions src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
@@ -0,0 +1,87 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Trainers.HalLearners;
using System;

namespace Microsoft.ML.Runtime.Learners
{
using Mkl = OlsLinearRegressionTrainer.Mkl;

/// <include file='doc.xml' path='doc/members/member[@name="LBFGS"]/*' />
/// <include file='doc.xml' path='docs/members/example[@name="LogisticRegressionBinaryClassifier"]/*' />
public static class LogisticRegressionTrainingStats
{

public static void ComputeExtendedTrainingStatistics(this LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
@wschin (Member), Nov 5, 2018:
The function name can be just compute(...) because it's inside LogisticRegressionTrainingStats. #Resolved

{
Contracts.AssertValue(ch);
Contracts.AssertValue(model.Statistics, "Training statistics are only generated after training finishes. Train with ShowTrainingStats set to true.");
Contracts.Assert(l2Weight > 0);

int numSelectedParams = model.Statistics.ParametersCount;

// Apply Cholesky Decomposition to find the inverse of the Hessian.
Double[] invHessian = null;
try
{
// First, find the Cholesky decomposition LL' of the Hessian.
Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
// Note that hessian is already modified at this point. It is no longer the original Hessian,
// but instead represents the Cholesky decomposition L.
// Also note that the following routine is supposed to consume the Cholesky decomposition L instead
// of the original information matrix.
Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
// At this point, hessian should contain the inverse of the original Hessian matrix.
// Swap hessian with invHessian to avoid confusion in the following context.
Utils.Swap(ref model.Statistics.Hessian, ref invHessian);
Contracts.Assert(model.Statistics.Hessian == null);
}
catch (DllNotFoundException)
{
throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
}

float[] stdErrorValues = new float[numSelectedParams];
stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);

for (int i = 1; i < numSelectedParams; i++)
{
// Initialize with inverse Hessian.
stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
}

if (l2Weight > 0)
{
// Iterate through all entries of inverse Hessian to make adjustment to variance.
// A discussion on ridge regularized LR coefficient covariance matrix can be found here:
// http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
@wschin (Member), Nov 5, 2018:
I really don't like putting a very old doc inline and asking someone to read the whole article.

Suggested change:
- // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
+ // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
#Resolved

// http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
@wschin (Member), Nov 5, 2018:
Suggested change:
- // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
+ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
#Resolved

int ioffset = 1;
for (int iRow = 1; iRow < numSelectedParams; iRow++)
{
for (int iCol = 0; iCol <= iRow; iCol++)
{
var entry = (Single)invHessian[ioffset];
var adjustment = -l2Weight * entry * entry;
@wschin (Member), Nov 5, 2018:
This line doesn't make a lot of sense to me. The code first computes \sigma = sqrt(x + \lambda) and then does something like \sigma + \lambda * \sigma^2. #Resolved

Member Author, in reply:
I'll review it too. Thanks for pointing it out.

stdErrorValues[iRow] -= adjustment;
if (0 < iCol && iCol < iRow)
stdErrorValues[iCol] -= adjustment;
ioffset++;
}
}

Contracts.Assert(ioffset == invHessian.Length);
}

for (int i = 1; i < numSelectedParams; i++)
stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

VBuffer<float> stdErrors = new VBuffer<float>(model.Weights2.Count, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices);
model.Statistics.SetCoeffStdError(stdErrors);
}
}
}
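For context on the adjustment loop and the sign question raised in the comments above: significance testing for ridge-penalized logistic regression (the topic of the references cited in the code comments) is usually based on a sandwich-form covariance. As a sketch, writing H = X^T W X + \lambda I for the penalized Hessian (the exact scaling of \lambda depends on how the penalty term is written):

```math
\operatorname{Var}(\hat{\beta}) \;\approx\; H^{-1}\,(X^{\top} W X)\,H^{-1}
\;=\; H^{-1}(H - \lambda I)H^{-1}
\;=\; H^{-1} - \lambda H^{-2}.
```

The i-th diagonal entry is (H^{-1})_{ii} - \lambda * sum_j (H^{-1})_{ij}^2, which is the quantity the entry-squared accumulation in the loop above approximates; the sign of that correction term is exactly what the comment above questions.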
@@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
[Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

internal static class Defaults
public static class Defaults
@sfilipi (Member, Author), Nov 8, 2018, on the `public` modifier:
revert #Resolved

{
internal const float L2Weight = 1;
internal const float L1Weight = 1;
internal const float OptTol = 1e-7f;
internal const int MemorySize = 20;
internal const int MaxIterations = int.MaxValue;
internal const bool EnforceNonNegativity = false;
public const float L2Weight = 1;
public const float L1Weight = 1;
public const float OptTol = 1e-7f;
public const int MemorySize = 20;
public const int MaxIterations = int.MaxValue;
public const bool EnforceNonNegativity = false;
}
}

@@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
}

protected virtual int ClassCount => 1;
protected int BiasCount => ClassCount;
@sfilipi (Member, Author), Nov 8, 2018, on the `protected` modifier:
revert #Resolved

public int BiasCount => ClassCount;
protected int WeightCount => ClassCount * NumFeatures;
protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
@@ -44,7 +44,7 @@ public sealed class Arguments : ArgumentsBase
public bool ShowTrainingStats = false;
}

private Double _posWeight;
private double _posWeight;
private LinearModelStatistics _stats;

/// <summary>
@@ -329,8 +329,9 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
}
}
}

_stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
_stats.Hessian = hessian;
_stats.WeightIndices = weightIndices;
}

protected override void ProcessPriorDistribution(float label, float weight)
36 changes: 23 additions & 13 deletions src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -2,17 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.CpuMath;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// This is for deserialization from a model repository.
[assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -82,17 +81,22 @@ private static VersionInfo GetVersionInfo()
// It could be null when there are too many non-zero weights so that
// the memory is insufficient to hold the Hessian matrix necessary for the computation
// of the variance-covariance matrix.
private readonly VBuffer<Single>? _coeffStdError;
private VBuffer<Single>? _coeffStdError;

public long TrainingExampleCount { get { return _trainingExampleCount; } }
public long TrainingExampleCount => _trainingExampleCount;

public Single Deviance { get { return _deviance; } }
public Single Deviance => _deviance;

public Single NullDeviance { get { return _nullDeviance; } }
public Single NullDeviance => _nullDeviance;

public int ParametersCount { get { return _paramCount; } }
public int ParametersCount => _paramCount;

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
public Double[] Hessian;

// Indices of bias and non-zero weight slots.
public int[] WeightIndices;

public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
{
Contracts.AssertValue(env);
env.Assert(trainingExampleCount > 0);
@@ -104,7 +108,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
_nullDeviance = nullDeviance;
}

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
: this(env, trainingExampleCount, paramCount, deviance, nullDeviance)
{
_env.Assert(coeffStdError.Count == _paramCount);
@@ -222,7 +226,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
stdError = stats._coeffStdError.Value.Values[0];
Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
zScore = bias / stdError;
pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
return true;
}

@@ -285,6 +289,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
};
}

public void SetCoeffStdError(VBuffer<Single> coeffStdError)
{
_env.Assert(coeffStdError.Count == _paramCount);
_coeffStdError = coeffStdError;
}

private IEnumerable<CoefficientStatistics> GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema)
{
Contracts.AssertValue(_env);
29 changes: 29 additions & 0 deletions test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -4,6 +4,7 @@

using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Xunit;
@@ -38,5 +39,33 @@ public void TestEstimatorPoissonRegression()
TestEstimatorCore(pipe, dataView);
Done();
}

[Fact]
public void TestLogisticRegressionStats()
@sfilipi (Member, Author), Nov 7, 2018, on TestLogisticRegressionStats:
Combine both tests together. #WontFix

Member Author, in reply:
Currently running into issues with this. Will investigate and log a bug.

{
(IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();

pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; }));
var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;

var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
var stats = linearModel.Statistics;

LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);

Assert.Equal(0.0f, stdError);
Assert.Equal(0.0f, zScore);
Assert.Equal(0.0f, pValue);

using (var ch = Env.Start("Calculating STD for LR."))
linearModel.ComputeExtendedTrainingStatistics(ch);

LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue);

Assert.True(stdError > 0);
@wschin (Member), Nov 5, 2018:
May we put a more strict range for each statistic? #Resolved

Assert.True(zScore > 0);

Done();
}
}
}
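Regarding the review suggestion above to put a stricter range on each statistic: a minimal sketch of how the final checks in TestLogisticRegressionStats could be tightened using xUnit's Assert.InRange. The numeric bounds below are illustrative placeholders only and would need to be calibrated against the actual output of GetBinaryClassificationPipeline().

```csharp
// Hypothetical replacement for the Assert.True checks above;
// the bounds are placeholders, not values verified against this dataset.
Assert.InRange(stdError, 0.0f, 10.0f);  // standard error of the bias term
Assert.InRange(zScore, 0.0f, 100.0f);   // z-score of the bias term
Assert.InRange(pValue, 0.0f, 1.0f);     // a p-value always lies in [0, 1]
```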