Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<SystemIOFileSystemAccessControl>4.5.0</SystemIOFileSystemAccessControl>
<SystemSecurityPrincipalWindows>4.5.0</SystemSecurityPrincipalWindows>
<TensorFlowVersion>1.10.0</TensorFlowVersion>
<MathNumericPackageVersion>4.6.0</MathNumericPackageVersion>
Copy link
Member

@eerhardt eerhardt Nov 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(nit) since this is a dependency in the Microsoft.ML nuget package, it belongs under the Core Product Dependencies section above. #Resolved

</PropertyGroup>

<!-- Code Analyzer Dependencies -->
Expand Down
94 changes: 94 additions & 0 deletions src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Trainers.HalLearners;
using System;

namespace Microsoft.ML.Runtime.Learners
{
using Mkl = OlsLinearRegressionTrainer.Mkl;

public static class LogisticRegressionTrainingStats
{
    /// <summary>
    /// Computes the standard deviation of each of the non-zero training weights, needed to further calculate the standard deviation,
    /// p-value and z-Score.
    /// This function performs the same calculations as <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/> but it is faster, because it makes use of Intel's MKL.
    /// </summary>
    /// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
    /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
    /// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
    public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
    {
        Contracts.AssertValue(ch);
        // Fixed typos in the message ("Trainig" -> "Training"); ShowTrainingStats is the actual Arguments field name.
        Contracts.AssertValue(model.Statistics, "Training statistics can only get generated after training finishes. Train with setting: ShowTrainingStats set to true.");
        Contracts.Assert(l2Weight > 0);

        int numSelectedParams = model.Statistics.ParametersCount;

        // Apply Cholesky decomposition to find the inverse of the Hessian.
        double[] invHessian = null;
        try
        {
            // First, find the Cholesky decomposition LL' of the Hessian.
            Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
            // Note that the Hessian buffer is modified in place at this point. It no longer holds the
            // original Hessian, but instead the Cholesky factor L. The following routine is supposed to
            // consume the Cholesky decomposition L instead of the original information matrix.
            Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
            // At this point the buffer contains the inverse of the original Hessian matrix.
            // Swap it into invHessian to avoid confusion in the following context.
            Utils.Swap(ref model.Statistics.Hessian, ref invHessian);
            Contracts.Assert(model.Statistics.Hessian == null);
        }
        catch (DllNotFoundException)
        {
            throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
        }

        float[] stdErrorValues = new float[numSelectedParams];
        // Slot 0 is the bias; its variance is the (0,0) entry of the inverse Hessian, and it is
        // not adjusted in the l2Weight loop below (the bias is not regularized), so the square
        // root can be taken immediately.
        stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);

        for (int i = 1; i < numSelectedParams; i++)
        {
            // Initialize with the diagonal of the inverse Hessian. In row-major packed
            // triangular storage, entry (i, i) lives at offset i * (i + 1) / 2 + i.
            stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i];
        }

        if (l2Weight > 0)
        {
            // Iterate through all entries of the inverse Hessian to make the adjustment to variance.
            // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
            // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
            // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
            int ioffset = 1;
            for (int iRow = 1; iRow < numSelectedParams; iRow++)
            {
                for (int iCol = 0; iCol <= iRow; iCol++)
                {
                    var entry = (float)invHessian[ioffset];
                    var adjustment = -l2Weight * entry * entry;
                    stdErrorValues[iRow] -= adjustment;
                    // An off-diagonal entry adjusts both coefficients it connects; iCol == 0
                    // (the bias slot) is deliberately excluded from the second write.
                    if (0 < iCol && iCol < iRow)
                        stdErrorValues[iCol] -= adjustment;
                    ioffset++;
                }
            }

            // The traversal above must have consumed the entire packed matrix exactly once.
            Contracts.Assert(ioffset == invHessian.Length);
        }

        // Convert the accumulated variances into standard deviations (slot 0 was converted above).
        for (int i = 1; i < numSelectedParams; i++)
            stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

        // currentWeights vector size is Weights2 + the bias.
        var currentWeightsCount = model.Weights2.Count + 1;
        VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices);
        model.Statistics.SetCoeffStdError(stdErrors);
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<IncludeInPackage>Microsoft.ML</IncludeInPackage>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="MathNet.Numerics.Signed" Version="$(MathNumericPackageVersion)" />
Copy link
Member

@eerhardt eerhardt Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: you'll need to add this PackageReference to our NuGet package as well:

<PackageReference Include="Newtonsoft.Json" Version="$(NewtonsoftJsonPackageVersion)" />
<PackageReference Include="System.Reflection.Emit.Lightweight" Version="$(SystemReflectionEmitLightweightPackageVersion)" />
<PackageReference Include="System.Threading.Tasks.Dataflow" Version="$(SystemThreadingTasksDataflowPackageVersion)" />
<PackageReference Include="System.CodeDom" Version="$(SystemCodeDomPackageVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
<PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableVersion)" />
<PackageReference Include="System.ComponentModel.Composition" Version="$(SystemComponentModelCompositionVersion)" />
#Resolved

</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
[Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

internal static class Defaults
public static class Defaults
Copy link
Member Author

@sfilipi sfilipi Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public [](start = 11, length = 7)

revert #Resolved

{
internal const float L2Weight = 1;
internal const float L1Weight = 1;
internal const float OptTol = 1e-7f;
internal const int MemorySize = 20;
internal const int MaxIterations = int.MaxValue;
internal const bool EnforceNonNegativity = false;
public const float L2Weight = 1;
public const float L1Weight = 1;
public const float OptTol = 1e-7f;
public const int MemorySize = 20;
public const int MaxIterations = int.MaxValue;
public const bool EnforceNonNegativity = false;
}
}

Expand Down Expand Up @@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
}

protected virtual int ClassCount => 1;
protected int BiasCount => ClassCount;
Copy link
Member Author

@sfilipi sfilipi Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

protecte [](start = 6, length = 10)

revert #Resolved

public int BiasCount => ClassCount;
protected int WeightCount => ClassCount * NumFeatures;
protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public sealed class Arguments : ArgumentsBase
public bool ShowTrainingStats = false;
}

private Double _posWeight;
private double _posWeight;
private LinearModelStatistics _stats;

/// <summary>
Expand Down Expand Up @@ -329,8 +329,9 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
}
}
}

_stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
_stats.Hessian = hessian;
_stats.WeightIndices = weightIndices;
}

protected override void ProcessPriorDistribution(float label, float weight)
Expand Down
128 changes: 115 additions & 13 deletions src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using MathNet.Numerics.LinearAlgebra;
Copy link
Member

@eerhardt eerhardt Nov 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this using necessary? I don't see any Math.NET usages below. #Resolved

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.CpuMath;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// This is for deserialization from a model repository.
[assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
Expand Down Expand Up @@ -82,15 +82,20 @@ private static VersionInfo GetVersionInfo()
// It could be null when there are too many non-zero weights so that
// the memory is insufficient to hold the Hessian matrix necessary for the computation
// of the variance-covariance matrix.
private readonly VBuffer<Single>? _coeffStdError;
private VBuffer<Single>? _coeffStdError;

public long TrainingExampleCount => _trainingExampleCount;

public long TrainingExampleCount { get { return _trainingExampleCount; } }
public Single Deviance => _deviance;

public Single Deviance { get { return _deviance; } }
public Single NullDeviance => _nullDeviance;

public Single NullDeviance { get { return _nullDeviance; } }
public int ParametersCount => _paramCount;

public int ParametersCount { get { return _paramCount; } }
public Double[] Hessian;

// Indices of bias and non-zero weight slots.
public int[] WeightIndices;

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
{
Expand All @@ -111,7 +116,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
_coeffStdError = coeffStdError;
}

public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
_env = env;
Expand Down Expand Up @@ -157,7 +162,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
_coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
}

public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
Expand Down Expand Up @@ -208,6 +213,10 @@ private void SaveCore(ModelSaveContext ctx)
ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.Indices, _paramCount);
}

/// <summary>
/// Computes the standard deviation, Z-Score and p-Value.
/// Should be called after <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/>.
/// </summary>
public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
{
if (!stats._coeffStdError.HasValue)
Expand All @@ -222,10 +231,97 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
stdError = stats._coeffStdError.Value.Values[0];
Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
zScore = bias / stdError;
pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
return true;
}

/// <summary>
/// Computes the standard deviation of each of the non-zero training weights, needed to further calculate the standard deviation,
/// p-value and z-Score.
/// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration.
/// </summary>
/// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
/// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
/// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true.");
    Contracts.Assert(l2Weight > 0);

    int numSelectedParams = model.Statistics.ParametersCount;

    // Unpack the triangular-packed Hessian into a full dense square matrix so that
    // Math.NET's general-purpose Inverse can be applied to it.
    double[] hessian = model.Statistics.Hessian;
    double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];

    // hessianLength is the read cursor into the packed array; dimention is the last row/column index.
    int hessianLength = 0;
    int dimention = numSelectedParams - 1;

    // NOTE(review): this fill places packed entries along anti-diagonals (row + col == dimention
    // is the main anti-diagonal, and each symmetric pair is mirrored across it), so the dense
    // matrix is a 180-degree-rotated view of the usual layout. The anti-diagonal indexing used on
    // the inverse below relies on exactly this orientation — confirm it matches the packing
    // produced during training.
    for (int row = dimention; row >= 0; row--)
    {
        for (int col = 0; col <= dimention; col++)
        {
            if ((row + col) <= dimention)
            {
                if ((row + col) == dimention)
                {
                    // Entry on the anti-diagonal: no symmetric counterpart to mirror.
                    matrixHessian[row, col] = hessian[hessianLength];
                }
                else
                {
                    // Off-anti-diagonal entry: write it and its mirror image.
                    matrixHessian[row, col] = hessian[hessianLength];
                    matrixHessian[dimention - col, dimention - row] = hessian[hessianLength];
                }
                hessianLength++;
            }
            else
                continue;
        }
    }

    var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
    var invers = h.Inverse();

    float[] stdErrorValues2 = new float[numSelectedParams];
    // Variance of parameter 0 (the bias). Because of the rotated layout above, the logical
    // diagonal of the inverse lies on [i, numSelectedParams - i - 1].
    stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]);

    for (int i = 1; i < numSelectedParams; i++)
    {
        // Initialize with inverse Hessian.
        // The diagonal of the inverse Hessian (read along the anti-diagonal, see note above).
        stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1];
    }

    if (l2Weight > 0)
    {
        // Iterate through all entries of inverse Hessian to make adjustment to variance.
        // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
        // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
        // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
        int ioffset = 1;
        for (int iRow = 1; iRow < numSelectedParams; iRow++)
        {
            for (int iCol = 0; iCol <= iRow; iCol++)
            {
                float entry = (float)invers[iRow, numSelectedParams - iCol - 1];
                var adjustment = -l2Weight * entry * entry;
                stdErrorValues2[iRow] -= adjustment;

                // An off-diagonal entry adjusts both coefficients it connects; iCol == 0
                // (the bias slot) is excluded from the second write.
                if (0 < iCol && iCol < iRow)
                    stdErrorValues2[iCol] -= adjustment;
                ioffset++;
            }
        }
    }

    // Convert accumulated variances into standard deviations (index 0 was converted above).
    for (int i = 1; i < numSelectedParams; i++)
        stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]);

    var currentWeightsCount = model.Weights2.Count + 1; // adding one for the bias
    VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues2, model.Statistics.WeightIndices);
    model.Statistics.SetCoeffStdError(stdErrors);
}

private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer<Single> weights, in VBuffer<ReadOnlyMemory<char>> names,
ref VBuffer<Single> estimate, ref VBuffer<Single> stdErr, ref VBuffer<Single> zScore, ref VBuffer<Single> pValue, out ValueGetter<VBuffer<ReadOnlyMemory<char>>> getSlotNames)
{
Expand Down Expand Up @@ -285,6 +381,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
};
}

/// <summary>
/// Stores the coefficient standard errors computed externally (e.g. by a ComputeStd method)
/// so they can be reported with the rest of the training statistics.
/// </summary>
/// <param name="coeffStdError">Standard errors for the selected parameters; its count must
/// equal the number of parameters these statistics were built with.</param>
public void SetCoeffStdError(VBuffer<Single> coeffStdError)
{
    // One standard-error entry per selected parameter.
    _env.Assert(coeffStdError.Count == _paramCount);
    _coeffStdError = coeffStdError;
}

private IEnumerable<CoefficientStatistics> GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema)
{
Contracts.AssertValue(_env);
Expand Down
Loading