diff --git a/build/Dependencies.props b/build/Dependencies.props
index 7a79b3a087..47f34e0e1e 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -9,6 +9,7 @@
4.3.0
4.8.0
4.5.0
+ 4.6.0
diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
index 75517c587e..f479d0e970 100644
--- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
+++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
@@ -8,6 +8,7 @@
+
diff --git a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
new file mode 100644
index 0000000000..66868c1c9a
--- /dev/null
+++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
@@ -0,0 +1,92 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Trainers.HalLearners;
+using System;
+
+namespace Microsoft.ML.Runtime.Learners
+{
+ using Mkl = OlsLinearRegressionTrainer.Mkl;
+
+ public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd
+ {
+ /// <summary>
+ /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+ /// p-value and z-Score.
+ /// This implementation makes use of hardware acceleration through the Intel MKL library (MklImports), making it
+ /// considerably faster than <see cref="ComputeLRTrainingStdImpl"/> for large numbers of parameters.
+ /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+ /// </summary>
+ /// <param name="hessian">The packed lower-triangular Hessian matrix computed during training.</param>
+ /// <param name="weightIndices">The indices of the non-zero training weights.</param>
+ /// <param name="numSelectedParams">The number of parameters selected for training.</param>
+ /// <param name="currentWeightsCount">The length of the weights vector, including the bias.</param>
+ /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
+ /// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
+ public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+ {
+ Contracts.AssertValue(ch);
+ Contracts.AssertValue(hessian, nameof(hessian));
+ Contracts.Assert(numSelectedParams > 0);
+ Contracts.Assert(currentWeightsCount > 0);
+ Contracts.Assert(l2Weight > 0);
+
+ // Apply Cholesky Decomposition to find the inverse of the Hessian.
+ double[] invHessian = null;
+ try
+ {
+ // First, find the Cholesky decomposition LL' of the Hessian.
+ Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+ // Note that hessian is already modified at this point. It is no longer the original Hessian,
+ // but instead represents the Cholesky decomposition L.
+ // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
+ // of the original information matrix.
+ Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+ // At this point, hessian should contain the inverse of the original Hessian matrix.
+ // Swap hessian with invHessian to avoid confusion in the following context.
+ Utils.Swap(ref hessian, ref invHessian);
+ Contracts.Assert(hessian == null);
+ }
+ catch (DllNotFoundException)
+ {
+ throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
+ }
+
+ float[] stdErrorValues = new float[numSelectedParams];
+ stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);
+
+ for (int i = 1; i < numSelectedParams; i++)
+ {
+ // Initialize with inverse Hessian.
+ stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i];
+ }
+
+ if (l2Weight > 0)
+ {
+ // Iterate through all entries of inverse Hessian to make adjustment to variance.
+ // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+ // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+ int ioffset = 1;
+ for (int iRow = 1; iRow < numSelectedParams; iRow++)
+ {
+ for (int iCol = 0; iCol <= iRow; iCol++)
+ {
+ var entry = (float)invHessian[ioffset++];
+ AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+ }
+ }
+
+ Contracts.Assert(ioffset == invHessian.Length);
+ }
+
+ for (int i = 1; i < numSelectedParams; i++)
+ stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+ // The currentWeights vector size is the size of Weights2 plus the bias.
+ return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+ }
+ }
+}
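Note on the indexing above: the `i * (i + 1) / 2 + i` expression pulls diagonal entries out of LAPACK's packed lower-triangular, row-major storage, which is the layout `Pptrf`/`Pptri` consume and produce. A minimal standalone sketch of that indexing (not part of this change):

```csharp
using System;

class PackedIndexDemo
{
    // Packed lower-triangular, row-major storage: rows 0..r-1 contribute
    // 1 + 2 + ... + r = r * (r + 1) / 2 entries before row r starts,
    // so element (r, c) with c <= r lives at index r * (r + 1) / 2 + c.
    static int PackedIndex(int r, int c) => r * (r + 1) / 2 + c;

    static void Main()
    {
        // The diagonal entry (i, i) is therefore at i * (i + 1) / 2 + i,
        // the index used to read the variances out of the packed inverse Hessian.
        for (int i = 0; i < 4; i++)
            Console.WriteLine($"({i},{i}) -> {PackedIndex(i, i)}"); // 0, 2, 5, 9
    }
}
```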
diff --git a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
index 415752aa8d..671913b203 100644
--- a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
+++ b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
@@ -6,5 +6,6 @@
using Microsoft.ML;
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)]
+[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)]
[assembly: WantsToBeBestFriends]
diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
index b1624559cd..d1c2fba257 100644
--- a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
+++ b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
@@ -1,4 +1,4 @@
-
+
netstandard2.0
@@ -6,6 +6,10 @@
true
+
+
+
+
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
index 1f5f49fe40..60c81b0ed1 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
@@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
+using MathNet.Numerics.LinearAlgebra;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
@@ -40,11 +41,27 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase
+ /// <summary>
+ /// If set to <see langword="true" />, training statistics will be generated at the end of training.
+ /// If you have a large number of learned training parameters (more than 500),
+ /// generating the training statistics might take a few seconds.
+ /// More than 1000 weights might take a few minutes. For those cases consider using the
+ /// <see cref="ComputeLRTrainingStdThroughHal"/> instance present in the Microsoft.ML.HalLearners package,
+ /// which computes the statistics using hardware acceleration.
+ /// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)]
public bool ShowTrainingStats = false;
+
+ /// <summary>
+ /// The instance of <see cref="ComputeLRTrainingStd"/> that computes the training statistics at the end of training.
+ /// If you have a large number of learned training parameters (more than 500),
+ /// generating the training statistics might take a few seconds.
+ /// More than 1000 weights might take a few minutes. For those cases consider using the
+ /// <see cref="ComputeLRTrainingStdThroughHal"/> instance present in the Microsoft.ML.HalLearners package,
+ /// which computes the statistics using hardware acceleration.
+ /// </summary>
+ public ComputeLRTrainingStd StdComputer;
}
- private Double _posWeight;
+ private double _posWeight;
private LinearModelStatistics _stats;
///
@@ -78,6 +95,9 @@ public LogisticRegression(IHostEnvironment env,
_posWeight = 0;
ShowTrainingStats = Args.ShowTrainingStats;
+
+ if (ShowTrainingStats && Args.StdComputer == null)
+ Args.StdComputer = new ComputeLRTrainingStdImpl();
}
///
@@ -88,6 +108,9 @@ internal LogisticRegression(IHostEnvironment env, Arguments args)
{
_posWeight = 0;
ShowTrainingStats = Args.ShowTrainingStats;
+
+ if (ShowTrainingStats && Args.StdComputer == null)
+ Args.StdComputer = new ComputeLRTrainingStdImpl();
}
public override PredictionKind PredictionKind => PredictionKind.BinaryClassification;
@@ -330,7 +353,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
}
}
- _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+ if (Args.StdComputer == null)
+ _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+ else
+ {
+ var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Length, ch, L2Weight);
+ _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, std);
+ }
}
protected override void ProcessPriorDistribution(float label, float weight)
@@ -397,4 +426,125 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm
() => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn));
}
}
+
+ /// <summary>
+ /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+ /// p-value and z-Score.
+ /// If you need fast calculations, use the <see cref="ComputeLRTrainingStdThroughHal"/> implementation in the Microsoft.ML.HalLearners package,
+ /// which makes use of hardware acceleration.
+ /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+ /// </summary>
+ public abstract class ComputeLRTrainingStd
+ {
+ /// <summary>
+ /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+ /// p-value and z-Score.
+ /// If you need fast calculations, use the ComputeStd method of <see cref="ComputeLRTrainingStdThroughHal"/> from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+ /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+ /// </summary>
+ public abstract VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight);
+
+ /// <summary>
+ /// Adjusts the variance for regularized cases.
+ /// </summary>
+ [BestFriend]
+ internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Weight, float[] stdErrorValues2)
+ {
+ var adjustment = l2Weight * inverseEntry * inverseEntry;
+ stdErrorValues2[iRow] -= adjustment;
+
+ if (0 < iCol && iCol < iRow)
+ stdErrorValues2[iCol] -= adjustment;
+ }
+ }
+
+ /// <summary>
+ /// Extends <see cref="ComputeLRTrainingStd"/>, making use of Math.NET Numerics.
+ /// If you need faster calculations (i.e. you have non-sparse weight vectors of more than 300 features), use the
+ /// <see cref="ComputeLRTrainingStdThroughHal"/> instance from the Microsoft.ML.HalLearners package, which makes use of
+ /// hardware acceleration for those computations.
+ /// </summary>
+ public sealed class ComputeLRTrainingStdImpl : ComputeLRTrainingStd
+ {
+ /// <summary>
+ /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+ /// p-value and z-Score.
+ /// If you need faster calculations, use the ComputeStd method of <see cref="ComputeLRTrainingStdThroughHal"/> from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+ /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+ /// </summary>
+ /// <param name="hessian">The packed lower-triangular Hessian matrix computed during training.</param>
+ /// <param name="weightIndices">The indices of the non-zero training weights.</param>
+ /// <param name="numSelectedParams">The number of parameters selected for training.</param>
+ /// <param name="currentWeightsCount">The length of the weights vector, including the bias.</param>
+ /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
+ /// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
+ public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+ {
+ Contracts.AssertValue(ch);
+ Contracts.AssertValue(hessian, nameof(hessian));
+ Contracts.Assert(numSelectedParams > 0);
+ Contracts.Assert(currentWeightsCount > 0);
+ Contracts.Assert(l2Weight > 0);
+
+ double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];
+
+ int hessianLength = 0;
+ int dimension = numSelectedParams - 1;
+
+ // Unpack the packed (lower-triangular, row-major) Hessian into a dense matrix.
+ // The packed entry for (a, b) lands at matrixHessian[dimension - a, b] (plus its mirror),
+ // producing the Hessian with its row order reversed; inverting that reverses the columns
+ // of the inverse instead, which is why entries are read as [i, dimension - i] below.
+ for (int row = dimension; row >= 0; row--)
+ {
+ for (int col = 0; col <= dimension; col++)
+ {
+ if ((row + col) > dimension)
+ continue;
+
+ matrixHessian[row, col] = hessian[hessianLength];
+ if ((row + col) < dimension)
+ matrixHessian[dimension - col, dimension - row] = hessian[hessianLength];
+
+ hessianLength++;
+ }
+ }
+
+ var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
+ var inverse = h.Inverse();
+
+ float[] stdErrorValues = new float[numSelectedParams];
+ stdErrorValues[0] = (float)Math.Sqrt(inverse[0, numSelectedParams - 1]);
+
+ for (int i = 1; i < numSelectedParams; i++)
+ {
+ // Initialize with the diagonal of the inverse Hessian, which sits on the
+ // anti-diagonal of the flipped matrix.
+ stdErrorValues[i] = (float)inverse[i, numSelectedParams - i - 1];
+ }
+
+ if (l2Weight > 0)
+ {
+ // Iterate through all entries of inverse Hessian to make adjustment to variance.
+ // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+ // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+ for (int iRow = 1; iRow < numSelectedParams; iRow++)
+ {
+ for (int iCol = 0; iCol <= iRow; iCol++)
+ {
+ float entry = (float)inverse[iRow, numSelectedParams - iCol - 1];
+ AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+ }
+ }
+ }
+
+ for (int i = 1; i < numSelectedParams; i++)
+ stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+ return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+ }
+ }
}
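Two notes on the Math.NET path above. First, because the unpacking loop builds the Hessian with its rows reversed, the inverse comes back with its columns reversed, so the diagonal of H⁻¹ is read as `inverse[i, numSelectedParams - i - 1]`. Second, the `AdjustVariance` subtraction matches the ridge covariance approximation Var(β̂) ≈ A⁻¹HA⁻¹ = A⁻¹ − λ·A⁻¹A⁻¹ (assuming the stored Hessian A already includes the L2 term λI, per the references cited in the comments), whose diagonal is [A⁻¹]ᵢᵢ − λ·Σⱼ[A⁻¹]ᵢⱼ². A small standalone sketch (not part of this change) verifying the flipped layout on a 3×3 example:

```csharp
using System;

class FlippedUnpackDemo
{
    static void Main()
    {
        // A symmetric 3x3 matrix and its packed lower-triangular, row-major form.
        double[,] h = { { 1, 2, 4 }, { 2, 3, 5 }, { 4, 5, 6 } };
        double[] packed = { 1, 2, 3, 4, 5, 6 }; // (0,0), (1,0), (1,1), (2,0), (2,1), (2,2)

        int n = 3, dim = n - 1, k = 0;
        var m = new double[n, n];
        for (int row = dim; row >= 0; row--)
            for (int col = 0; col <= dim; col++)
            {
                if (row + col > dim)
                    continue;
                m[row, col] = packed[k];
                if (row + col < dim)
                    m[dim - col, dim - row] = packed[k];
                k++;
            }

        // m is h with its rows reversed: m[r, c] == h[dim - r, c] for all r, c.
        for (int r = 0; r < n; r++)
            for (int c = 0; c < n; c++)
                Console.WriteLine($"m[{r},{c}] = {m[r, c]}  h[{dim - r},{c}] = {h[dim - r, c]}");
    }
}
```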
diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
index bd69453d5a..1eeb043c01 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -2,17 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
-using System.Collections.Generic;
-using System.ComponentModel;
-using System.IO;
-using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.CpuMath;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
// This is for deserialization from a model repository.
[assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -84,13 +83,13 @@ private static VersionInfo GetVersionInfo()
// of the variance-covariance matrix.
private readonly VBuffer<Single>? _coeffStdError;
- public long TrainingExampleCount { get { return _trainingExampleCount; } }
+ public long TrainingExampleCount => _trainingExampleCount;
- public Single Deviance { get { return _deviance; } }
+ public Single Deviance => _deviance;
- public Single NullDeviance { get { return _nullDeviance; } }
+ public Single NullDeviance => _nullDeviance;
- public int ParametersCount { get { return _paramCount; } }
+ public int ParametersCount => _paramCount;
internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
{
@@ -111,7 +110,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
_coeffStdError = coeffStdError;
}
- public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
+ internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
_env = env;
@@ -157,7 +156,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
_coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
}
- public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
+ internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
@@ -209,6 +208,9 @@ private void SaveCore(ModelSaveContext ctx)
ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.GetIndices());
}
+ /// <summary>
+ /// Computes the standard deviation, Z-Score and p-Value of the bias.
+ /// </summary>
public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
{
if (!stats._coeffStdError.HasValue)
@@ -223,7 +225,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
stdError = stats._coeffStdError.Value.Values[0];
Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
zScore = bias / stdError;
- pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
+ pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
return true;
}
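The `pValue` expression above is the standard two-sided normal p-value: p = 1 − erf(|z|/√2) = 2·(1 − Φ(|z|)). Since this change already pulls in Math.NET Numerics, the same quantity can be sanity-checked against its `SpecialFunctions.Erf` (a throwaway sketch, not code from this diff):

```csharp
using System;
using MathNet.Numerics; // SpecialFunctions.Erf

class PValueCheck
{
    static void Main()
    {
        // Two-sided p-value from a z-score, mirroring TryGetBiasStatistics:
        // p = 1 - erf(|z| / sqrt(2)) = 2 * (1 - Phi(|z|)).
        double zScore = 1.959964; // the classic 5% two-sided critical value
        double pValue = 1.0 - SpecialFunctions.Erf(Math.Abs(zScore) / Math.Sqrt(2.0));
        Console.WriteLine(pValue); // ~0.05
    }
}
```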
diff --git a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
index 6d58cb8d2d..4bd1c57233 100644
--- a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
+++ b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
@@ -13,3 +13,15 @@ Count of training examples: 32561
Residual Deviance: 26705.74
Null Deviance: 35948.08
AIC: 26719.74
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -8.228298 0.1161297 -70.85435 0 ***
+education-num 5.066041 0.1048074 48.33666 0 ***
+capital-gain 18.58347 0.4694776 39.5833 0 ***
+age 3.86064 0.1061118 36.38277 0 ***
+hours-per-week 3.946534 0.1258723 31.35349 0 ***
+capital-loss 2.81616 0.13793 20.41732 0 ***
+fnlwgt 0.7489593 0.2048056 3.656927 0.0002553463 ***
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
index 5c5d36e4b6..057ef0ff87 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
@@ -5,6 +5,14 @@
#@ col={name={Residual Deviance} type=R4 src=1}
#@ col={name={Null Deviance} type=R4 src=2}
#@ col=AIC:R4:3
+#@ col=BiasEstimate:R4:4
+#@ col=BiasStandardError:R4:5
+#@ col=BiasZScore:R4:6
+#@ col=BiasPValue:R4:7
+#@ col=Estimate:R4:8-16
+#@ col=StandardError:R4:17-25
+#@ col=ZScore:R4:26-34
+#@ col=PValue:R4:35-43
#@ }
-Count of training examples Residual Deviance Null Deviance AIC
-521 98.29433 669.0935 118.294327
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+521 98.29433 669.0935 118.294327 -5.120674 0.699818552 -7.31714535 0 2.353567 1.78653753 1.9442488 1.38072 1.0831089 2.43588924 1.61141682 1.34575915 -0.7715381 0.4267568 0.42040658 0.41370967 0.482155383 0.456691444 0.451504 0.4605175 0.478413582 0.342069477 5.5150075 4.249547 4.69954872 2.86364126 2.37164259 5.395056 3.4991436 2.81296182 -2.255501 5.96046448E-08 2.14576721E-05 2.62260437E-06 0.00418818 0.0177091956 5.96046448E-08 0.000466823578 0.00490885973 0.0241017938
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
index 152e94f64d..dbb2224574 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
@@ -5,6 +5,14 @@
#@ col={name={Residual Deviance} type=R4 src=1}
#@ col={name={Null Deviance} type=R4 src=2}
#@ col=AIC:R4:3
+#@ col=BiasEstimate:R4:4
+#@ col=BiasStandardError:R4:5
+#@ col=BiasZScore:R4:6
+#@ col=BiasPValue:R4:7
+#@ col=Estimate:R4:8-16
+#@ col=StandardError:R4:17-25
+#@ col=ZScore:R4:26-34
+#@ col=PValue:R4:35-43
#@ }
-Count of training examples Residual Deviance Null Deviance AIC
-520 94.1969452 673.3445 114.196945
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+520 94.1969452 673.3445 114.196945 -4.860323 0.712811947 -6.81852055 0 2.143086 1.49418533 1.71121442 1.38318741 0.883200347 3.16845965 1.38684654 1.51904845 -0.8226236 0.430655479 0.4099987 0.4222687 0.4832917 0.457050323 0.457937717 0.445124656 0.4728626 0.338379949 4.976335 3.64436626 4.05243 2.86201358 1.93239188 6.918975 3.11563635 3.21245217 -2.43106484 6.556511E-07 0.0002681017 5.07235527E-05 0.00420969725 0.05331099 0 0.00183564425 0.00131618977 0.0150545239
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
index beeec64d77..d89d7a7619 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
@@ -14,6 +14,16 @@ Count of training examples: 521
Residual Deviance: 98.29433
Null Deviance: 669.0935
AIC: 118.2943
+(Bias): System.Single[]
+Features.thickness: System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
Partition model 1 summary:
Per-feature gain summary for the boosted tree ensemble:
Features.uniform_size: 1
@@ -43,6 +53,16 @@ Count of training examples: 520
Residual Deviance: 94.19695
Null Deviance: 673.3445
AIC: 114.1969
+(Bias): System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.thickness: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
Partition model 3 summary:
Per-feature gain summary for the boosted tree ensemble:
Features.uniform_size: 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
index 50abe9df54..fadb2e27c8 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
@@ -17,6 +17,21 @@ Count of training examples: 521
Residual Deviance: 98.29433
Null Deviance: 669.0935
AIC: 118.2943
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -5.120674 0.6998186 -7.317145 0 ***
+Features.thickness 2.353567 0.4267568 5.515007 5.960464E-08 ***
+Features.bare_nuclei 2.435889 0.451504 5.395056 5.960464E-08 ***
+Features.uniform_shape 1.944249 0.4137097 4.699549 2.622604E-06 ***
+Features.uniform_size 1.786538 0.4204066 4.249547 2.145767E-05 ***
+Features.bland_chromatin 1.611417 0.4605175 3.499144 0.0004668236 ***
+Features.adhesion 1.38072 0.4821554 2.863641 0.00418818 **
+Features.normal_nucleoli 1.345759 0.4784136 2.812962 0.00490886 **
+Features.epit_size 1.083109 0.4566914 2.371643 0.0177092 *
+Cat.1 -0.7715381 0.3420695 -2.255501 0.02410179 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Partition model 1 summary:
Per-feature gain summary for the boosted tree ensemble:
@@ -50,6 +65,21 @@ Count of training examples: 520
Residual Deviance: 94.19695
Null Deviance: 673.3445
AIC: 114.1969
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -4.860323 0.7128119 -6.818521 0 ***
+Features.bare_nuclei 3.16846 0.4579377 6.918975 0 ***
+Features.thickness 2.143086 0.4306555 4.976335 6.556511E-07 ***
+Features.uniform_shape 1.711214 0.4222687 4.05243 5.072355E-05 ***
+Features.uniform_size 1.494185 0.4099987 3.644366 0.0002681017 ***
+Features.normal_nucleoli 1.519048 0.4728626 3.212452 0.00131619 **
+Features.bland_chromatin 1.386847 0.4451247 3.115636 0.001835644 **
+Features.adhesion 1.383187 0.4832917 2.862014 0.004209697 **
+Features.epit_size 0.8832003 0.4570503 1.932392 0.05331099 .
+Cat.1 -0.8226236 0.3383799 -2.431065 0.01505452 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Partition model 3 summary:
Per-feature gain summary for the boosted tree ensemble:
diff --git a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
index 8e04238c73..c467f102be 100644
--- a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
@@ -5,6 +5,14 @@
#@ col={name={Residual Deviance} type=R4 src=1}
#@ col={name={Null Deviance} type=R4 src=2}
#@ col=AIC:R4:3
+#@ col=BiasEstimate:R4:4
+#@ col=BiasStandardError:R4:5
+#@ col=BiasZScore:R4:6
+#@ col=BiasPValue:R4:7
+#@ col=Estimate:R4:8-16
+#@ col=StandardError:R4:17-25
+#@ col=ZScore:R4:26-34
+#@ col=PValue:R4:35-43
#@ }
-Count of training examples Residual Deviance Null Deviance AIC
-683 126.83107 884.350159 146.83107
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses
+683 126.83107 884.350159 146.83107 -6.186806 0.459383339 -13.4676332 0 2.65800762 1.68089855 1.944068 1.42514718 0.8536965 2.9325006 1.74816787 1.58165014 0.595681 0.455618978 0.429146379 0.431570023 0.479817748 0.470442533 0.4381438 0.469593167 0.4714128 0.467883229 5.83383846 3.916842 4.504641 2.97018433 1.814667 6.69301 3.72272849 3.35512757 1.27314031 0 8.9764595E-05 6.67572E-06 0.002976358 0.06957501 0 0.00019711256 0.0007933974 0.202968419
diff --git a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
index 614166c904..273028ddaa 100644
--- a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
+++ b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
@@ -535,41 +535,52 @@ private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchColl
double f1 = double.Parse(firstCollection[i].ToString());
double f2 = double.Parse(secondCollection[i].ToString());
- // this follows the IEEE recommendations for how to compare floating point numbers
- double allowedVariance = Math.Pow(10, -digitsOfPrecision);
- double delta = Round(f1, digitsOfPrecision) - Round(f2, digitsOfPrecision);
- // limitting to the digits we care about.
- delta = Math.Round(delta, digitsOfPrecision);
-
- bool inRange = delta > -allowedVariance && delta < allowedVariance;
-
- // for some cases, rounding up is not beneficial
- // so checking on whether the difference is significant prior to rounding, and failing only then.
- // example, for 5 digits of precision.
- // F1 = 1.82844949 Rounds to 1.8284
- // F2 = 1.8284502 Rounds to 1.8285
- // would fail the inRange == true check, but would suceed the following, and we doconsider those two numbers
- // (1.82844949 - 1.8284502) = -0.00000071
+ if (!CompareNumbersWithTolerance(f1, f2, i, digitsOfPrecision))
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ public bool CompareNumbersWithTolerance(double expected, double actual, int? iterationOnCollection = null, int digitsOfPrecision = DigitsOfPrecision)
+ {
+ // This follows the IEEE recommendations for how to compare floating point numbers.
+ double allowedVariance = Math.Pow(10, -digitsOfPrecision);
+ double delta = Round(expected, digitsOfPrecision) - Round(actual, digitsOfPrecision);
+ // Limiting to the digits we care about.
+ delta = Math.Round(delta, digitsOfPrecision);
+
+ bool inRange = delta > -allowedVariance && delta < allowedVariance;
+
+ // For some cases, rounding up is not beneficial, so check whether the difference
+ // is significant prior to rounding, and fail only then.
+ // Example, for 5 digits of precision:
+ // F1 = 1.82844949 rounds to 1.8284
+ // F2 = 1.8284502 rounds to 1.8285
+ // These would fail the inRange check above, but succeed the check below, and we do
+ // consider those two numbers equal: (1.82844949 - 1.8284502) = -0.00000071
double delta2 = 0;
if (!inRange)
{
- delta2 = Math.Round(f1 - f2, digitsOfPrecision);
+ delta2 = Math.Round(expected - actual, digitsOfPrecision);
inRange = delta2 >= -allowedVariance && delta2 <= allowedVariance;
}
if (!inRange)
{
- Fail(_allowMismatch, $"Output and baseline mismatch at line {i}." + Environment.NewLine +
- $"Values to compare are {firstCollection[i]} and {secondCollection[i]}" + Environment.NewLine +
- $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
- $"\t delta: {delta}" + Environment.NewLine +
- $"\t delta2: {delta2}" + Environment.NewLine);
- return false;
+ var message = iterationOnCollection != null ? $"Output and baseline mismatch at line {iterationOnCollection}." + Environment.NewLine : "";
+
+ Fail(_allowMismatch, message +
+ $"Values to compare are {expected} and {actual}" + Environment.NewLine +
+ $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
+ $"\t delta: {delta}" + Environment.NewLine +
+ $"\t delta2: {delta2}" + Environment.NewLine);
}
- }
- return true;
+ return inRange;
}
private static double Round(double value, int digitsOfPrecision)
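Since `CompareNumbersWithTolerance` is now public, tests can call it directly. A hypothetical usage (the test name is made up; it assumes a class deriving from `BaseTestBaseline` with xunit's `Assert` in scope) built on the worked example in the comments above:

```csharp
[Fact]
public void ToleranceCompareExample()
{
    // 1.82844949 and 1.8284502 round apart at 5 digits (1.8284 vs 1.8285),
    // but the raw difference (-0.00000071) is within 10^-5, so they compare equal.
    Assert.True(CompareNumbersWithTolerance(1.82844949, 1.8284502, digitsOfPrecision: 5));
}
```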
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index dc5950f8d8..30906c8940 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -4,6 +4,7 @@
using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Xunit;
@@ -38,5 +39,41 @@ public void TestEstimatorPoissonRegression()
TestEstimatorCore(pipe, dataView);
Done();
}
+
+ [Fact]
+ public void TestLogisticRegressionStats()
+ {
+ (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+ pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => { s.ShowTrainingStats = true; }));
+ var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+ var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+ var stats = linearModel.Statistics;
+ LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+ CompareNumbersWithTolerance(stdError, 0.250672936);
+ CompareNumbersWithTolerance(zScore, 7.97852373);
+ }
+
+ [Fact]
+ public void TestLogisticRegressionStats_MKL()
+ {
+ (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+ pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => {
+ s.ShowTrainingStats = true;
+ s.StdComputer = new ComputeLRTrainingStdThroughHal();
+ }));
+
+ var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+ var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+ var stats = linearModel.Statistics;
+ LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+ CompareNumbersWithTolerance(stdError, 0.250672936);
+ CompareNumbersWithTolerance(zScore, 7.97852373);
+ }
}
}
diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
index c7ee67537a..c4bd2fe38c 100644
--- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
+++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
@@ -18,7 +18,7 @@ public sealed class InstanceInitializerAnalyzer : DiagnosticAnalyzer
internal const string DiagnosticId = "MSML_NoInstanceInitializers";
private const string Title = "No initializers on instance fields or properties";
- private const string Format = "Member {0} has a {1} initialier outside the constructor";
+ private const string Format = "Member {0} has a {1} initializer outside the constructor";
private static DiagnosticDescriptor Rule =
new DiagnosticDescriptor(DiagnosticId, Title, Format, Category,