diff --git a/build/Dependencies.props b/build/Dependencies.props
index 7a79b3a087..47f34e0e1e 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -9,6 +9,7 @@
     4.3.0
     4.8.0
     4.5.0
+    4.6.0
diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
index 75517c587e..f479d0e970 100644
--- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
+++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
@@ -8,6 +8,7 @@
+
diff --git a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
new file mode 100644
index 0000000000..66868c1c9a
--- /dev/null
+++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs
@@ -0,0 +1,92 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Trainers.HalLearners;
+using System;
+
+namespace Microsoft.ML.Runtime.Learners
+{
+    using Mkl = OlsLinearRegressionTrainer.Mkl;
+
+    public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// This implementation makes use of hardware acceleration (MKL) and is the faster alternative to the managed ComputeLRTrainingStdImpl.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        ///
+        ///
+        ///
+        /// The used for messaging.
+        /// The L2Weight used for training. (Supply the same one that got used during training.)
+        public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+        {
+            Contracts.AssertValue(ch);
+            Contracts.AssertValue(hessian, nameof(hessian));
+            Contracts.Assert(numSelectedParams > 0);
+            Contracts.Assert(currentWeightsCount > 0);
+            Contracts.Assert(l2Weight > 0);
+
+            // Apply Cholesky decomposition to find the inverse of the Hessian.
+            Double[] invHessian = null;
+            try
+            {
+                // First, find the Cholesky decomposition LL' of the Hessian.
+                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+                // Note that hessian is already modified at this point. It is no longer the original Hessian,
+                // but instead represents the Cholesky decomposition L.
+                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
+                // of the original information matrix.
+                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, hessian);
+                // At this point, hessian should contain the inverse of the original Hessian matrix.
+                // Swap hessian with invHessian to avoid confusion in the following context.
+                Utils.Swap(ref hessian, ref invHessian);
+                Contracts.Assert(hessian == null);
+            }
+            catch (DllNotFoundException)
+            {
+                throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
+            }
+
+            float[] stdErrorValues = new float[numSelectedParams];
+            stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);
+
+            for (int i = 1; i < numSelectedParams; i++)
+            {
+                // Initialize with inverse Hessian.
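+                // The inverse Hessian comes back from MKL in packed row-major lower-triangular
+                // storage, so the diagonal entry (i, i) sits at offset i * (i + 1) / 2 + i.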
+                stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i];
+            }
+
+            if (l2Weight > 0)
+            {
+                // Iterate through all entries of inverse Hessian to make adjustment to variance.
+                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+                // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+                int ioffset = 1;
+                for (int iRow = 1; iRow < numSelectedParams; iRow++)
+                {
+                    for (int iCol = 0; iCol <= iRow; iCol++)
+                    {
+                        var entry = (float)invHessian[ioffset++];
+                        AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+                    }
+                }
+
+                Contracts.Assert(ioffset == invHessian.Length);
+            }
+
+            for (int i = 1; i < numSelectedParams; i++)
+                stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+            // currentWeights vector size is Weights2 + the bias
+            return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+        }
+    }
+}
diff --git a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
index 415752aa8d..671913b203 100644
--- a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
+++ b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs
@@ -6,5 +6,6 @@ using Microsoft.ML;
 
 [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)]
+[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)]
 
 [assembly: WantsToBeBestFriends]
diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
index b1624559cd..d1c2fba257 100644
--- a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
+++ b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
@@ -1,4 +1,4 @@
-
+
   netstandard2.0
@@ -6,6 +6,10 @@
   true
+
+
+
+
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
index 1f5f49fe40..60c81b0ed1 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
@@ -4,6 +4,7 @@
 using System;
 using System.Collections.Generic;
+using MathNet.Numerics.LinearAlgebra;
 using Microsoft.ML.Core.Data;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.CommandLine;
@@ -40,11 +41,27 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase
+            ///
+            /// If set to true, training statistics will be generated at the end of training.
+            /// If you have a large number of learned training parameters (more than 500),
+            /// generating the training statistics might take a few seconds.
+            /// More than 1000 weights might take a few minutes. For those cases consider using the instance of ComputeLRTrainingStdThroughHal
+            /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
+            ///
             [Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)]
             public bool ShowTrainingStats = false;
+
+            ///
+            /// The instance of ComputeLRTrainingStd that computes the training statistics at the end of training.
+            /// If you have a large number of learned training parameters (more than 500),
+            /// generating the training statistics might take a few seconds.
+            /// More than 1000 weights might take a few minutes. For those cases consider using the instance of ComputeLRTrainingStdThroughHal
+            /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
+            ///
+            public ComputeLRTrainingStd StdComputer;
         }
 
-        private Double _posWeight;
+        private double _posWeight;
         private LinearModelStatistics _stats;
 
         ///
@@ -78,6 +95,9 @@ public LogisticRegression(IHostEnvironment env,
             _posWeight = 0;
             ShowTrainingStats = Args.ShowTrainingStats;
+
+            if (ShowTrainingStats && Args.StdComputer == null)
+                Args.StdComputer = new ComputeLRTrainingStdImpl();
         }
 
         ///
@@ -88,6 +108,9 @@ internal LogisticRegression(IHostEnvironment env, Arguments args)
         {
             _posWeight = 0;
             ShowTrainingStats = Args.ShowTrainingStats;
+
+            if (ShowTrainingStats && Args.StdComputer == null)
+                Args.StdComputer = new ComputeLRTrainingStdImpl();
         }
 
         public override PredictionKind PredictionKind => PredictionKind.BinaryClassification;
@@ -330,7 +353,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
                 }
             }
 
-            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+            if (Args.StdComputer == null)
+                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
+            else
+            {
+                var std = Args.StdComputer.ComputeStd(hessian, weightIndices, numParams, CurrentWeights.Length, ch, L2Weight);
+                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, std);
+            }
         }
 
         protected override void ProcessPriorDistribution(float label, float weight)
@@ -397,4 +426,125 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm
                 () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn));
         }
     }
+
+    ///
+    /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+    /// p-value and z-Score.
+    /// If you need fast calculations, use the implementation in the Microsoft.ML.HalLearners package,
+    /// which makes use of hardware acceleration.
+    /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+    ///
+    public abstract class ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// If you need fast calculations, use the ComputeStd method from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        public abstract VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight);
+
+        ///
+        /// Adjust the variance for regularized cases.
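+        /// The correction subtracts l2Weight * entry^2 from the running variance of the row coefficient,
+        /// and of the column coefficient for strictly off-diagonal, non-bias entries (see the body below).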
+        ///
+        [BestFriend]
+        internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Weight, float[] stdErrorValues2)
+        {
+            var adjustment = l2Weight * inverseEntry * inverseEntry;
+            stdErrorValues2[iRow] -= adjustment;
+
+            if (0 < iCol && iCol < iRow)
+                stdErrorValues2[iCol] -= adjustment;
+        }
+    }
+
+    ///
+    /// Extends ComputeLRTrainingStd, making use of Math.Net Numerics.
+    /// If you need faster calculations (have non-sparse weight vectors of more than 300 features), use the instance of ComputeLRTrainingStd from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration
+    /// for those computations.
+    ///
+    public sealed class ComputeLRTrainingStdImpl : ComputeLRTrainingStd
+    {
+        ///
+        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
+        /// p-value and z-Score.
+        /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
+        /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
+        ///
+        ///
+        ///
+        ///
+        /// The used for messaging.
+        /// The L2Weight used for training. (Supply the same one that got used during training.)
+        public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
+        {
+            Contracts.AssertValue(ch);
+            Contracts.AssertValue(hessian, nameof(hessian));
+            Contracts.Assert(numSelectedParams > 0);
+            Contracts.Assert(currentWeightsCount > 0);
+            Contracts.Assert(l2Weight > 0);
+
+            double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];
+
+            int hessianLength = 0;
+            int dimension = numSelectedParams - 1;
+
+            for (int row = dimension; row >= 0; row--)
+            {
+                for (int col = 0; col <= dimension; col++)
+                {
+                    if ((row + col) <= dimension)
+                    {
+                        if ((row + col) == dimension)
+                        {
+                            matrixHessian[row, col] = hessian[hessianLength];
+                        }
+                        else
+                        {
+                            matrixHessian[row, col] = hessian[hessianLength];
+                            matrixHessian[dimension - col, dimension - row] = hessian[hessianLength];
+                        }
+                        hessianLength++;
+                    }
+                    else
+                        continue;
+                }
+            }
+
+            var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
+            var invers = h.Inverse();
+
+            float[] stdErrorValues = new float[numSelectedParams];
+            stdErrorValues[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]);
+
+            for (int i = 1; i < numSelectedParams; i++)
+            {
+                // Initialize with inverse Hessian.
+                // The diagonal of the inverse Hessian.
+                stdErrorValues[i] = (float)invers[i, numSelectedParams - i - 1];
+            }
+
+            if (l2Weight > 0)
+            {
+                // Iterate through all entries of inverse Hessian to make adjustment to variance.
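+                // Note the flipped column index below: matrixHessian was filled along anti-diagonals above,
+                // so the logical entry (iRow, iCol) of the inverse is read from invers[iRow, numSelectedParams - iCol - 1].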
+                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
+                // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
+                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
+                for (int iRow = 1; iRow < numSelectedParams; iRow++)
+                {
+                    for (int iCol = 0; iCol <= iRow; iCol++)
+                    {
+                        float entry = (float)invers[iRow, numSelectedParams - iCol - 1];
+                        AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
+                    }
+                }
+            }
+
+            for (int i = 1; i < numSelectedParams; i++)
+                stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);
+
+            return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
+        }
+    }
 }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
index bd69453d5a..1eeb043c01 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -2,17 +2,16 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using System;
-using System.Collections.Generic;
-using System.ComponentModel;
-using System.IO;
-using System.Linq;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.Internal.CpuMath;
 using Microsoft.ML.Runtime.Internal.Utilities;
 using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Runtime.Model;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
 
 // This is for deserialization from a model repository.
 [assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -84,13 +83,13 @@ private static VersionInfo GetVersionInfo()
     // of the variance-covariance matrix.
     private readonly VBuffer<Single>? _coeffStdError;
 
-    public long TrainingExampleCount { get { return _trainingExampleCount; } }
+    public long TrainingExampleCount => _trainingExampleCount;
 
-    public Single Deviance { get { return _deviance; } }
+    public Single Deviance => _deviance;
 
-    public Single NullDeviance { get { return _nullDeviance; } }
+    public Single NullDeviance => _nullDeviance;
 
-    public int ParametersCount { get { return _paramCount; } }
+    public int ParametersCount => _paramCount;
 
     internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
     {
@@ -111,7 +110,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
         _coeffStdError = coeffStdError;
     }
 
-    public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
+    internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
     {
         Contracts.CheckValue(env, nameof(env));
         _env = env;
@@ -157,7 +156,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
         _coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
     }
 
-    public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
+    internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
     {
         Contracts.CheckValue(env, nameof(env));
         env.CheckValue(ctx, nameof(ctx));
@@ -209,6 +208,9 @@ private void SaveCore(ModelSaveContext ctx)
         ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.GetIndices());
     }
 
+    ///
+    /// Computes the standard deviation, Z-Score and p-Value.
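+    /// Returns false when the statistics do not include the coefficient standard errors,
+    /// i.e. when the model was trained without training statistics enabled.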
+    ///
     public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
     {
         if (!stats._coeffStdError.HasValue)
@@ -223,7 +225,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
         stdError = stats._coeffStdError.Value.Values[0];
         Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
         zScore = bias / stdError;
-        pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
+        pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
         return true;
     }
 
diff --git a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
index 6d58cb8d2d..4bd1c57233 100644
--- a/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
+++ b/test/BaselineOutput/Common/Command/CommandTrainingLrWithStats-summary.txt
@@ -13,3 +13,15 @@ Count of training examples: 32561
 Residual Deviance: 26705.74
 Null Deviance: 35948.08
 AIC: 26719.74
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -8.228298 0.1161297 -70.85435 0 ***
+education-num 5.066041 0.1048074 48.33666 0 ***
+capital-gain 18.58347 0.4694776 39.5833 0 ***
+age 3.86064 0.1061118 36.38277 0 ***
+hours-per-week 3.946534 0.1258723 31.35349 0 ***
+capital-loss 2.81616 0.13793 20.41732 0 ***
+fnlwgt 0.7489593 0.2048056 3.656927 0.0002553463 ***
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
index 5c5d36e4b6..057ef0ff87 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model0-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-521 98.29433 669.0935 118.294327
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+521 98.29433 669.0935 118.294327 -5.120674 0.699818552 -7.31714535 0 2.353567 1.78653753 1.9442488 1.38072 1.0831089 2.43588924 1.61141682 1.34575915 -0.7715381 0.4267568 0.42040658 0.41370967 0.482155383 0.456691444 0.451504 0.4605175 0.478413582 0.342069477 5.5150075 4.249547 4.69954872 2.86364126 2.37164259 5.395056 3.4991436 2.81296182 -2.255501 5.96046448E-08 2.14576721E-05 2.62260437E-06 0.00418818 0.0177091956 5.96046448E-08 0.000466823578 0.00490885973 0.0241017938
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
index 152e94f64d..dbb2224574 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-model2-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-520 94.1969452 673.3445 114.196945
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1 Features.thickness Features.uniform_size Features.uniform_shape Features.adhesion Features.epit_size Features.bare_nuclei Features.bland_chromatin Features.normal_nucleoli Cat.1
+520 94.1969452 673.3445 114.196945 -4.860323 0.712811947 -6.81852055 0 2.143086 1.49418533 1.71121442 1.38318741 0.883200347 3.16845965 1.38684654 1.51904845 -0.8226236 0.430655479 0.4099987 0.4222687 0.4832917 0.457050323 0.457937717 0.445124656 0.4728626 0.338379949 4.976335 3.64436626 4.05243 2.86201358 1.93239188 6.918975 3.11563635 3.21245217 -2.43106484 6.556511E-07 0.0002681017 5.07235527E-05 0.00420969725 0.05331099 0 0.00183564425 0.00131618977 0.0150545239
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
index beeec64d77..d89d7a7619 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary-key-value-pairs.txt
@@ -14,6 +14,16 @@ Count of training examples: 521
 Residual Deviance: 98.29433
 Null Deviance: 669.0935
 AIC: 118.2943
+(Bias): System.Single[]
+Features.thickness: System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
 Partition model 1 summary:
 Per-feature gain summary for the boosted tree ensemble:
 	Features.uniform_size: 1
@@ -43,6 +53,16 @@ Count of training examples: 520
 Residual Deviance: 94.19695
 Null Deviance: 673.3445
 AIC: 114.1969
+(Bias): System.Single[]
+Features.bare_nuclei: System.Single[]
+Features.thickness: System.Single[]
+Features.uniform_shape: System.Single[]
+Features.uniform_size: System.Single[]
+Features.normal_nucleoli: System.Single[]
+Features.bland_chromatin: System.Single[]
+Features.adhesion: System.Single[]
+Features.epit_size: System.Single[]
+Cat.1: System.Single[]
 Partition model 3 summary:
 Per-feature gain summary for the boosted tree ensemble:
 	Features.uniform_size: 1
diff --git a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
index 50abe9df54..fadb2e27c8 100644
--- a/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
+++ b/test/BaselineOutput/Common/EntryPoints/ensemble-summary.txt
@@ -17,6 +17,21 @@ Count of training examples: 521
 Residual Deviance: 98.29433
 Null Deviance: 669.0935
 AIC: 118.2943
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -5.120674 0.6998186 -7.317145 0 ***
+Features.thickness 2.353567 0.4267568 5.515007 5.960464E-08 ***
+Features.bare_nuclei 2.435889 0.451504 5.395056 5.960464E-08 ***
+Features.uniform_shape 1.944249 0.4137097 4.699549 2.622604E-06 ***
+Features.uniform_size 1.786538 0.4204066 4.249547 2.145767E-05 ***
+Features.bland_chromatin 1.611417 0.4605175 3.499144 0.0004668236 ***
+Features.adhesion 1.38072 0.4821554 2.863641 0.00418818 **
+Features.normal_nucleoli 1.345759 0.4784136 2.812962 0.00490886 **
+Features.epit_size 1.083109 0.4566914 2.371643 0.0177092 *
+Cat.1 -0.7715381 0.3420695 -2.255501 0.02410179 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Partition model 1 summary:
 Per-feature gain summary for the boosted tree ensemble:
@@ -50,6 +65,21 @@ Count of training examples: 520
 Residual Deviance: 94.19695
 Null Deviance: 673.3445
 AIC: 114.1969
+
+Coefficients statistics:
+Coefficient Estimate Std. Error z value Pr(>|z|)
+(Bias) -4.860323 0.7128119 -6.818521 0 ***
+Features.bare_nuclei 3.16846 0.4579377 6.918975 0 ***
+Features.thickness 2.143086 0.4306555 4.976335 6.556511E-07 ***
+Features.uniform_shape 1.711214 0.4222687 4.05243 5.072355E-05 ***
+Features.uniform_size 1.494185 0.4099987 3.644366 0.0002681017 ***
+Features.normal_nucleoli 1.519048 0.4728626 3.212452 0.00131619 **
+Features.bland_chromatin 1.386847 0.4451247 3.115636 0.001835644 **
+Features.adhesion 1.383187 0.4832917 2.862014 0.004209697 **
+Features.epit_size 0.8832003 0.4570503 1.932392 0.05331099 .
+Cat.1 -0.8226236 0.3383799 -2.431065 0.01505452 *
+---
+Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 
 Partition model 3 summary:
 Per-feature gain summary for the boosted tree ensemble:
diff --git a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
index 8e04238c73..c467f102be 100644
--- a/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
+++ b/test/BaselineOutput/Common/EntryPoints/lr-stats.txt
@@ -5,6 +5,14 @@
 #@   col={name={Residual Deviance} type=R4 src=1}
 #@   col={name={Null Deviance} type=R4 src=2}
 #@   col=AIC:R4:3
+#@   col=BiasEstimate:R4:4
+#@   col=BiasStandardError:R4:5
+#@   col=BiasZScore:R4:6
+#@   col=BiasPValue:R4:7
+#@   col=Estimate:R4:8-16
+#@   col=StandardError:R4:17-25
+#@   col=ZScore:R4:26-34
+#@   col=PValue:R4:35-43
 #@ }
-Count of training examples Residual Deviance Null Deviance AIC
-683 126.83107 884.350159 146.83107
+Count of training examples Residual Deviance Null Deviance AIC BiasEstimate BiasStandardError BiasZScore BiasPValue thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses thickness uniform_size uniform_shape adhesion epit_size bare_nuclei bland_chromatin normal_nucleoli mitoses
+683 126.83107 884.350159 146.83107 -6.186806 0.459383339 -13.4676332 0 2.65800762 1.68089855 1.944068 1.42514718 0.8536965 2.9325006 1.74816787 1.58165014 0.595681 0.455618978 0.429146379 0.431570023 0.479817748 0.470442533 0.4381438 0.469593167 0.4714128 0.467883229 5.83383846 3.916842 4.504641 2.97018433 1.814667 6.69301 3.72272849 3.35512757 1.27314031 0 8.9764595E-05 6.67572E-06 0.002976358 0.06957501 0 0.00019711256 0.0007933974 0.202968419
diff --git a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
index 614166c904..273028ddaa 100644
--- a/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
+++ b/test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
@@ -535,41 +535,52 @@ private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchColl
             double f1 = double.Parse(firstCollection[i].ToString());
             double f2 = double.Parse(secondCollection[i].ToString());
 
-            // this follows the IEEE recommendations for how to compare floating point numbers
-            double allowedVariance = Math.Pow(10, -digitsOfPrecision);
-            double delta = Round(f1, digitsOfPrecision) - Round(f2, digitsOfPrecision);
-            // limitting to the digits we care about.
-            delta = Math.Round(delta, digitsOfPrecision);
-
-            bool inRange = delta > -allowedVariance && delta < allowedVariance;
-
-            // for some cases, rounding up is not beneficial
-            // so checking on whether the difference is significant prior to rounding, and failing only then.
-            // example, for 5 digits of precision.
-            // F1 = 1.82844949 Rounds to 1.8284
-            // F2 = 1.8284502 Rounds to 1.8285
-            // would fail the inRange == true check, but would suceed the following, and we doconsider those two numbers
-            // (1.82844949 - 1.8284502) = -0.00000071
+            if (!CompareNumbersWithTolerance(f1, f2, i, digitsOfPrecision))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    public bool CompareNumbersWithTolerance(double expected, double actual, int? iterationOnCollection = null, int digitsOfPrecision = DigitsOfPrecision)
+    {
+        // This follows the IEEE recommendations for how to compare floating point numbers.
+        double allowedVariance = Math.Pow(10, -digitsOfPrecision);
+        double delta = Round(expected, digitsOfPrecision) - Round(actual, digitsOfPrecision);
+        // Limiting to the digits we care about.
+        delta = Math.Round(delta, digitsOfPrecision);
+
+        bool inRange = delta > -allowedVariance && delta < allowedVariance;
+
+        // For some cases, rounding up is not beneficial,
+        // so check whether the difference is significant prior to rounding, and fail only then.
+        // Example, for 5 digits of precision:
+        // F1 = 1.82844949 rounds to 1.8284
+        // F2 = 1.8284502 rounds to 1.8285
+        // would fail the inRange == true check, but would succeed the following, and we do consider
+        // those two numbers close enough: (1.82844949 - 1.8284502) = -0.00000071
         double delta2 = 0;
         if (!inRange)
         {
-            delta2 = Math.Round(f1 - f2, digitsOfPrecision);
+            delta2 = Math.Round(expected - actual, digitsOfPrecision);
             inRange = delta2 >= -allowedVariance && delta2 <= allowedVariance;
         }
 
         if (!inRange)
         {
-            Fail(_allowMismatch, $"Output and baseline mismatch at line {i}." + Environment.NewLine +
-                $"Values to compare are {firstCollection[i]} and {secondCollection[i]}" + Environment.NewLine +
-                $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
-                $"\t delta: {delta}" + Environment.NewLine +
-                $"\t delta2: {delta2}" + Environment.NewLine);
-            return false;
+            var message = iterationOnCollection != null ? $"Output and baseline mismatch at line {iterationOnCollection}." + Environment.NewLine : "";
+
+            Fail(_allowMismatch, message +
+                $"Values to compare are {expected} and {actual}" + Environment.NewLine +
+                $"\t AllowedVariance: {allowedVariance}" + Environment.NewLine +
+                $"\t delta: {delta}" + Environment.NewLine +
+                $"\t delta2: {delta2}" + Environment.NewLine);
         }
-        }
 
-        return true;
+        return inRange;
     }
 
     private static double Round(double value, int digitsOfPrecision)
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
index dc5950f8d8..30906c8940 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -4,6 +4,7 @@
 using Microsoft.ML.Core.Data;
 using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Calibration;
 using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Trainers;
 using Xunit;
@@ -38,5 +39,41 @@ public void TestEstimatorPoissonRegression()
             TestEstimatorCore(pipe, dataView);
             Done();
         }
+
+        [Fact]
+        public void TestLogisticRegressionStats()
+        {
+            (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+            pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => { s.ShowTrainingStats = true; }));
+            var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+            var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+            var stats = linearModel.Statistics;
+            LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+            CompareNumbersWithTolerance(stdError, 0.250672936);
+            CompareNumbersWithTolerance(zScore, 7.97852373);
+        }
+
+        [Fact]
+        public void TestLogisticRegressionStats_MKL()
+        {
+            (IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();
+
+            pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features", advancedSettings: s => {
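+                // Swap in the MKL-backed implementation added in this PR; unlike the default
+                // managed ComputeLRTrainingStdImpl, it relies on the native MklImports library.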
+                s.ShowTrainingStats = true;
+                s.StdComputer = new ComputeLRTrainingStdThroughHal();
+            }));
+
+            var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;
+
+            var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
+            var stats = linearModel.Statistics;
+            LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);
+
+            CompareNumbersWithTolerance(stdError, 0.250672936);
+            CompareNumbersWithTolerance(zScore, 7.97852373);
+        }
     }
 }
diff --git a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
index c7ee67537a..c4bd2fe38c 100644
--- a/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
+++ b/tools-local/Microsoft.ML.InternalCodeAnalyzer/InstanceInitializerAnalyzer.cs
@@ -18,7 +18,7 @@ public sealed class InstanceInitializerAnalyzer : DiagnosticAnalyzer
     internal const string DiagnosticId = "MSML_NoInstanceInitializers";
 
     private const string Title = "No initializers on instance fields or properties";
-    private const string Format = "Member {0} has a {1} initialier outside the constructor";
+    private const string Format = "Member {0} has a {1} initializer outside the constructor";
     private static DiagnosticDescriptor Rule = new DiagnosticDescriptor(DiagnosticId, Title, Format, Category,