Adding training statistics for LR in the HAL learners package. #1392
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Trainers.HalLearners;
using System;

namespace Microsoft.ML.Runtime.Learners
{
    using Mkl = OlsLinearRegressionTrainer.Mkl;

    public static class LogisticRegressionTrainingStats
    {
        /// <summary>
        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to further calculate the standard deviation,
        /// p-value and z-Score.
        /// This function performs the same calculations as <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/>, but is faster because it makes use of Intel's MKL.
        /// </summary>
        /// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
        /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
        /// <param name="l2Weight">The L2Weight used for training. (Supply the same value that was used during training.)</param>
        public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(model.Statistics, "Training statistics can only be generated after training finishes. Train with ShowTrainingStats set to true.");
            Contracts.Assert(l2Weight > 0);

            int numSelectedParams = model.Statistics.ParametersCount;

            // Apply Cholesky decomposition to find the inverse of the Hessian.
            Double[] invHessian = null;
            try
            {
                // First, find the Cholesky decomposition LL' of the Hessian.
                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
                // Note that the Hessian is already modified at this point. It is no longer the original Hessian,
                // but instead represents the Cholesky decomposition L.
                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
                // of the original information matrix.
                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
                // At this point, the Hessian buffer contains the inverse of the original Hessian matrix.
                // Swap it with invHessian to avoid confusion in the following context.
                Utils.Swap(ref model.Statistics.Hessian, ref invHessian);
                Contracts.Assert(model.Statistics.Hessian == null);
            }
            catch (DllNotFoundException)
            {
                throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
            }

            float[] stdErrorValues = new float[numSelectedParams];
            stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);

            for (int i = 1; i < numSelectedParams; i++)
            {
                // Initialize with the diagonal of the inverse Hessian (packed row-major lower-triangular storage).
                stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
            }

            if (l2Weight > 0)
            {
                // Iterate through all entries of the inverse Hessian to adjust the variance.
                // A discussion of the ridge regularized LR coefficient covariance matrix can be found here:
                // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
                int ioffset = 1;
                for (int iRow = 1; iRow < numSelectedParams; iRow++)
                {
                    for (int iCol = 0; iCol <= iRow; iCol++)
                    {
                        var entry = (Single)invHessian[ioffset];
                        var adjustment = -l2Weight * entry * entry;

                        stdErrorValues[iRow] -= adjustment;
                        if (0 < iCol && iCol < iRow)
                            stdErrorValues[iCol] -= adjustment;
                        ioffset++;
                    }
                }

                Contracts.Assert(ioffset == invHessian.Length);
            }

            for (int i = 1; i < numSelectedParams; i++)
                stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

            // The currentWeights vector size is Weights2 plus one for the bias.
            var currentWeightsCount = model.Weights2.Count + 1;
            VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices);
            model.Statistics.SetCoeffStdError(stdErrors);
        }
    }
}
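
A quick aside on the indexing used above (an illustration added for this review, not part of the diff): Pptrf/Pptri operate on the lower triangle of the symmetric Hessian packed row-major into a flat array, which is why the diagonal entry of row i is read at invHessian[i * (i + 1) / 2 + i]. A minimal standalone sketch of that storage convention, with made-up numbers:

// Illustrative only: packed row-major lower-triangular storage, as assumed by the
// Pptrf/Pptri calls above. Element (i, j) with j <= i lives at index i*(i+1)/2 + j.
using System;

class PackedStorageDemo
{
    static double GetLower(double[] packed, int i, int j) => packed[i * (i + 1) / 2 + j];

    static void Main()
    {
        // A 3x3 symmetric matrix, lower triangle packed row by row:
        // | 4 1 2 |
        // | 1 5 3 |
        // | 2 3 6 |   =>  packed = [4, 1, 5, 2, 3, 6]
        double[] packed = { 4, 1, 5, 2, 3, 6 };
        for (int i = 0; i < 3; i++)
            // The diagonal of row i is the last element of that row: index i*(i+1)/2 + i.
            Console.WriteLine($"diag[{i}] = {GetLower(packed, i, i)}");
        // Prints 4, 5, 6 - the same indexing the standard-error initialization uses.
    }
}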
@@ -1,11 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>netstandard2.0</TargetFramework>
    <IncludeInPackage>Microsoft.ML</IncludeInPackage>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="MathNet.Numerics.Signed" Version="$(MathNumericPackageVersion)" />

Review comment: note: you'll need to add this PackageReference to our NuGet package as well: machinelearning/pkg/Microsoft.ML/Microsoft.ML.nupkgproj (Lines 11 to 17 in f222025)

  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
    <ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
@@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
        [Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
        public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

-       internal static class Defaults
+       public static class Defaults
        {
-           internal const float L2Weight = 1;
-           internal const float L1Weight = 1;
-           internal const float OptTol = 1e-7f;
-           internal const int MemorySize = 20;
-           internal const int MaxIterations = int.MaxValue;
-           internal const bool EnforceNonNegativity = false;
+           public const float L2Weight = 1;
+           public const float L1Weight = 1;
+           public const float OptTol = 1e-7f;
+           public const int MemorySize = 20;
+           public const int MaxIterations = int.MaxValue;
+           public const bool EnforceNonNegativity = false;
        }
    }

@@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
        }

        protected virtual int ClassCount => 1;
-       protected int BiasCount => ClassCount;
+       public int BiasCount => ClassCount;

Review comment: revert #Resolved

        protected int WeightCount => ClassCount * NumFeatures;
        protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
            out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
@@ -2,17 +2,17 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.

-using System;
-using System.Collections.Generic;
-using System.ComponentModel;
-using System.IO;
-using System.Linq;
+using MathNet.Numerics.LinearAlgebra;

 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.Internal.CpuMath;
 using Microsoft.ML.Runtime.Internal.Utilities;
 using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Runtime.Model;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;

 // This is for deserialization from a model repository.
 [assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -82,15 +82,20 @@ private static VersionInfo GetVersionInfo()
        // It could be null when there are too many non-zero weights so that
        // the memory is insufficient to hold the Hessian matrix necessary for the computation
        // of the variance-covariance matrix.
-       private readonly VBuffer<Single>? _coeffStdError;
+       private VBuffer<Single>? _coeffStdError;

-       public long TrainingExampleCount => _trainingExampleCount;
+       public long TrainingExampleCount { get { return _trainingExampleCount; } }

-       public Single Deviance => _deviance;
+       public Single Deviance { get { return _deviance; } }

-       public Single NullDeviance => _nullDeviance;
+       public Single NullDeviance { get { return _nullDeviance; } }

-       public int ParametersCount => _paramCount;
+       public int ParametersCount { get { return _paramCount; } }

        public Double[] Hessian;

        // Indices of bias and non-zero weight slots.
        public int[] WeightIndices;

        internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
        {
@@ -111,7 +116,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
            _coeffStdError = coeffStdError;
        }

-       public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
+       internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            _env = env;
@@ -157,7 +162,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
            _coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
        }

-       public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
+       internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
@@ -208,6 +213,10 @@ private void SaveCore(ModelSaveContext ctx)
                ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.Indices, _paramCount);
        }

        /// <summary>
        /// Computes the standard deviation, z-Score and p-value.
        /// Should be called after <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/>.
        /// </summary>
        public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
        {
            if (!stats._coeffStdError.HasValue)
@@ -222,10 +231,97 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
            stdError = stats._coeffStdError.Value.Values[0];
            Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
            zScore = bias / stdError;
-           pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
+           pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
            return true;
        }

        /// <summary>
        /// Computes the standard deviation matrix of each of the non-zero training weights, needed to further calculate the standard deviation,
        /// p-value and z-Score.
        /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HalLearners package, which makes use of hardware acceleration.
        /// </summary>
        /// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
        /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
        /// <param name="l2Weight">The L2Weight used for training. (Supply the same value that was used during training.)</param>
        public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(model.Statistics, "Training statistics can only be generated after training finishes. Train with ShowTrainingStats set to true.");
            Contracts.Assert(l2Weight > 0);

            int numSelectedParams = model.Statistics.ParametersCount;

            double[] hessian = model.Statistics.Hessian;
            double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];

            int hessianLength = 0;
            int dimension = numSelectedParams - 1;

            for (int row = dimension; row >= 0; row--)
            {
                for (int col = 0; col <= dimension; col++)
                {
                    if ((row + col) <= dimension)
                    {
                        if ((row + col) == dimension)
                        {
                            matrixHessian[row, col] = hessian[hessianLength];
                        }
                        else
                        {
                            matrixHessian[row, col] = hessian[hessianLength];
                            matrixHessian[dimension - col, dimension - row] = hessian[hessianLength];
                        }
                        hessianLength++;
                    }
                    else
                        continue;
                }
            }

            var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
            var inverse = h.Inverse();

            float[] stdErrorValues2 = new float[numSelectedParams];
            stdErrorValues2[0] = (float)Math.Sqrt(inverse[0, numSelectedParams - 1]);

            for (int i = 1; i < numSelectedParams; i++)
            {
                // Initialize with the diagonal of the inverse Hessian.
                stdErrorValues2[i] = (Single)inverse[i, numSelectedParams - i - 1];
            }

            if (l2Weight > 0)
            {
                // Iterate through all entries of the inverse Hessian to adjust the variance.
                // A discussion of the ridge regularized LR coefficient covariance matrix can be found here:
                // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
                int ioffset = 1;
                for (int iRow = 1; iRow < numSelectedParams; iRow++)
                {
                    for (int iCol = 0; iCol <= iRow; iCol++)
                    {
                        float entry = (float)inverse[iRow, numSelectedParams - iCol - 1];
                        var adjustment = -l2Weight * entry * entry;
                        stdErrorValues2[iRow] -= adjustment;

                        if (0 < iCol && iCol < iRow)
                            stdErrorValues2[iCol] -= adjustment;
                        ioffset++;
                    }
                }
            }

            for (int i = 1; i < numSelectedParams; i++)
                stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]);

            // The currentWeights vector size is Weights2 plus one for the bias.
            var currentWeightsCount = model.Weights2.Count + 1;
            VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues2, model.Statistics.WeightIndices);
            model.Statistics.SetCoeffStdError(stdErrors);
        }

        private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer<Single> weights, in VBuffer<ReadOnlyMemory<char>> names,
            ref VBuffer<Single> estimate, ref VBuffer<Single> stdErr, ref VBuffer<Single> zScore, ref VBuffer<Single> pValue, out ValueGetter<VBuffer<ReadOnlyMemory<char>>> getSlotNames)
        {
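
For context on the statistics produced above (an illustration added for this review, not part of the diff): TryGetBiasStatistics turns an estimate and its standard error into a z-score and a two-sided p-value via pValue = 1 - Erf(|zScore| / sqrt(2)). The sketch below reproduces that arithmetic standalone; it uses an Abramowitz & Stegun approximation of erf only because System.Math has no Erf, whereas the product code calls the repo's ProbabilityFunctions.Erf.

// Standalone sketch of the p-value computation used in TryGetBiasStatistics:
//   zScore = estimate / stdError
//   pValue = 1 - erf(|zScore| / sqrt(2))   (two-sided test against the standard normal)
using System;

class PValueSketch
{
    static double Erf(double x)
    {
        // Abramowitz & Stegun formula 7.1.26, max absolute error ~1.5e-7; for illustration only.
        double sign = Math.Sign(x);
        x = Math.Abs(x);
        double t = 1.0 / (1.0 + 0.3275911 * x);
        double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
        return sign * (1.0 - poly * Math.Exp(-x * x));
    }

    static void Main()
    {
        double estimate = 0.8, stdError = 0.35;       // made-up numbers
        double z = estimate / stdError;               // ~2.29
        double p = 1.0 - Erf(Math.Abs(z) / Math.Sqrt(2.0));
        Console.WriteLine($"z = {z:F2}, p = {p:F4}"); // p ~ 0.022, significant at the usual 5% level
    }
}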
@@ -285,6 +381,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
            };
        }

        public void SetCoeffStdError(VBuffer<Single> coeffStdError)
        {
            _env.Assert(coeffStdError.Count == _paramCount);
            _coeffStdError = coeffStdError;
        }

        private IEnumerable<CoefficientStatistics> GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema)
        {
            Contracts.AssertValue(_env);
Review comment: (nit) since this is a dependency in the Microsoft.ML NuGet package, it belongs under the Core Product Dependencies section above. #Resolved