Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<SystemIOFileSystemAccessControl>4.5.0</SystemIOFileSystemAccessControl>
<SystemSecurityPrincipalWindows>4.5.0</SystemSecurityPrincipalWindows>
<TensorFlowVersion>1.10.0</TensorFlowVersion>
<MathNumericPackageVersion>4.6.0</MathNumericPackageVersion>
Copy link
Member

@eerhardt eerhardt Nov 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(nit) since this is a dependency in the Microsoft.ML nuget package, it belongs under the Core Product Dependencies section above. #Resolved

</PropertyGroup>

<!-- Code Analyzer Dependencies -->
Expand Down
94 changes: 94 additions & 0 deletions src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Trainers.HalLearners;
using System;

namespace Microsoft.ML.Runtime.Learners
{
using Mkl = OlsLinearRegressionTrainer.Mkl;

public static class LogisticRegressionTrainingStats
{
    /// <summary>
    /// Computes the standard deviation of each of the non-zero training weights, needed to further calculate the standard deviation,
    /// p-value and z-Score.
    /// This function performs the same calculations as <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/> but it is faster, because it makes use of Intel's MKL.
    /// </summary>
    /// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
    /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
    /// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
    public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
    {
        Contracts.AssertValue(ch);
        // Fixed typos in the message ("Trainig" -> "Training"); ShowTrainingStats is the actual Arguments field name.
        Contracts.AssertValue(model.Statistics, "Training statistics can only get generated after training finishes. Train with setting: ShowTrainingStats set to true.");
        Contracts.Assert(l2Weight > 0);

        int numSelectedParams = model.Statistics.ParametersCount;

        // Apply Cholesky decomposition to find the inverse of the Hessian.
        double[] invHessian = null;
        try
        {
            // First, find the Cholesky decomposition LL' of the Hessian.
            Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
            // Note that the Hessian buffer is modified in place at this point. It no longer holds the
            // original Hessian, but instead the Cholesky factor L. The following routine is supposed to
            // consume the Cholesky decomposition L instead of the original information matrix.
            Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
            // At this point the buffer contains the inverse of the original Hessian matrix.
            // Swap it into invHessian to avoid confusion in the following context.
            Utils.Swap(ref model.Statistics.Hessian, ref invHessian);
            Contracts.Assert(model.Statistics.Hessian == null);
        }
        catch (DllNotFoundException)
        {
            throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
        }

        float[] stdErrorValues = new float[numSelectedParams];
        // Slot 0 is the bias; its variance is the (0,0) entry of the inverse Hessian, and it is
        // not adjusted in the l2Weight loop below (the bias is not regularized), so the square
        // root can be taken immediately.
        stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);

        for (int i = 1; i < numSelectedParams; i++)
        {
            // Initialize with the diagonal of the inverse Hessian. In row-major packed
            // triangular storage, entry (i, i) lives at offset i * (i + 1) / 2 + i.
            stdErrorValues[i] = (float)invHessian[i * (i + 1) / 2 + i];
        }

        if (l2Weight > 0)
        {
            // Iterate through all entries of the inverse Hessian to make the adjustment to variance.
            // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
            // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
            // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
            int ioffset = 1;
            for (int iRow = 1; iRow < numSelectedParams; iRow++)
            {
                for (int iCol = 0; iCol <= iRow; iCol++)
                {
                    var entry = (float)invHessian[ioffset];
                    var adjustment = -l2Weight * entry * entry;
                    stdErrorValues[iRow] -= adjustment;
                    // An off-diagonal entry adjusts both coefficients it connects; iCol == 0
                    // (the bias slot) is deliberately excluded from the second write.
                    if (0 < iCol && iCol < iRow)
                        stdErrorValues[iCol] -= adjustment;
                    ioffset++;
                }
            }

            // The traversal above must have consumed the entire packed matrix exactly once.
            Contracts.Assert(ioffset == invHessian.Length);
        }

        // Convert the accumulated variances into standard deviations (slot 0 was converted above).
        for (int i = 1; i < numSelectedParams; i++)
            stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

        // currentWeights vector size is Weights2 + the bias.
        var currentWeightsCount = model.Weights2.Count + 1;
        VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices);
        model.Statistics.SetCoeffStdError(stdErrors);
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<IncludeInPackage>Microsoft.ML</IncludeInPackage>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="MathNet.Numerics.Signed" Version="$(MathNumericPackageVersion)" />
Copy link
Member

@eerhardt eerhardt Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: you'll need to add this PackageReference to our NuGet package as well:

<PackageReference Include="Newtonsoft.Json" Version="$(NewtonsoftJsonPackageVersion)" />
<PackageReference Include="System.Reflection.Emit.Lightweight" Version="$(SystemReflectionEmitLightweightPackageVersion)" />
<PackageReference Include="System.Threading.Tasks.Dataflow" Version="$(SystemThreadingTasksDataflowPackageVersion)" />
<PackageReference Include="System.CodeDom" Version="$(SystemCodeDomPackageVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
<PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableVersion)" />
<PackageReference Include="System.ComponentModel.Composition" Version="$(SystemComponentModelCompositionVersion)" />
#Resolved

</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
[Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

internal static class Defaults
public static class Defaults
Copy link
Member Author

@sfilipi sfilipi Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public [](start = 11, length = 7)

revert #Resolved

{
internal const float L2Weight = 1;
internal const float L1Weight = 1;
internal const float OptTol = 1e-7f;
internal const int MemorySize = 20;
internal const int MaxIterations = int.MaxValue;
internal const bool EnforceNonNegativity = false;
public const float L2Weight = 1;
public const float L1Weight = 1;
public const float OptTol = 1e-7f;
public const int MemorySize = 20;
public const int MaxIterations = int.MaxValue;
public const bool EnforceNonNegativity = false;
}
}

Expand Down Expand Up @@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
}

protected virtual int ClassCount => 1;
protected int BiasCount => ClassCount;
Copy link
Member Author

@sfilipi sfilipi Nov 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

protecte [](start = 6, length = 10)

revert #Resolved

public int BiasCount => ClassCount;
protected int WeightCount => ClassCount * NumFeatures;
protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public sealed class Arguments : ArgumentsBase
public bool ShowTrainingStats = false;
}

private Double _posWeight;
private double _posWeight;
private LinearModelStatistics _stats;

/// <summary>
Expand Down Expand Up @@ -329,8 +329,9 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
}
}
}

_stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
_stats.Hessian = hessian;
_stats.WeightIndices = weightIndices;
}

protected override void ProcessPriorDistribution(float label, float weight)
Expand Down
128 changes: 115 additions & 13 deletions src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using MathNet.Numerics.LinearAlgebra;
Copy link
Member

@eerhardt eerhardt Nov 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this using necessary? I don't see any Math.NET usages below. #Resolved

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.CpuMath;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// This is for deserialization from a model repository.
[assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
Expand Down Expand Up @@ -82,15 +82,20 @@ private static VersionInfo GetVersionInfo()
// It could be null when there are too many non-zero weights so that
// the memory is insufficient to hold the Hessian matrix necessary for the computation
// of the variance-covariance matrix.
private readonly VBuffer<Single>? _coeffStdError;
private VBuffer<Single>? _coeffStdError;

public long TrainingExampleCount => _trainingExampleCount;

public long TrainingExampleCount { get { return _trainingExampleCount; } }
public Single Deviance => _deviance;

public Single Deviance { get { return _deviance; } }
public Single NullDeviance => _nullDeviance;

public Single NullDeviance { get { return _nullDeviance; } }
public int ParametersCount => _paramCount;

public int ParametersCount { get { return _paramCount; } }
public Double[] Hessian;

// Indices of bias and non-zero weight slots.
public int[] WeightIndices;

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
{
Expand All @@ -111,7 +116,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
_coeffStdError = coeffStdError;
}

public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
_env = env;
Expand Down Expand Up @@ -157,7 +162,7 @@ public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
_coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
}

public static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
Expand Down Expand Up @@ -208,6 +213,10 @@ private void SaveCore(ModelSaveContext ctx)
ctx.Writer.WriteIntsNoCount(_coeffStdError.Value.Indices, _paramCount);
}

/// <summary>
/// Computes the standard deviation, Z-Score and p-Value.
/// Should be called after <see cref="ComputeStd(LinearBinaryPredictor, IChannel, float)"/>.
/// </summary>
public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
{
if (!stats._coeffStdError.HasValue)
Expand All @@ -222,10 +231,97 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
stdError = stats._coeffStdError.Value.Values[0];
Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
zScore = bias / stdError;
pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
return true;
}

/// <summary>
/// Computes the standard deviation of each of the non-zero training weights, needed to further calculate the standard deviation,
/// p-value and z-Score.
/// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration.
/// </summary>
/// <param name="model">A <see cref="LinearBinaryPredictor"/> obtained as a result of training with <see cref="LogisticRegression"/>.</param>
/// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
/// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
public static void ComputeStd(LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(model.Statistics, $"Training Statistics can get generated after training finishes. Train with setting: ShowTrainigStats set to true.");
    Contracts.Assert(l2Weight > 0);

    int numSelectedParams = model.Statistics.ParametersCount;

    // Unpack the triangular-packed Hessian into a full dense square matrix so that
    // Math.NET's general-purpose Inverse can be applied to it.
    double[] hessian = model.Statistics.Hessian;
    double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];

    // hessianLength is the read cursor into the packed array; dimention is the last row/column index.
    int hessianLength = 0;
    int dimention = numSelectedParams - 1;

    // NOTE(review): this fill places packed entries along anti-diagonals (row + col == dimention
    // is the main anti-diagonal, and each symmetric pair is mirrored across it), so the dense
    // matrix is a 180-degree-rotated view of the usual layout. The anti-diagonal indexing used on
    // the inverse below relies on exactly this orientation — confirm it matches the packing
    // produced during training.
    for (int row = dimention; row >= 0; row--)
    {
        for (int col = 0; col <= dimention; col++)
        {
            if ((row + col) <= dimention)
            {
                if ((row + col) == dimention)
                {
                    // Entry on the anti-diagonal: no symmetric counterpart to mirror.
                    matrixHessian[row, col] = hessian[hessianLength];
                }
                else
                {
                    // Off-anti-diagonal entry: write it and its mirror image.
                    matrixHessian[row, col] = hessian[hessianLength];
                    matrixHessian[dimention - col, dimention - row] = hessian[hessianLength];
                }
                hessianLength++;
            }
            else
                continue;
        }
    }

    var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
    var invers = h.Inverse();

    float[] stdErrorValues2 = new float[numSelectedParams];
    // Variance of parameter 0 (the bias). Because of the rotated layout above, the logical
    // diagonal of the inverse lies on [i, numSelectedParams - i - 1].
    stdErrorValues2[0] = (float)Math.Sqrt(invers[0, numSelectedParams - 1]);

    for (int i = 1; i < numSelectedParams; i++)
    {
        // Initialize with inverse Hessian.
        // The diagonal of the inverse Hessian (read along the anti-diagonal, see note above).
        stdErrorValues2[i] = (Single)invers[i, numSelectedParams - i - 1];
    }

    if (l2Weight > 0)
    {
        // Iterate through all entries of inverse Hessian to make adjustment to variance.
        // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
        // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
        // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
        int ioffset = 1;
        for (int iRow = 1; iRow < numSelectedParams; iRow++)
        {
            for (int iCol = 0; iCol <= iRow; iCol++)
            {
                float entry = (float)invers[iRow, numSelectedParams - iCol - 1];
                var adjustment = -l2Weight * entry * entry;
                stdErrorValues2[iRow] -= adjustment;

                // An off-diagonal entry adjusts both coefficients it connects; iCol == 0
                // (the bias slot) is excluded from the second write.
                if (0 < iCol && iCol < iRow)
                    stdErrorValues2[iCol] -= adjustment;
                ioffset++;
            }
        }
    }

    // Convert accumulated variances into standard deviations (index 0 was converted above).
    for (int i = 1; i < numSelectedParams; i++)
        stdErrorValues2[i] = (float)Math.Sqrt(stdErrorValues2[i]);

    var currentWeightsCount = model.Weights2.Count + 1; // adding one for the bias
    VBuffer<float> stdErrors = new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues2, model.Statistics.WeightIndices);
    model.Statistics.SetCoeffStdError(stdErrors);
}

private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer<Single> weights, in VBuffer<ReadOnlyMemory<char>> names,
ref VBuffer<Single> estimate, ref VBuffer<Single> stdErr, ref VBuffer<Single> zScore, ref VBuffer<Single> pValue, out ValueGetter<VBuffer<ReadOnlyMemory<char>>> getSlotNames)
{
Expand Down Expand Up @@ -285,6 +381,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
};
}

/// <summary>
/// Stores the coefficient standard errors computed externally (e.g. by a ComputeStd method)
/// so they can be reported with the rest of the training statistics.
/// </summary>
/// <param name="coeffStdError">Standard errors for the selected parameters; its count must
/// equal the number of parameters these statistics were built with.</param>
public void SetCoeffStdError(VBuffer<Single> coeffStdError)
{
    // One standard-error entry per selected parameter.
    _env.Assert(coeffStdError.Count == _paramCount);
    _coeffStdError = coeffStdError;
}

private IEnumerable<CoefficientStatistics> GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema)
{
Contracts.AssertValue(_env);
Expand Down
Loading