87 changes: 87 additions & 0 deletions src/Microsoft.ML.HalLearners/LogisticRegressionTrainingStats.cs
@@ -0,0 +1,87 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Trainers.HalLearners;
using System;

namespace Microsoft.ML.Runtime.Learners
{
using Mkl = OlsLinearRegressionTrainer.Mkl;

/// <include file='doc.xml' path='doc/members/member[@name="LBFGS"]/*' />
/// <include file='doc.xml' path='docs/members/example[@name="LogisticRegressionBinaryClassifier"]/*' />
public static class LogisticRegressionTrainingStats
{

public static void ComputeExtendedTrainingStatistics(this LinearBinaryPredictor model, IChannel ch, float l2Weight = LogisticRegression.Arguments.Defaults.L2Weight)
@wschin (Member), Nov 5, 2018:
The function name can be just compute(...) because it's inside LogisticRegressionTrainingStats. #Resolved

{
Contracts.AssertValue(ch);
Contracts.AssertValue(model.Statistics, "Training statistics are only generated after training finishes. Train with ShowTrainingStats set to true.");
Contracts.Assert(l2Weight > 0);

int numSelectedParams = model.Statistics.ParametersCount;

// Apply Cholesky Decomposition to find the inverse of the Hessian.
Double[] invHessian = null;
try
{
// First, find the Cholesky decomposition LL' of the Hessian.
Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
// Note that hessian is already modified at this point. It is no longer the original Hessian,
// but instead represents the Cholesky decomposition L.
// Also note that the following routine is supposed to consume the Cholesky decomposition L instead
// of the original information matrix.
Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numSelectedParams, model.Statistics.Hessian);
// At this point, hessian should contain the inverse of the original Hessian matrix.
// Swap hessian with invHessian to avoid confusion in the following context.
Utils.Swap(ref model.Statistics.Hessian, ref invHessian);
Contracts.Assert(model.Statistics.Hessian == null);
}
catch (DllNotFoundException)
{
throw ch.ExceptNotSupp("The MKL library (MklImports.dll) or one of its dependencies is missing.");
}

float[] stdErrorValues = new float[numSelectedParams];
stdErrorValues[0] = (float)Math.Sqrt(invHessian[0]);

for (int i = 1; i < numSelectedParams; i++)
{
// Initialize with inverse Hessian.
stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
}

if (l2Weight > 0)
{
// Iterate through all entries of inverse Hessian to make adjustment to variance.
// A discussion on ridge regularized LR coefficient covariance matrix can be found here:
// http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
@wschin (Member), Nov 5, 2018:
I really don't like putting a very old doc inline and asking someone to read the whole article.

Suggested change:
- // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
+ // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
#Resolved

// http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
@wschin (Member), Nov 5, 2018:
Suggested change:
- // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
+ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
#Resolved

int ioffset = 1;
for (int iRow = 1; iRow < numSelectedParams; iRow++)
{
for (int iCol = 0; iCol <= iRow; iCol++)
{
var entry = (Single)invHessian[ioffset];
var adjustment = -l2Weight * entry * entry;
@wschin (Member), Nov 5, 2018:
This line doesn't make a lot of sense to me. The code first computes \sigma = sqrt(x + \lambda) and then does something like \sigma + \lambda * \sigma^2. #Resolved

Member Author, in reply:
I'll review it too. Thanks for pointing it out.

stdErrorValues[iRow] -= adjustment;
if (0 < iCol && iCol < iRow)
stdErrorValues[iCol] -= adjustment;
ioffset++;
}
}

Contracts.Assert(ioffset == invHessian.Length);
}

for (int i = 1; i < numSelectedParams; i++)
stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

VBuffer<float> stdErrors = new VBuffer<float>(model.Weights2.Count, numSelectedParams, stdErrorValues, model.Statistics.WeightIndices);
model.Statistics.SetCoeffStdError(stdErrors);
}
}
}
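For context on the adjustment loop and the sign question raised in the comments above: significance testing for ridge-penalized logistic regression (the topic of the references cited in the code comments) is usually based on a sandwich-form covariance. As a sketch, writing H = X^T W X + \lambda I for the penalized Hessian (the exact scaling of \lambda depends on how the penalty term is written):

```math
\operatorname{Var}(\hat{\beta}) \;\approx\; H^{-1}\,(X^{\top} W X)\,H^{-1}
\;=\; H^{-1}(H - \lambda I)H^{-1}
\;=\; H^{-1} - \lambda H^{-2}.
```

The i-th diagonal entry is (H^{-1})_{ii} - \lambda * sum_j (H^{-1})_{ij}^2, which is the quantity the entry-squared accumulation in the loop above approximates; the sign of that correction term is exactly what the comment above questions.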
@@ -92,14 +92,14 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
[Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)]
public bool EnforceNonNegativity = Defaults.EnforceNonNegativity;

internal static class Defaults
public static class Defaults
@sfilipi (Member, Author), Nov 8, 2018, on the `public` modifier:
revert #Resolved

{
internal const float L2Weight = 1;
internal const float L1Weight = 1;
internal const float OptTol = 1e-7f;
internal const int MemorySize = 20;
internal const int MaxIterations = int.MaxValue;
internal const bool EnforceNonNegativity = false;
public const float L2Weight = 1;
public const float L1Weight = 1;
public const float OptTol = 1e-7f;
public const int MemorySize = 20;
public const int MaxIterations = int.MaxValue;
public const bool EnforceNonNegativity = false;
}
}

@@ -258,7 +258,7 @@ private static TArgs ArgsInit(string featureColumn, SchemaShape.Column labelColu
}

protected virtual int ClassCount => 1;
protected int BiasCount => ClassCount;
@sfilipi (Member, Author), Nov 8, 2018, on the `protected` modifier:
revert #Resolved

public int BiasCount => ClassCount;
protected int WeightCount => ClassCount * NumFeatures;
protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
@@ -44,7 +44,7 @@ public sealed class Arguments : ArgumentsBase
public bool ShowTrainingStats = false;
}

private Double _posWeight;
private double _posWeight;
private LinearModelStatistics _stats;

/// <summary>
@@ -329,8 +329,9 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
}
}
}

_stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
_stats.Hessian = hessian;
_stats.WeightIndices = weightIndices;
}

protected override void ProcessPriorDistribution(float label, float weight)
36 changes: 23 additions & 13 deletions src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs
@@ -2,17 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.CpuMath;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// This is for deserialization from a model repository.
[assembly: LoadableClass(typeof(LinearModelStatistics), null, typeof(SignatureLoadModel),
@@ -82,17 +81,22 @@ private static VersionInfo GetVersionInfo()
// It could be null when there are too many non-zero weights so that
// the memory is insufficient to hold the Hessian matrix necessary for the computation
// of the variance-covariance matrix.
private readonly VBuffer<Single>? _coeffStdError;
private VBuffer<Single>? _coeffStdError;

public long TrainingExampleCount { get { return _trainingExampleCount; } }
public long TrainingExampleCount => _trainingExampleCount;

public Single Deviance { get { return _deviance; } }
public Single Deviance => _deviance;

public Single NullDeviance { get { return _nullDeviance; } }
public Single NullDeviance => _nullDeviance;

public int ParametersCount { get { return _paramCount; } }
public int ParametersCount => _paramCount;

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
public Double[] Hessian;

// Indices of bias and non-zero weight slots.
public int[] WeightIndices;

public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance)
{
Contracts.AssertValue(env);
env.Assert(trainingExampleCount > 0);
@@ -104,7 +108,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount,
_nullDeviance = nullDeviance;
}

internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
public LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
: this(env, trainingExampleCount, paramCount, deviance, nullDeviance)
{
_env.Assert(coeffStdError.Count == _paramCount);
@@ -222,7 +226,7 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias
stdError = stats._coeffStdError.Value.Values[0];
Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
zScore = bias / stdError;
pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
return true;
}

@@ -285,6 +289,12 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat
};
}

public void SetCoeffStdError(VBuffer<Single> coeffStdError)
{
_env.Assert(coeffStdError.Count == _paramCount);
_coeffStdError = coeffStdError;
}

private IEnumerable<CoefficientStatistics> GetUnorderedCoefficientStatistics(LinearBinaryPredictor parent, RoleMappedSchema schema)
{
Contracts.AssertValue(_env);
29 changes: 29 additions & 0 deletions test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs
@@ -4,6 +4,7 @@

using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Xunit;
@@ -38,5 +39,33 @@ public void TestEstimatorPoissonRegression()
TestEstimatorCore(pipe, dataView);
Done();
}

[Fact]
public void TestLogisticRegressionStats()
@sfilipi (Member, Author), Nov 7, 2018, on TestLogisticRegressionStats:
Combine both tests together. #WontFix

Member Author, in reply:
Currently running into issues with this. Will investigate and log a bug.

{
(IEstimator<ITransformer> pipe, IDataView dataView) = GetBinaryClassificationPipeline();

pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label", advancedSettings: s => { s.ShowTrainingStats = true; }));
var transformerChain = pipe.Fit(dataView) as TransformerChain<BinaryPredictionTransformer<ParameterMixingCalibratedPredictor>>;

var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryPredictor;
var stats = linearModel.Statistics;

LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue);

Assert.Equal(0.0f, stdError);
Assert.Equal(0.0f, zScore);
Assert.Equal(0.0f, pValue);

using (var ch = Env.Start("Calculating STD for LR."))
linearModel.ComputeExtendedTrainingStatistics(ch);

LinearModelStatistics.TryGetBiasStatistics(stats, 2, out stdError, out zScore, out pValue);

Assert.True(stdError > 0);
@wschin (Member), Nov 5, 2018:
May we put a more strict range for each statistic? #Resolved

Assert.True(zScore > 0);

Done();
}
}
}
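Regarding the review suggestion above to put a stricter range on each statistic: a minimal sketch of how the final checks in TestLogisticRegressionStats could be tightened using xUnit's Assert.InRange. The numeric bounds below are illustrative placeholders only and would need to be calibrated against the actual output of GetBinaryClassificationPipeline().

```csharp
// Hypothetical replacement for the Assert.True checks above;
// the bounds are placeholders, not values verified against this dataset.
Assert.InRange(stdError, 0.0f, 10.0f);  // standard error of the bias term
Assert.InRange(zScore, 0.0f, 100.0f);   // z-score of the bias term
Assert.InRange(pValue, 0.0f, 1.0f);     // a p-value always lies in [0, 1]
```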