Skip to content

Fix MatchNumberWithTolerance to better compare floating-point values #1145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ public void EarlyStoppingTest()
[TestCategory("Logistic Regression")]
public void MulticlassLRTest()
{
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.iris, precision: 10_000);
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.iris, digitsOfPrecision: 4);
Done();
}

Expand All @@ -173,7 +173,7 @@ public void MulticlassLRTest()
[TestCategory("Logistic Regression")]
public void MulticlassLRNonNegativeTest()
{
RunOneAllTests(TestLearners.multiclassLogisticRegressionNonNegative, TestDatasets.iris, precision: 10_000);
RunOneAllTests(TestLearners.multiclassLogisticRegressionNonNegative, TestDatasets.iris, digitsOfPrecision: 4);
Done();
}

Expand All @@ -200,8 +200,8 @@ public void MulticlassTreeFeaturizedLRTest()
{
RunMTAThread(() =>
{
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturized, precision: 10_000);
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturizedPermuted, precision: 10_000);
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturized, digitsOfPrecision: 4);
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturizedPermuted, digitsOfPrecision: 4);
});
Done();
}
Expand Down
49 changes: 32 additions & 17 deletions test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace Microsoft.ML.Runtime.RunTests
/// </summary>
public abstract partial class BaseTestBaseline : BaseTestClass
{
public const decimal Tolerance = 10_000_000;
public const int DigitsOfPrecision = 7;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public [](start = 8, length = 6)

nit: can it be internal?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, but the existing code already had the previous constant as public.

Copy link
Member

@eerhardt eerhardt Oct 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like it is used by inherited classes. And this is "just tests" so internal vs. public may not make that much of a difference.


In reply to: 222529405 [](ancestors = 222529405)


protected BaseTestBaseline(ITestOutputHelper output) : base(output)
{
Expand Down Expand Up @@ -352,12 +352,12 @@ protected bool CheckEquality(string dir, string name, string nameBase = null)
/// Check whether two files are same ignoring volatile differences (path, dates, times, etc).
/// Returns true if the check passes.
/// </summary>
protected bool CheckEqualityNormalized(string dir, string name, string nameBase = null, decimal precision = Tolerance)
protected bool CheckEqualityNormalized(string dir, string name, string nameBase = null, int digitsOfPrecision = DigitsOfPrecision)
{
return CheckEqualityCore(dir, name, nameBase ?? name, true, precision);
return CheckEqualityCore(dir, name, nameBase ?? name, true, digitsOfPrecision);
}

protected bool CheckEqualityCore(string dir, string name, string nameBase, bool normalize, decimal precision = Tolerance)
protected bool CheckEqualityCore(string dir, string name, string nameBase, bool normalize, int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(IsActive);
Contracts.AssertValue(dir); // Can be empty.
Expand All @@ -384,7 +384,7 @@ protected bool CheckEqualityCore(string dir, string name, string nameBase, bool
if (!CheckBaseFile(basePath))
return false;

bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, precision: precision);
bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, digitsOfPrecision: digitsOfPrecision);

// No need to keep the raw (unnormalized) output file.
if (normalize && res)
Expand Down Expand Up @@ -501,7 +501,7 @@ protected bool CheckOutputIsSuffix(string basePath, string outPath, int skip = 0
/// skipping the given number of lines on the output, and finding the corresponding line
/// in the baseline.
/// </summary>
protected bool CheckEqualityNormalized(string dir, string name, string suffix, int skip, decimal precision = Tolerance)
protected bool CheckEqualityNormalized(string dir, string name, string suffix, int skip, int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(IsActive);
Contracts.AssertValue(dir); // Can be empty.
Expand All @@ -522,7 +522,7 @@ protected bool CheckEqualityNormalized(string dir, string name, string suffix, i
if (!CheckBaseFile(basePath))
return false;

bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, skip, precision);
bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, skip, digitsOfPrecision);

// No need to keep the raw (unnormalized) output file.
if (res)
Expand All @@ -531,7 +531,7 @@ protected bool CheckEqualityNormalized(string dir, string name, string suffix, i
return res;
}

protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0, decimal precision = Tolerance)
protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0, int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(skip >= 0);

Expand Down Expand Up @@ -578,9 +578,7 @@ protected bool CheckEqualityFromPathsCore(string relPath, string basePath, strin
}

count++;

if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
GetNumbersFromFile(ref line1, ref line2, precision);
GetNumbersFromFile(ref line1, ref line2, digitsOfPrecision);

if (line1 != line2)
{
Expand All @@ -594,28 +592,45 @@ protected bool CheckEqualityFromPathsCore(string relPath, string basePath, strin
}
}

private static void GetNumbersFromFile(ref string firstString, ref string secondString, decimal precision)
private static void GetNumbersFromFile(ref string firstString, ref string secondString, int digitsOfPrecision)
{
Regex _matchNumer = new Regex(@"\b[0-9]+\.?[0-9]*\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
MatchCollection firstCollection = _matchNumer.Matches(firstString);
MatchCollection secondCollection = _matchNumer.Matches(secondString);

MatchNumberWithTolerance(firstCollection, secondCollection, precision);
MatchNumberWithTolerance(firstCollection, secondCollection, digitsOfPrecision);
firstString = _matchNumer.Replace(firstString, "%Number%");
secondString = _matchNumer.Replace(secondString, "%Number%");
}

private static void MatchNumberWithTolerance(MatchCollection firstCollection, MatchCollection secondCollection, decimal precision)
private static void MatchNumberWithTolerance(MatchCollection firstCollection, MatchCollection secondCollection, int digitsOfPrecision)
{
for (int i = 0; i < firstCollection.Count; i++)
{
decimal f1 = decimal.Parse(firstCollection[i].ToString());
decimal f2 = decimal.Parse(secondCollection[i].ToString());
double f1 = double.Parse(firstCollection[i].ToString());
double f2 = double.Parse(secondCollection[i].ToString());

double allowedVariance = Math.Pow(10, -digitsOfPrecision);
double delta = Round(f1, digitsOfPrecision) - Round(f2, digitsOfPrecision);

Assert.InRange(f1, f2 - (f2 / precision), f2 + (f2 / precision));
Assert.InRange(delta, -allowedVariance, allowedVariance);
}
}

private static double Round(double value, int digitsOfPrecision)
{
if ((value == 0) || double.IsInfinity(value) || double.IsNaN(value))
{
return value;
}

double absValue = Math.Abs(value);
double integralDigitCount = Math.Floor(Math.Log10(absValue) + 1);

double scale = Math.Pow(10, integralDigitCount);
return scale * Math.Round(value / scale, digitsOfPrecision);
}

#if TOLERANCE_ENABLED
// This corresponds to how much relative error is tolerable for a value of 0.
const Float RelativeToleranceStepSize = (Float)0.001;
Expand Down
20 changes: 10 additions & 10 deletions test/Microsoft.ML.TestFramework/BaseTestPredictorsMaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public TestImpl(RunContextBase ctx) :
/// <summary>
/// Run the predictor with given args and check if it adds up
/// </summary>
protected void Run(RunContext ctx, decimal precision = Tolerance)
protected void Run(RunContext ctx, int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(IsActive);
List<string> args = new List<string>();
Expand Down Expand Up @@ -164,7 +164,7 @@ protected void Run(RunContext ctx, decimal precision = Tolerance)
}
var consOutPath = ctx.StdoutPath();
TestCore(ctx, ctx.Command.ToString(), runcmd);
bool matched = consOutPath.CheckEqualityNormalized(precision);
bool matched = consOutPath.CheckEqualityNormalized(digitsOfPrecision);

if (modelPath != null && (ctx.Summary || ctx.SaveAsIni))
{
Expand All @@ -190,7 +190,7 @@ protected void Run(RunContext ctx, decimal precision = Tolerance)
}

MainForTest(Env, LogWriter, str);
files.ForEach(file => CheckEqualityNormalized(dir, file, precision: precision));
files.ForEach(file => CheckEqualityNormalized(dir, file, digitsOfPrecision: digitsOfPrecision));
}

if (ctx.Command == Cmd.Train || ctx.Command == Cmd.Test || ctx.ExpectedToFail)
Expand Down Expand Up @@ -351,11 +351,11 @@ protected void RunAllTests(
/// Run TrainTest, CV, and TrainSaveTest for a single predictor on a single dataset.
/// </summary>
protected void RunOneAllTests(PredictorAndArgs predictor, TestDataset dataset,
string[] extraSettings = null, string extraTag = "", bool summary = false, decimal precision = Tolerance)
string[] extraSettings = null, string extraTag = "", bool summary = false, int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(IsActive);
Run_TrainTest(predictor, dataset, extraSettings, extraTag, summary: summary, precision: precision);
Run_CV(predictor, dataset, extraSettings, extraTag, useTest: true, precision: precision);
Run_TrainTest(predictor, dataset, extraSettings, extraTag, summary: summary, digitsOfPrecision: digitsOfPrecision);
Run_CV(predictor, dataset, extraSettings, extraTag, useTest: true, digitsOfPrecision: digitsOfPrecision);
}

/// <summary>
Expand Down Expand Up @@ -383,10 +383,10 @@ protected RunContext Run_Train(PredictorAndArgs predictor, TestDataset dataset,
/// Run a train-test unit test
/// </summary>
protected void Run_TrainTest(PredictorAndArgs predictor, TestDataset dataset,
string[] extraSettings = null, string extraTag = "", bool expectFailure = false, bool summary = false, bool saveAsIni = false, decimal precision = Tolerance)
string[] extraSettings = null, string extraTag = "", bool expectFailure = false, bool summary = false, bool saveAsIni = false, int digitsOfPrecision = DigitsOfPrecision)
{
RunContext ctx = new RunContext(this, Cmd.TrainTest, predictor, dataset, extraSettings, extraTag, expectFailure: expectFailure, summary: summary, saveAsIni: saveAsIni);
Run(ctx, precision);
Run(ctx, digitsOfPrecision);
}

// REVIEW: Remove TrainSaveTest and supporting code.
Expand Down Expand Up @@ -421,7 +421,7 @@ protected void Run_Test(PredictorAndArgs predictor, TestDataset dataset, string
/// <paramref name="useTest"/> is set.
/// </summary>
protected void Run_CV(PredictorAndArgs predictor, TestDataset dataset,
string[] extraSettings = null, string extraTag = "", bool useTest = false, decimal precision = Tolerance)
string[] extraSettings = null, string extraTag = "", bool useTest = false, int digitsOfPrecision = DigitsOfPrecision)
{
if (useTest)
{
Expand All @@ -431,7 +431,7 @@ protected void Run_CV(PredictorAndArgs predictor, TestDataset dataset,
dataset.trainFilename = dataset.testFilename;
}
RunContext cvCtx = new RunContext(this, Cmd.CV, predictor, dataset, extraSettings, extraTag);
Run(cvCtx, precision);
Run(cvCtx, digitsOfPrecision);
}

/// <summary>
Expand Down
4 changes: 2 additions & 2 deletions test/Microsoft.ML.TestFramework/TestCommandBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ public bool CheckEquality()
return _testCmd.CheckEquality(_dir, _name);
}

public bool CheckEqualityNormalized(decimal precision = Tolerance)
public bool CheckEqualityNormalized(int digitsOfPrecision = DigitsOfPrecision)
{
Contracts.Assert(CanBeBaselined);
return _testCmd.CheckEqualityNormalized(_dir, _name, precision: precision);
return _testCmd.CheckEqualityNormalized(_dir, _name, digitsOfPrecision: digitsOfPrecision);
}

public string ArgStr(string name)
Expand Down