Skip to content

Commit 024bd44

Browse files
authored
Enable a QuantileRegression Test & Fix Duplicated Baseline Files (#1193)
* Enable a QuantileRegression test: enable CommandTrainScoreEvaluateQuantileRegression, and add the dataset and the necessary baselines. The only baseline changes were of the kind caused by https://github.com/dotnet/corefx/issues/31847.
* Allow BaseTestBaseline to check Common first: a lot of baseline tests have duplicated baselines between debug and release. Allow BaseTestBaseline to check the Common baseline directory for a baseline file first. Fixes #410.
* PR feedback: add more info to the README, and add column headers to the housing.txt data file.
1 parent 5684398 commit 024bd44

11 files changed

+1607
-126
lines changed
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Wrote 506 rows across 3 columns in %Time%
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Wrote 506 rows of length 19

test/BaselineOutput/Common/Command/CommandTrainScoreEvaluateQuantileRegression-3-metrics.txt

Lines changed: 507 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
L1(avg): 1.926877
2+
L2(avg): 10.630326
3+
RMS(avg): 3.260418
4+
Loss-fn(avg): 10.630326
5+
R Squared: 0.874077
6+
7+
OVERALL RESULTS
8+
---------------------------------------
9+
L1(avg): 1.926877 (0.0000)
10+
L2(avg): 10.630326 (0.0000)
11+
RMS(avg): 3.260418 (0.0000)
12+
Loss-fn(avg): 10.630326 (0.0000)
13+
R Squared: 0.874077 (0.0000)
14+
15+
---------------------------------------
Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
L1(avg): 1.926877
2+
L2(avg): 10.630326
3+
RMS(avg): 3.260418
4+
Loss-fn(avg): 10.630326
5+
R Squared: 0.874077
6+
7+
OVERALL RESULTS
8+
---------------------------------------
9+
L1(avg): 1.926877 (0.0000)
10+
L2(avg): 10.630326 (0.0000)
11+
RMS(avg): 3.260418 (0.0000)
12+
Loss-fn(avg): 10.630326 (0.0000)
13+
R Squared: 0.874077 (0.0000)
14+
15+
---------------------------------------

test/BaselineOutput/Common/Command/CommandTrainScoreEvaluateQuantileRegression-data.txt

Lines changed: 514 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
maml.exe Train tr=FastForestRegression{dt+} loader=Text{header+} data=%Data% out=%Output%
2+
Not adding a normalizer.
3+
Making per-feature arrays
4+
Changing data from row-wise to column-wise on disk
5+
Processed 506 instances
6+
Binning and forming Feature objects
7+
Reserved memory for tree learner: 290472 bytes
8+
Starting to train ...
9+
Not training a calibrator because it is not needed.
10+
Physical memory usage(MB): %Number%
11+
Virtual memory usage(MB): %Number%
12+
%DateTime% Time elapsed(s): %Number%
13+

test/Microsoft.ML.TestFramework/BaseTestBaseline.cs

Lines changed: 23 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
using System;
1010
using System.Collections.Generic;
1111
using System.IO;
12-
using System.Runtime.InteropServices;
1312
using System.Text;
1413
using System.Text.RegularExpressions;
1514
using System.Threading;
@@ -31,12 +30,8 @@ protected BaseTestBaseline(ITestOutputHelper output) : base(output)
3130

3231
internal const string RawSuffix = ".raw";
3332
private const string LogSuffix = ".log";
34-
private readonly string _baselineRootRelPath = Path.Combine(TestDir, "BaselineOutput", BuildString); // Relative to Root.
3533
private readonly string _logRootRelPath = Path.Combine("Logs", BuildString); // Relative to OutDir.
36-
private readonly string ScopeRootRelPath = Path.Combine("Samples", "scope"); // Root of files required for Scope related tests. Used primarily for local runs
37-
private readonly string TestExtDir = Path.Combine("Tests", "Ext"); // Directory with external binaries checked in. Eg libvw.dll
3834

39-
private const string SamplesRootRelPath = @"Samples"; // Root location of Samples. Used primarily for local runs
4035
private const string TestDir = @"test";
4136

4237
private const string DataRootRegExp = @"[a-z]:\\[^/\t ]+\\test\\data" + @"\\[^/\t ]+";
@@ -70,10 +65,9 @@ protected BaseTestBaseline(ITestOutputHelper output) : base(output)
7065
/// </summary>
7166
protected const string ProgressLogLine = "--- Progress log ---";
7267

73-
private static readonly char[] _seperators = { '\t', ' ', '=', '%', '(', ')' };
74-
75-
// Full paths to the directories.
76-
private string _baseDir;
68+
// Full paths to the baseline directories.
69+
private string _baselineCommonDir;
70+
private string _baselineBuildStringDir;
7771

7872
// The writer to write to test log files.
7973
protected StreamWriter LogWriter;
@@ -86,13 +80,15 @@ protected override void Initialize()
8680
base.Initialize();
8781

8882
// Create the output and log directories.
89-
Contracts.Check(Directory.Exists(Path.Combine(RootDir, TestDir, "BaselineOutput")));
83+
string baselineRootDir = Path.Combine(RootDir, TestDir, "BaselineOutput");
84+
Contracts.Check(Directory.Exists(baselineRootDir));
85+
86+
_baselineCommonDir = Path.Combine(baselineRootDir, "Common");
87+
_baselineBuildStringDir = Path.Combine(baselineRootDir, BuildString);
88+
9089
string logDir = Path.Combine(OutDir, _logRootRelPath);
9190
Directory.CreateDirectory(logDir);
9291

93-
// Find the sample data and baselines.
94-
_baseDir = Path.Combine(RootDir, _baselineRootRelPath);
95-
9692
string logPath = Path.Combine(logDir, FullTestName + LogSuffix);
9793
LogWriter = OpenWriter(logPath);
9894
_passed = true;
@@ -125,19 +121,6 @@ protected override void Cleanup()
125121

126122
protected bool IsPassing { get { return _passed; } }
127123

128-
// Return the location of the local Samples folder
129-
// Used primarily for local Scope runs
130-
protected string SamplesDir { get { return Path.Combine(RootDir, SamplesRootRelPath); } }
131-
132-
// Return the location of the local scope folder under Samples. Used primarily
133-
// by Scope scripts and for Scope tests
134-
protected string ScopeSamplesDir { get { return Path.Combine(RootDir, ScopeRootRelPath); } }
135-
136-
// Return the local of the directory where external binaries for test purposes are located
137-
protected string ExternalTestBinariesDir { get { return Path.Combine(RootDir, TestExtDir); } }
138-
139-
protected string TestDirectory { get { return Path.Combine(RootDir, TestDir); } }
140-
141124
// Called by a test to signal normal completion. If this is not called before the
142125
// TestScope is disposed, we assume the test was aborted.
143126
protected void Done()
@@ -198,31 +181,28 @@ protected void Log(string fmt, params object[] args)
198181
Output.WriteLine(fmt, args);
199182
}
200183

201-
protected string GetBaselineDir(string subDir)
184+
protected string GetBaselinePath(string name)
202185
{
203186
Contracts.Assert(IsActive);
204-
if (string.IsNullOrWhiteSpace(subDir))
187+
if (string.IsNullOrWhiteSpace(name))
205188
return null;
206-
return Path.GetFullPath(Path.Combine(_baseDir, subDir));
207-
//return Path.Combine(_baseDir, subDir);
189+
190+
return GetBaselinePath(string.Empty, name);
208191
}
209192

210193
protected string GetBaselinePath(string subDir, string name)
211194
{
212195
Contracts.Assert(IsActive);
213-
if (string.IsNullOrWhiteSpace(subDir))
214-
return GetBaselinePath(name);
215-
return Path.GetFullPath(Path.Combine(_baseDir, subDir, name));
216-
//return Path.Combine(_baseDir, subDir, name);
217-
}
196+
subDir = subDir ?? string.Empty;
218197

219-
protected string GetBaselinePath(string name)
220-
{
221-
Contracts.Assert(IsActive);
222-
if (string.IsNullOrWhiteSpace(name))
223-
return null;
224-
//return Path.Combine(_baseDir, name);
225-
return Path.GetFullPath(Path.Combine(_baseDir, name));
198+
// first check the Common folder, and use it if it exists
199+
string commonBaselinePath = Path.GetFullPath(Path.Combine(_baselineCommonDir, subDir, name));
200+
if (File.Exists(commonBaselinePath))
201+
{
202+
return commonBaselinePath;
203+
}
204+
205+
return Path.GetFullPath(Path.Combine(_baselineBuildStringDir, subDir, name));
226206
}
227207

228208
// Inverts the _passed flag. Do not ever use this except in rare conditions. Eg. Recording failure of a test as a success.
@@ -393,36 +373,6 @@ protected bool CheckEqualityCore(string dir, string name, string nameBase, bool
393373
return res;
394374
}
395375

396-
/// <summary>
397-
/// Check whether two files are same ignoring volatile differences (path, dates, times, etc),
398-
/// skipping the given number of lines on the output, and finding the corresponding line
399-
/// in the baseline.
400-
/// </summary>
401-
protected bool CheckEqualityNormalizedFromPaths(string desc, string basePath, string outPath, int skip = 0)
402-
{
403-
Contracts.Assert(IsActive);
404-
Contracts.AssertNonEmpty(basePath);
405-
Contracts.AssertNonEmpty(outPath);
406-
Contracts.Assert(skip >= 0);
407-
408-
if (!CheckOutFile(outPath))
409-
return false;
410-
411-
// Normalize the output file.
412-
Normalize(outPath);
413-
414-
if (!CheckBaseFile(basePath))
415-
return false;
416-
417-
bool res = CheckEqualityFromPathsCore(desc, basePath, outPath, skip);
418-
419-
// No need to keep the raw (unnormalized) output file.
420-
if (res)
421-
File.Delete(outPath + RawSuffix);
422-
423-
return res;
424-
}
425-
426376
private bool FirstIsSuffix<T>(IEnumerator<T> suffix, IEnumerator<T> seq, Func<T, T, bool> equalFunc = null)
427377
{
428378
Contracts.AssertValue(suffix);
@@ -464,11 +414,6 @@ private bool FirstIsSuffix<T>(IEnumerator<T> suffix, IEnumerator<T> seq, Func<T,
464414
return true;
465415
}
466416

467-
private IEnumerator<string> LineEnumerator(TextReader reader)
468-
{
469-
return LineEnumerator(reader, x => false);
470-
}
471-
472417
private IEnumerator<string> LineEnumerator(TextReader reader, Func<string, bool> stop)
473418
{
474419
string result;
@@ -496,41 +441,6 @@ protected bool CheckOutputIsSuffix(string basePath, string outPath, int skip = 0
496441
}
497442
}
498443

499-
/// <summary>
500-
/// Check whether two files are same ignoring volatile differences (path, dates, times, etc),
501-
/// skipping the given number of lines on the output, and finding the corresponding line
502-
/// in the baseline.
503-
/// </summary>
504-
protected bool CheckEqualityNormalized(string dir, string name, string suffix, int skip, int digitsOfPrecision = DigitsOfPrecision)
505-
{
506-
Contracts.Assert(IsActive);
507-
Contracts.AssertValue(dir); // Can be empty.
508-
Contracts.AssertNonEmpty(name);
509-
Contracts.AssertNonEmpty(suffix);
510-
Contracts.Assert(skip >= 0);
511-
512-
string relPath = Path.Combine(dir, name + suffix);
513-
string basePath = GetBaselinePath(dir, name);
514-
string outPath = GetOutputPath(dir, name + suffix);
515-
516-
if (!CheckOutFile(outPath))
517-
return false;
518-
519-
// Normalize the output file.
520-
Normalize(outPath);
521-
522-
if (!CheckBaseFile(basePath))
523-
return false;
524-
525-
bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, skip, digitsOfPrecision);
526-
527-
// No need to keep the raw (unnormalized) output file.
528-
if (res)
529-
File.Delete(outPath + RawSuffix);
530-
531-
return res;
532-
}
533-
534444
protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0, int digitsOfPrecision = DigitsOfPrecision)
535445
{
536446
Contracts.Assert(skip >= 0);
@@ -838,7 +748,7 @@ protected static StreamReader OpenReader(string path)
838748
{
839749
Contracts.CheckNonWhiteSpace(path, nameof(path));
840750
#if CORECLR
841-
return new StreamReader(File.Open(path, FileMode.Open, FileAccess.Read));
751+
return new StreamReader(File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read));
842752
#else
843753
return new StreamReader(path);
844754
#endif
@@ -857,16 +767,6 @@ protected static int MainForTest(string args)
857767
return result;
858768
}
859769
}
860-
861-
protected static string GetEnvironmentVariable(string name)
862-
{
863-
return Environment.GetEnvironmentVariable(name, EnvironmentVariableTarget.Process);
864-
}
865-
866-
protected static void SetEnvironmentVariable(string name, string value)
867-
{
868-
Environment.SetEnvironmentVariable(name, value, EnvironmentVariableTarget.Process);
869-
}
870770
}
871771

872772
public partial class TestBaselineNormalize : BaseTestBaseline

test/Microsoft.ML.TestFramework/TestCommandBase.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1662,15 +1662,15 @@ public void CommandTrainScoreEvaluateSdcaRegression()
16621662
}
16631663

16641664
[TestCategory(Cat), TestCategory("FastForest")]
1665-
[Fact(Skip = "Need CoreTLC specific baseline update")]
1665+
[Fact]
16661666
public void CommandTrainScoreEvaluateQuantileRegression()
16671667
{
16681668
RunMTAThread(() =>
16691669
{
16701670
// First run a training.
1671-
string pathData = GetDataPath(@"..\Housing (regression)", "housing.txt");
1671+
string pathData = GetDataPath("housing.txt");
16721672
OutputPath trainModel = ModelPath();
1673-
TestCore("train", pathData, "loader=text", "lab=Label feat=Features tr=FastForestRegression{dt+}");
1673+
TestCore("train", pathData, "loader=Text{header+}", "lab=Label feat=Features tr=FastForestRegression{dt+}");
16741674

16751675
// Then, run the score.
16761676
_step++;

test/data/README.md

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,14 @@ The dataset is under a CC-by 4.0 license.
6565
}
6666
```
6767

68+
### Boston Housing Data
69+
70+
Redistributing the dataset "[housing.txt](housing.txt)" with attribution:
71+
72+
> Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.
73+
74+
More information: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.names
75+
6876
# Images
6977

7078
### Located in `images` folder

0 commit comments

Comments
 (0)