-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Enable a QuantileRegression Test & Fix Duplicated Baseline Files #1193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Wrote 506 rows across 3 columns in %Time% |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Wrote 506 rows of length 19 |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
L1(avg): 1.926877 | ||
L2(avg): 10.630326 | ||
RMS(avg): 3.260418 | ||
Loss-fn(avg): 10.630326 | ||
R Squared: 0.874077 | ||
|
||
OVERALL RESULTS | ||
--------------------------------------- | ||
L1(avg): 1.926877 (0.0000) | ||
L2(avg): 10.630326 (0.0000) | ||
RMS(avg): 3.260418 (0.0000) | ||
Loss-fn(avg): 10.630326 (0.0000) | ||
R Squared: 0.874077 (0.0000) | ||
|
||
--------------------------------------- |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
L1(avg): 1.926877 | ||
L2(avg): 10.630326 | ||
RMS(avg): 3.260418 | ||
Loss-fn(avg): 10.630326 | ||
R Squared: 0.874077 | ||
|
||
OVERALL RESULTS | ||
--------------------------------------- | ||
L1(avg): 1.926877 (0.0000) | ||
L2(avg): 10.630326 (0.0000) | ||
RMS(avg): 3.260418 (0.0000) | ||
Loss-fn(avg): 10.630326 (0.0000) | ||
R Squared: 0.874077 (0.0000) | ||
|
||
--------------------------------------- |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
maml.exe Train tr=FastForestRegression{dt+} loader=Text{header+} data=%Data% out=%Output% | ||
Not adding a normalizer. | ||
Making per-feature arrays | ||
Changing data from row-wise to column-wise on disk | ||
Processed 506 instances | ||
Binning and forming Feature objects | ||
Reserved memory for tree learner: 290472 bytes | ||
Starting to train ... | ||
Not training a calibrator because it is not needed. | ||
Physical memory usage(MB): %Number% | ||
Virtual memory usage(MB): %Number% | ||
%DateTime% Time elapsed(s): %Number% | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,6 @@ | |
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using System.Text.RegularExpressions; | ||
using System.Threading; | ||
|
@@ -31,12 +30,8 @@ protected BaseTestBaseline(ITestOutputHelper output) : base(output) | |
|
||
internal const string RawSuffix = ".raw"; | ||
private const string LogSuffix = ".log"; | ||
private readonly string _baselineRootRelPath = Path.Combine(TestDir, "BaselineOutput", BuildString); // Relative to Root. | ||
private readonly string _logRootRelPath = Path.Combine("Logs", BuildString); // Relative to OutDir. | ||
private readonly string ScopeRootRelPath = Path.Combine("Samples", "scope"); // Root of files required for Scope related tests. Used primarily for local runs | ||
private readonly string TestExtDir = Path.Combine("Tests", "Ext"); // Directory with external binaries checked in. Eg libvw.dll | ||
|
||
private const string SamplesRootRelPath = @"Samples"; // Root location of Samples. Used primarily for local runs | ||
private const string TestDir = @"test"; | ||
|
||
private const string DataRootRegExp = @"[a-z]:\\[^/\t ]+\\test\\data" + @"\\[^/\t ]+"; | ||
|
@@ -70,10 +65,9 @@ protected BaseTestBaseline(ITestOutputHelper output) : base(output) | |
/// </summary> | ||
protected const string ProgressLogLine = "--- Progress log ---"; | ||
|
||
private static readonly char[] _seperators = { '\t', ' ', '=', '%', '(', ')' }; | ||
|
||
// Full paths to the directories. | ||
private string _baseDir; | ||
// Full paths to the baseline directories. | ||
private string _baselineCommonDir; | ||
private string _baselineBuildStringDir; | ||
|
||
// The writer to write to test log files. | ||
protected StreamWriter LogWriter; | ||
|
@@ -86,13 +80,15 @@ protected override void Initialize() | |
base.Initialize(); | ||
|
||
// Create the output and log directories. | ||
Contracts.Check(Directory.Exists(Path.Combine(RootDir, TestDir, "BaselineOutput"))); | ||
string baselineRootDir = Path.Combine(RootDir, TestDir, "BaselineOutput"); | ||
Contracts.Check(Directory.Exists(baselineRootDir)); | ||
|
||
_baselineCommonDir = Path.Combine(baselineRootDir, "Common"); | ||
_baselineBuildStringDir = Path.Combine(baselineRootDir, BuildString); | ||
|
||
string logDir = Path.Combine(OutDir, _logRootRelPath); | ||
Directory.CreateDirectory(logDir); | ||
|
||
// Find the sample data and baselines. | ||
_baseDir = Path.Combine(RootDir, _baselineRootRelPath); | ||
|
||
string logPath = Path.Combine(logDir, FullTestName + LogSuffix); | ||
LogWriter = OpenWriter(logPath); | ||
_passed = true; | ||
|
@@ -125,19 +121,6 @@ protected override void Cleanup() | |
|
||
protected bool IsPassing { get { return _passed; } } | ||
|
||
// Return the location of the local Samples folder | ||
// Used primarily for local Scope runs | ||
protected string SamplesDir { get { return Path.Combine(RootDir, SamplesRootRelPath); } } | ||
|
||
// Return the location of the local scope folder under Samples. Used primarily | ||
// by Scope scripts and for Scope tests | ||
protected string ScopeSamplesDir { get { return Path.Combine(RootDir, ScopeRootRelPath); } } | ||
|
||
// Return the location of the directory where external binaries for test purposes are located | ||
protected string ExternalTestBinariesDir { get { return Path.Combine(RootDir, TestExtDir); } } | ||
|
||
protected string TestDirectory { get { return Path.Combine(RootDir, TestDir); } } | ||
|
||
// Called by a test to signal normal completion. If this is not called before the | ||
// TestScope is disposed, we assume the test was aborted. | ||
protected void Done() | ||
|
@@ -198,31 +181,28 @@ protected void Log(string fmt, params object[] args) | |
Output.WriteLine(fmt, args); | ||
} | ||
|
||
protected string GetBaselineDir(string subDir) | ||
protected string GetBaselinePath(string name) | ||
{ | ||
Contracts.Assert(IsActive); | ||
if (string.IsNullOrWhiteSpace(subDir)) | ||
if (string.IsNullOrWhiteSpace(name)) | ||
return null; | ||
return Path.GetFullPath(Path.Combine(_baseDir, subDir)); | ||
//return Path.Combine(_baseDir, subDir); | ||
|
||
return GetBaselinePath(string.Empty, name); | ||
} | ||
|
||
protected string GetBaselinePath(string subDir, string name) | ||
{ | ||
Contracts.Assert(IsActive); | ||
if (string.IsNullOrWhiteSpace(subDir)) | ||
return GetBaselinePath(name); | ||
return Path.GetFullPath(Path.Combine(_baseDir, subDir, name)); | ||
//return Path.Combine(_baseDir, subDir, name); | ||
} | ||
subDir = subDir ?? string.Empty; | ||
|
||
protected string GetBaselinePath(string name) | ||
{ | ||
Contracts.Assert(IsActive); | ||
if (string.IsNullOrWhiteSpace(name)) | ||
return null; | ||
//return Path.Combine(_baseDir, name); | ||
return Path.GetFullPath(Path.Combine(_baseDir, name)); | ||
// first check the Common folder, and use it if it exists | ||
string commonBaselinePath = Path.GetFullPath(Path.Combine(_baselineCommonDir, subDir, name)); | ||
if (File.Exists(commonBaselinePath)) | ||
{ | ||
return commonBaselinePath; | ||
} | ||
|
||
return Path.GetFullPath(Path.Combine(_baselineBuildStringDir, subDir, name)); | ||
} | ||
|
||
// Inverts the _passed flag. Do not ever use this except in rare conditions. Eg. Recording failure of a test as a success. | ||
|
@@ -393,36 +373,6 @@ protected bool CheckEqualityCore(string dir, string name, string nameBase, bool | |
return res; | ||
} | ||
|
||
/// <summary> | ||
/// Check whether two files are same ignoring volatile differences (path, dates, times, etc), | ||
/// skipping the given number of lines on the output, and finding the corresponding line | ||
/// in the baseline. | ||
/// </summary> | ||
protected bool CheckEqualityNormalizedFromPaths(string desc, string basePath, string outPath, int skip = 0) | ||
{ | ||
Contracts.Assert(IsActive); | ||
Contracts.AssertNonEmpty(basePath); | ||
Contracts.AssertNonEmpty(outPath); | ||
Contracts.Assert(skip >= 0); | ||
|
||
if (!CheckOutFile(outPath)) | ||
return false; | ||
|
||
// Normalize the output file. | ||
Normalize(outPath); | ||
|
||
if (!CheckBaseFile(basePath)) | ||
return false; | ||
|
||
bool res = CheckEqualityFromPathsCore(desc, basePath, outPath, skip); | ||
|
||
// No need to keep the raw (unnormalized) output file. | ||
if (res) | ||
File.Delete(outPath + RawSuffix); | ||
|
||
return res; | ||
} | ||
|
||
private bool FirstIsSuffix<T>(IEnumerator<T> suffix, IEnumerator<T> seq, Func<T, T, bool> equalFunc = null) | ||
{ | ||
Contracts.AssertValue(suffix); | ||
|
@@ -464,11 +414,6 @@ private bool FirstIsSuffix<T>(IEnumerator<T> suffix, IEnumerator<T> seq, Func<T, | |
return true; | ||
} | ||
|
||
private IEnumerator<string> LineEnumerator(TextReader reader) | ||
{ | ||
return LineEnumerator(reader, x => false); | ||
} | ||
|
||
private IEnumerator<string> LineEnumerator(TextReader reader, Func<string, bool> stop) | ||
{ | ||
string result; | ||
|
@@ -496,41 +441,6 @@ protected bool CheckOutputIsSuffix(string basePath, string outPath, int skip = 0 | |
} | ||
} | ||
|
||
/// <summary> | ||
/// Check whether two files are same ignoring volatile differences (path, dates, times, etc), | ||
/// skipping the given number of lines on the output, and finding the corresponding line | ||
/// in the baseline. | ||
/// </summary> | ||
protected bool CheckEqualityNormalized(string dir, string name, string suffix, int skip, int digitsOfPrecision = DigitsOfPrecision) | ||
{ | ||
Contracts.Assert(IsActive); | ||
Contracts.AssertValue(dir); // Can be empty. | ||
Contracts.AssertNonEmpty(name); | ||
Contracts.AssertNonEmpty(suffix); | ||
Contracts.Assert(skip >= 0); | ||
|
||
string relPath = Path.Combine(dir, name + suffix); | ||
string basePath = GetBaselinePath(dir, name); | ||
string outPath = GetOutputPath(dir, name + suffix); | ||
|
||
if (!CheckOutFile(outPath)) | ||
return false; | ||
|
||
// Normalize the output file. | ||
Normalize(outPath); | ||
|
||
if (!CheckBaseFile(basePath)) | ||
return false; | ||
|
||
bool res = CheckEqualityFromPathsCore(relPath, basePath, outPath, skip, digitsOfPrecision); | ||
|
||
// No need to keep the raw (unnormalized) output file. | ||
if (res) | ||
File.Delete(outPath + RawSuffix); | ||
|
||
return res; | ||
} | ||
|
||
protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0, int digitsOfPrecision = DigitsOfPrecision) | ||
{ | ||
Contracts.Assert(skip >= 0); | ||
|
@@ -838,7 +748,7 @@ protected static StreamReader OpenReader(string path) | |
{ | ||
Contracts.CheckNonWhiteSpace(path, nameof(path)); | ||
#if CORECLR | ||
return new StreamReader(File.Open(path, FileMode.Open, FileAccess.Read)); | ||
return new StreamReader(File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI - this is a nicety when debugging tests and trying to use Beyond Compare (or similar) to diff the output and baseline files. As currently implemented, it is impossible to open these files in 2 processes at the same time. #Resolved |
||
#else | ||
return new StreamReader(path); | ||
#endif | ||
|
@@ -857,16 +767,6 @@ protected static int MainForTest(string args) | |
return result; | ||
} | ||
} | ||
|
||
protected static string GetEnvironmentVariable(string name) | ||
{ | ||
return Environment.GetEnvironmentVariable(name, EnvironmentVariableTarget.Process); | ||
} | ||
|
||
protected static void SetEnvironmentVariable(string name, string value) | ||
{ | ||
Environment.SetEnvironmentVariable(name, value, EnvironmentVariableTarget.Process); | ||
} | ||
} | ||
|
||
public partial class TestBaselineNormalize : BaseTestBaseline | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,14 @@ The dataset is under a CC-by 4.0 license. | |
} | ||
``` | ||
|
||
### Boston Housing Data | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a more info link? I also added a link to our copy of the dataset so folks know what we are referencing.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. #Closed |
||
|
||
Redistributing the dataset "[housing.txt](housing.txt)" with attribution: | ||
|
||
> Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978. | ||
|
||
More information: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.names | ||
|
||
# Images | ||
|
||
### Located in `images` folder | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @yaeldekel to check whether this conflicts with her work. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for thinking of that work, @sfilipi.
If there are things that are only used in the internal repo, we should move them to only be in the internal repo. It isn't sustainable to have a bunch of methods in dotnet/machinelearning that are not used.
@yaeldekel - if any of these are necessary for your work, can you add them to a `partial` class in the internal repo? #Resolved