-
Notifications
You must be signed in to change notification settings - Fork 1.9k
switch housing dataset to wine #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Directory.Build.props))\Directory.Build.props" /> | ||
|
||
<Import Project="$(ToolsDir)VersionTools.targets" Condition="Exists('$(ToolsDir)VersionTools.targets')" /> | ||
|
||
<UsingTask TaskName="DownloadFilesFromUrl" AssemblyFile="$(ToolsDir)Microsoft.DotNet.Build.Tasks.dll"/> | ||
<PropertyGroup> | ||
<!-- To disable the restoration of packages, set RestoreDuringBuild=false or pass /p:RestoreDuringBuild=false.--> | ||
<RestoreDuringBuild Condition="'$(RestoreDuringBuild)'==''">true</RestoreDuringBuild> | ||
|
@@ -33,6 +33,7 @@ | |
RestoreProjects; | ||
BuildNative; | ||
$(TraversalBuildDependsOn); | ||
DownloadExternalTestFiles; | ||
RunTests; | ||
</TraversalBuildDependsOn> | ||
</PropertyGroup> | ||
|
@@ -62,7 +63,18 @@ | |
<MSBuild Projects="@(PkgProject)" | ||
Targets="Pack" /> | ||
</Target> | ||
|
||
|
||
<ItemGroup> | ||
<TestFile Include="d" Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"/> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (minor) Can you give the "Include" a better name? For example: <TestFile Include="winequality" Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"/> |
||
</ItemGroup> | ||
|
||
<Target Name="DownloadExternalTestFiles" Condition="'$(RunTests)'=='true'"> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should always do this, not just when `$(RunTests)` is `true`. However, we should also have "Inputs and Outputs" hooked up, so that the target is skipped if the files already exist. Check out https://msdn.microsoft.com/en-us/library/ms171483.aspx for how to make incremental builds work. Let me know if you need help. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any chance you can help me with that?
It keeps downloading the files, and I'm not sure how to debug it properly. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @eerhardt Can you help me? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The way Inputs and Outputs work is based on file timestamps. You need an actual Input file, so MSBuild can compare its last write time to the output file's last write time. Here when you specify:
It uses the Another option is to check if the local file already exists, and if it does, then don't do the download. The above will download any URL in the The item is "Condition"d on whether the file exists or not:
So if the |
||
<Message Importance="High" Text="Downloading external test files..." /> | ||
<DownloadFilesFromUrl Items="@(TestFile)" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reasonable way to check the file hash? At some point the UCI site will be broken, and/or we will receive (perhaps) a partial file. As we expand on the use of this, there'll be additional ways for it to fail. |
||
DestinationDir="test/data/external" | ||
TreatErrorsAsWarnings="true"/> | ||
</Target> | ||
|
||
<Target Name="RunTests" Condition="'$(RunTests)'=='true'"> | ||
<MSBuild Projects="test\run-tests.proj" | ||
Targets="RunTests" /> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -729,7 +729,7 @@ public void EntryPointTextToKeyToText() | |
} | ||
|
||
private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath, | ||
string instanceMetricsPath, string confusionMatrixPath = null) | ||
string instanceMetricsPath, string confusionMatrixPath = null, string loader = null) | ||
{ | ||
string inputGraph = string.Format(@" | ||
{{ | ||
|
@@ -738,6 +738,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data | |
'Name': 'Data.TextLoader', | ||
'Inputs': {{ | ||
'InputFile': '$file' | ||
{8} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know this already existed before your change, but what do you think about using string interpolation here? I think it would make this so much more readable: 'Inputs': {{
'InputFile': '$file'
{string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader)} |
||
}}, | ||
'Outputs': {{ | ||
'Data': '$AllData' | ||
|
@@ -797,7 +798,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data | |
}} | ||
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath), | ||
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "", | ||
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : ""); | ||
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "", | ||
string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader)); | ||
|
||
var jsonPath = DeleteOutputPath("graph.json"); | ||
File.WriteAllLines(jsonPath, new[] { inputGraph }); | ||
|
@@ -855,15 +857,16 @@ public void EntryPointEvaluateMultiClass() | |
Assert.Equal(3, CountRows(loader)); | ||
} | ||
|
||
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] | ||
[Fact()] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (nit) no need for the parens. You can remove |
||
public void EntryPointEvaluateRegression() | ||
{ | ||
var dataPath = GetDataPath("housing.txt"); | ||
var dataPath = GetDataPath(@"external/winequality-white.csv"); | ||
var warningsPath = DeleteOutputPath("warnings.idv"); | ||
var overallMetricsPath = DeleteOutputPath("overall.idv"); | ||
var instanceMetricsPath = DeleteOutputPath("instance.idv"); | ||
|
||
RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath); | ||
RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", | ||
dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); | ||
|
||
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath)) | ||
Assert.Equal(0, CountRows(loader)); | ||
|
@@ -872,7 +875,7 @@ public void EntryPointEvaluateRegression() | |
Assert.Equal(1, CountRows(loader)); | ||
|
||
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath)) | ||
Assert.Equal(104, CountRows(loader)); | ||
Assert.Equal(975, CountRows(loader)); | ||
} | ||
|
||
[Fact] | ||
|
@@ -887,10 +890,10 @@ public void EntryPointSDCAMultiClass() | |
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier"); | ||
} | ||
|
||
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] | ||
[Fact()] | ||
public void EntryPointSDCARegression() | ||
{ | ||
TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor"); | ||
TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.StochasticDualCoordinateAscentRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); | ||
} | ||
|
||
[Fact] | ||
|
@@ -961,10 +964,10 @@ public void EntryPointHogwildSGD() | |
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier"); | ||
} | ||
|
||
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] | ||
[Fact()] | ||
public void EntryPointPoissonRegression() | ||
{ | ||
TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor"); | ||
TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.PoissonRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); | ||
} | ||
|
||
[Fact] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will this thing download a file on every build or only if the file is not present already?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm trying to make it download the file only once, but I'm struggling with it right now; see Eric's comment and my reply.