Skip to content

Commit 7dfadca

Browse files
authored
Add a project for functional tests without visibility into internals of ML.NET (#2470)
Adding a project for functional tests without internal access to the ML.NET Library.
1 parent 6526a01 commit 7dfadca

File tree

12 files changed

+226
-88
lines changed

12 files changed

+226
-88
lines changed

Microsoft.ML.sln

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TestFramework"
3333
EndProject
3434
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Predictor.Tests", "test\Microsoft.ML.Predictor.Tests\Microsoft.ML.Predictor.Tests.csproj", "{6B047E09-39C9-4583-96F3-685D84CA4117}"
3535
EndProject
36+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Functional.Tests", "test\Microsoft.ML.Functional.Tests\Microsoft.ML.Functional.Tests.csproj", "{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}"
37+
EndProject
3638
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.ResultProcessor", "src\Microsoft.ML.ResultProcessor\Microsoft.ML.ResultProcessor.csproj", "{3769FCC3-9AFF-4C37-97E9-6854324681DF}"
3739
EndProject
3840
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.FastTree", "src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj", "{B7B593C5-FB8C-4ADA-A638-5B53B47D087E}"
@@ -928,6 +930,18 @@ Global
928930
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
929931
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
930932
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
933+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
934+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
935+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
936+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
937+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
938+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
939+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
940+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.Build.0 = Release|Any CPU
941+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
942+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
943+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
944+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
931945
EndGlobalSection
932946
GlobalSection(SolutionProperties) = preSolution
933947
HideSolutionNode = FALSE
@@ -1011,6 +1025,7 @@ Global
10111025
{85D0CAFD-2FE8-496A-88C7-585D35B94243} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
10121026
{31D38B21-102B-41C0-9E0A-2FE0BF68D123} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
10131027
{5E920CAC-5A28-42FB-936E-49C472130953} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
1028+
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
10141029
EndGlobalSection
10151030
GlobalSection(ExtensibilityGlobals) = postSolution
10161031
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}

build/Dependencies.props

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
<PropertyGroup>
4444
<BenchmarkDotNetVersion>0.11.3</BenchmarkDotNetVersion>
4545
<MicrosoftMLTestModelsPackageVersion>0.0.3-test</MicrosoftMLTestModelsPackageVersion>
46+
<MicrosoftMLTensorFlowTestModelsVersion>0.0.7-test</MicrosoftMLTensorFlowTestModelsVersion>
47+
<MicrosoftMLOnnxTestModelsVersion>0.0.4-test</MicrosoftMLOnnxTestModelsVersion>
4648
</PropertyGroup>
4749

4850
</Project>

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@ public static class DatasetUtils
1717
/// Downloads the housing dataset from the ML.NET repo.
1818
/// </summary>
1919
public static string DownloadHousingRegressionDataset()
20-
=> Download("https://github.com/raw/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", "housing.txt");
20+
{
21+
var fileName = "housing.txt";
22+
if (!File.Exists(fileName))
23+
Download("https://github.com/raw/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", fileName);
24+
return fileName;
25+
}
2126

2227
public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
2328
{
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.Data.DataView;
6+
using Microsoft.ML.Data;
7+
using Microsoft.ML.SamplesUtils;
8+
using Microsoft.ML.Trainers.HalLearners;
9+
using Xunit;
10+
11+
namespace Microsoft.ML.Functional.Tests
12+
{
13+
internal static class Common
14+
{
15+
public static void CheckMetrics(RegressionMetrics metrics)
16+
{
17+
// Perform sanity checks on the metrics
18+
Assert.True(metrics.Rms >= 0);
19+
Assert.True(metrics.L1 >= 0);
20+
Assert.True(metrics.L2 >= 0);
21+
Assert.True(metrics.RSquared <= 1);
22+
}
23+
}
24+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<!-- We are turning off strong naming to ensure we never add `InternalsVisibleTo` for these tests -->
5+
<SignAssembly>false</SignAssembly>
6+
<PublicSign>false</PublicSign>
7+
</PropertyGroup>
8+
9+
<ItemGroup>
10+
<ProjectReference Include="..\..\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
11+
<ProjectReference Include="..\..\src\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
12+
<ProjectReference Include="..\..\src\Microsoft.ML.EntryPoints\Microsoft.ML.EntryPoints.csproj" />
13+
<ProjectReference Include="..\..\src\Microsoft.ML.HalLearners\Microsoft.ML.HalLearners.csproj" />
14+
<ProjectReference Include="..\..\src\Microsoft.ML.ImageAnalytics\Microsoft.ML.ImageAnalytics.csproj" />
15+
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
16+
<ProjectReference Include="..\..\src\Microsoft.ML.SamplesUtils\Microsoft.ML.SamplesUtils.csproj" />
17+
<ProjectReference Include="..\..\src\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
18+
<ProjectReference Include="..\..\src\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />
19+
<ProjectReference Include="..\..\src\Microsoft.ML.OnnxTransform\Microsoft.ML.OnnxTransform.csproj" />
20+
<ProjectReference Include="..\..\src\Microsoft.ML.PCA\Microsoft.ML.PCA.csproj" />
21+
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
22+
<ProjectReference Include="..\..\src\Microsoft.ML.Recommender\Microsoft.ML.Recommender.csproj" />
23+
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
24+
<ProjectReference Include="..\..\src\Microsoft.ML.Onnx\Microsoft.ML.Onnx.csproj" />
25+
<ProjectReference Include="..\..\src\Microsoft.ML.StaticPipe\Microsoft.ML.StaticPipe.csproj" />
26+
<ProjectReference Include="..\..\src\Microsoft.ML.TensorFlow.StaticPipe\Microsoft.ML.TensorFlow.StaticPipe.csproj" />
27+
<ProjectReference Include="..\..\src\Microsoft.ML.TensorFlow\Microsoft.ML.TensorFlow.csproj" />
28+
<ProjectReference Include="..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
29+
<ProjectReference Include="..\Microsoft.ML.TestFramework\Microsoft.ML.TestFramework.csproj" />
30+
</ItemGroup>
31+
32+
<ItemGroup>
33+
<NativeAssemblyReference Include="CpuMathNative" />
34+
<NativeAssemblyReference Include="FastTreeNative" />
35+
<NativeAssemblyReference Include="FactorizationMachineNative" />
36+
<NativeAssemblyReference Include="MatrixFactorizationNative" />
37+
<NativeAssemblyReference Include="LdaNative" />
38+
<NativeAssemblyReference Include="SymSgdNative" />
39+
<NativeAssemblyReference Include="MklProxyNative" />
40+
<NativeAssemblyReference Include="MklImports" />
41+
</ItemGroup>
42+
43+
<!-- TensorFlow is 64-bit only -->
44+
<ItemGroup Condition="'$(NativeTargetArchitecture)' == 'x64'">
45+
<NativeAssemblyReference Include="tensorflow" />
46+
<NativeAssemblyReference Condition="'$(OS)' != 'Windows_NT'" Include="tensorflow_framework" />
47+
</ItemGroup>
48+
<ItemGroup>
49+
<PackageReference Include="Microsoft.ML.TensorFlow.TestModels" Version="$(MicrosoftMLTensorFlowTestModelsVersion)" />
50+
<PackageReference Include="Microsoft.ML.Onnx.TestModels" Version="$(MicrosoftMLOnnxTestModelsVersion)" />
51+
</ItemGroup>
52+
</Project>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.ML.RunTests;
6+
using Microsoft.ML.TestFramework;
7+
using Xunit;
8+
9+
namespace Microsoft.ML.Functional.Tests
10+
{
11+
public class PredictionScenarios
12+
{
13+
/// <summary>
14+
/// Reconfigurable predictions: The following should be possible: A user trains a binary classifier,
15+
/// and through the test evaluator gets a PR curve, then, based on the PR curve, picks a new threshold
16+
/// and configures the scorer (or more precisely instantiates a new scorer over the same model parameters)
17+
/// with some threshold derived from that.
18+
/// </summary>
19+
[Fact]
20+
public void ReconfigurablePrediction()
21+
{
22+
var mlContext = new MLContext(seed: 789);
23+
24+
// Get the dataset and split it into train and test sets
25+
var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
26+
.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
27+
(var train, var test) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
28+
29+
// Create a pipeline to train on the housing data
30+
var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
31+
"CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
32+
"PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
33+
.Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
34+
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares());
35+
36+
var model = pipeline.Fit(train);
37+
38+
var scoredTest = model.Transform(test);
39+
var metrics = mlContext.Regression.Evaluate(scoredTest);
40+
41+
Common.CheckMetrics(metrics);
42+
43+
// Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
44+
// This is no longer possible in the API
45+
//var newModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(ml, model.Model, trainData.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
46+
//var newScoredTest = newModel.Transform(pipeline.Transform(testData));
47+
//var newMetrics = mlContext.BinaryClassification.Evaluate(scoredTest);
48+
// And the Threshold and ThresholdColumn properties are not settable.
49+
//var predictor = model.LastTransformer;
50+
//predictor.Threshold = 0.01; // Not possible
51+
}
52+
}
53+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.Data.DataView;
6+
using Microsoft.ML.Data;
7+
using Microsoft.ML.RunTests;
8+
using Microsoft.ML.TestFramework;
9+
using Microsoft.ML.Trainers.HalLearners;
10+
using Xunit;
11+
12+
namespace Microsoft.ML.Functional.Tests
13+
{
14+
public class ValidationScenarios
15+
{
16+
/// <summary>
17+
/// Cross-validation: Have a mechanism to do cross validation, that is, you come up with
18+
/// a data source (optionally with stratification column), come up with an instantiable transform
19+
/// and trainer pipeline, and it will handle (1) splitting up the data, (2) training the separate
20+
/// pipelines on in-fold data, (3) scoring on the out-fold data, (4) returning the set of
21+
/// metrics, trained pipelines, and scored test data for each fold.
22+
/// </summary>
23+
[Fact]
24+
void CrossValidation()
25+
{
26+
var mlContext = new MLContext(seed: 789);
27+
28+
// Get the dataset
29+
var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
30+
.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
31+
32+
// Create a pipeline to train on the housing data
33+
var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
34+
"CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
35+
"PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
36+
.Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
37+
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares());
38+
39+
// Compute the CV result
40+
var cvResult = mlContext.Regression.CrossValidate(data, pipeline, numFolds: 5);
41+
42+
// Check that the results are valid
43+
Assert.IsType<RegressionMetrics>(cvResult[0].metrics);
44+
Assert.IsType<TransformerChain<RegressionPredictionTransformer<OlsLinearRegressionModelParameters>>>(cvResult[0].model);
45+
Assert.True(cvResult[0].scoredTestData is IDataView);
46+
Assert.Equal(5, cvResult.Length);
47+
48+
// And validate the metrics
49+
foreach (var result in cvResult)
50+
Common.CheckMetrics(result.metrics);
51+
}
52+
}
53+
}

test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
<ProjectReference Include="..\Microsoft.ML.TestFramework\Microsoft.ML.TestFramework.csproj" />
1212
</ItemGroup>
1313
<ItemGroup>
14-
<PackageReference Include="Microsoft.ML.Onnx.TestModels" Version="0.0.4-test" />
14+
<PackageReference Include="Microsoft.ML.Onnx.TestModels" Version="$(MicrosoftMLOnnxTestModelsVersion)" />
1515
</ItemGroup>
1616

1717
<ItemGroup>

test/Microsoft.ML.TestFramework/Datasets.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,24 @@ public static class TestDatasets
158158
name = "housing",
159159
trainFilename = "housing.txt",
160160
testFilename = "housing.txt",
161-
loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}"
161+
loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}",
162+
GetLoaderColumns = () =>
163+
{
164+
return new[] {
165+
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
166+
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
167+
new TextLoader.Column("PercentResidental", DataKind.R4, 2),
168+
new TextLoader.Column("PercentNonRetail", DataKind.R4, 3),
169+
new TextLoader.Column("CharlesRiver", DataKind.R4, 4),
170+
new TextLoader.Column("NitricOxides", DataKind.R4, 5),
171+
new TextLoader.Column("RoomsPerDwelling", DataKind.R4, 6),
172+
new TextLoader.Column("PercentPre40s", DataKind.R4, 7),
173+
new TextLoader.Column("EmploymentDistance", DataKind.R4, 8),
174+
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
175+
new TextLoader.Column("TaxRate", DataKind.R4, 10),
176+
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
177+
};
178+
}
162179
};
163180

164181
public static TestDataset generatedRegressionDatasetmacro = new TestDataset

test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
<NativeAssemblyReference Condition="'$(OS)' != 'Windows_NT'" Include="tensorflow_framework" />
4747
</ItemGroup>
4848
<ItemGroup>
49-
<PackageReference Include="Microsoft.ML.TensorFlow.TestModels" Version="0.0.7-test" />
50-
<PackageReference Include="Microsoft.ML.Onnx.TestModels" Version="0.0.2-test" />
49+
<PackageReference Include="Microsoft.ML.TensorFlow.TestModels" Version="$(MicrosoftMLTensorFlowTestModelsVersion)" />
50+
<PackageReference Include="Microsoft.ML.Onnx.TestModels" Version="$(MicrosoftMLOnnxTestModelsVersion)" />
5151
</ItemGroup>
5252
</Project>

test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs

Lines changed: 0 additions & 36 deletions
This file was deleted.

test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs

Lines changed: 0 additions & 47 deletions
This file was deleted.

0 commit comments

Comments
 (0)