diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
index 348d2563f9..8e4f45a71b 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
@@ -13,7 +13,7 @@ internal sealed class FeatureColumn
}
/// <summary>
- /// A class to hold the output of FeatureContributionCalculator.
+ /// A class to hold the output of FeatureContributionCalculator.
/// </summary>
internal sealed class FeatureContributionOutput
{
@@ -21,7 +21,7 @@ internal sealed class FeatureContributionOutput
}
/// <summary>
- /// A class to hold the Score column.
+ /// A class to hold a score column.
/// </summary>
internal sealed class ScoreColumn
{
@@ -29,7 +29,7 @@ internal sealed class ScoreColumn
}
/// <summary>
- /// A class to hold a vector Score column.
+ /// A class to hold a vector score column.
/// </summary>
internal sealed class VectorScoreColumn
{
diff --git a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
similarity index 78%
rename from test/Microsoft.ML.Functional.Tests/ModelLoading.cs
rename to test/Microsoft.ML.Functional.Tests/ModelFiles.cs
index 4810ad2a09..e1fbe98749 100644
--- a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs
+++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
@@ -4,9 +4,11 @@
using System;
using System.IO;
+using System.IO.Compression;
using System.Linq;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
+using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Transforms;
@@ -15,9 +17,9 @@
namespace Microsoft.ML.Functional.Tests
{
- public partial class ModelLoadingTests : TestDataPipeBase
+ public partial class ModelFiles : TestDataPipeBase
{
- public ModelLoadingTests(ITestOutputHelper output) : base(output)
+ public ModelFiles(ITestOutputHelper output) : base(output)
{
}
@@ -30,6 +32,101 @@ private class InputData
public float[] Features { get; set; }
}
+        /// <summary>
+        /// Model Files: The (minimum) nuget version can be found in the model file.
+        /// </summary>
+        [Fact]
+        public void DetermineNugetVersionFromModel()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Regression.Trainers.FastTree(
+                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Save model to a file.
+            var modelPath = DeleteOutputPath("determineNugetVersionFromModel.zip");
+            mlContext.Model.Save(model, data.Schema, modelPath);
+
+            // Check that the version can be extracted from the model.
+            var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + "Version.txt";
+            using (ZipArchive archive = ZipFile.OpenRead(modelPath))
+            {
+                // Look up the version entry; FirstOrDefault lets the assertion below report a missing entry.
+                var versionEntry = archive.Entries.FirstOrDefault(x => x.FullName == versionFileName);
+                Assert.NotNull(versionEntry);
+                using (var stream = versionEntry.Open())
+                using (var reader = new StreamReader(stream))
+                {
+                    // The first line of the entry is compared against the expected model version.
+                    var line = reader.ReadLine();
+                    Assert.Equal(@"1.0.0.0", line);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Model Files: Save a model, including all transforms, then load and make predictions.
+        /// </summary>
+        /// <remarks>
+        /// Serves two scenarios:
+        /// 1. I can train a model and save it to a file, including transforms.
+        /// 2. Training and prediction happen in different processes (or even different machines).
+        /// The actual test will not run in different processes, but will simulate the idea that the
+        /// "communication pipe" is just a serialized model of some form.
+        /// </remarks>
+        [Fact]
+        public void FitPipelineSaveModelAndPredict()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Regression.Trainers.FastTree(
+                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Save model to a file.
+            var modelPath = DeleteOutputPath("fitPipelineSaveModelAndPredict.zip");
+            mlContext.Model.Save(model, data.Schema, modelPath);
+
+            // Load model from a file and check that the stored input schema matches the training data.
+            ITransformer serializedModel;
+            using (var file = File.OpenRead(modelPath))
+            {
+                serializedModel = mlContext.Model.Load(file, out var serializedSchema);
+                CheckSameSchemas(data.Schema, serializedSchema);
+            }
+
+            // Create one prediction engine per model: the in-memory one and the one loaded from disk.
+            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
+            var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(serializedModel);
+
+            // Take a handful of examples out of the dataset and compute predictions.
+            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
+            foreach (var row in dataEnumerator)
+            {
+                var originalPrediction = originalPredictionEngine.Predict(row);
+                var serializedPrediction = serializedPredictionEngine.Predict(row);
+                // Check that the predictions are identical.
+                Assert.Equal(originalPrediction.Score, serializedPrediction.Score);
+            }
+
+            Done();
+        }
+
[Fact]
public void LoadModelAndExtractPredictor()
{
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
deleted file mode 100644
index faea0c9f70..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.IO;
-using System.Linq;
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
- public partial class ApiScenariosTests
- {
- /// <summary>
- /// Train, save/load model, predict:
- /// Serve the scenario where training and prediction happen in different processes (or even different machines).
- /// The actual test will not run in different processes, but will simulate the idea that the
- /// "communication pipe" is just a serialized model of some form.
- /// </summary>
- [Fact]
- public void TrainSaveModelAndPredict()
- {
- var ml = new MLContext(seed: 1);
- var data = ml.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);
-
- // Pipeline.
- var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
- .AppendCacheCheckpoint(ml)
- .Append(ml.BinaryClassification.Trainers.SdcaNonCalibrated(
- new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
-
- // Train.
- var model = pipeline.Fit(data);
-
- var modelPath = GetOutputPath("temp.zip");
- // Save model.
- ml.Model.Save(model, data.Schema, modelPath);
-
- // Load model.
- var loadedModel = ml.Model.Load(modelPath, out var inputSchema);
-
- // Create prediction engine and test predictions.
- var engine = ml.Model.CreatePredictionEngine(loadedModel, inputSchema);
-
- // Take a couple examples out of the test data and run predictions on top.
- var testData = ml.Data.CreateEnumerable(
- ml.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true), false);
- foreach (var input in testData.Take(5))
- {
- var prediction = engine.Predict(input);
- // Verify that predictions match and scores are separated from zero.
- Assert.Equal(input.Sentiment, prediction.Sentiment);
- Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
- }
- }
- }
-}