diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
index 348d2563f9..8e4f45a71b 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
@@ -21,7 +21,7 @@ internal sealed class FeatureContributionOutput
     }
 
     /// <summary>
-    /// A class to hold the Score column.
+    /// A class to hold a score column.
     /// </summary>
     internal sealed class ScoreColumn
     {
@@ -29,7 +29,7 @@ internal sealed class ScoreColumn
     }
 
     /// <summary>
-    /// A class to hold a vector Score column.
+    /// A class to hold a vector score column.
     /// </summary>
     internal sealed class VectorScoreColumn
     {
diff --git a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
similarity index 78%
rename from test/Microsoft.ML.Functional.Tests/ModelLoading.cs
rename to test/Microsoft.ML.Functional.Tests/ModelFiles.cs
index 4810ad2a09..e1fbe98749 100644
--- a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs
+++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
@@ -4,9 +4,11 @@
 
 using System;
 using System.IO;
+using System.IO.Compression;
 using System.Linq;
 using Microsoft.ML.Calibrators;
 using Microsoft.ML.Data;
+using Microsoft.ML.Functional.Tests.Datasets;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.Trainers.FastTree;
 using Microsoft.ML.Transforms;
@@ -15,9 +17,9 @@
 
 namespace Microsoft.ML.Functional.Tests
 {
-    public partial class ModelLoadingTests : TestDataPipeBase
+    public partial class ModelFiles : TestDataPipeBase
     {
-        public ModelLoadingTests(ITestOutputHelper output) : base(output)
+        public ModelFiles(ITestOutputHelper output) : base(output)
         {
         }
 
@@ -30,6 +32,101 @@ private class InputData
             public float[] Features { get; set; }
         }
 
+        /// <summary>
+        /// Model Files: The (minimum) nuget version can be found in the model file.
+        /// </summary>
+        [Fact]
+        public void DetermineNugetVersionFromModel()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Regression.Trainers.FastTree(
+                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Save model to a file.
+            var modelPath = DeleteOutputPath("determineNugetVersionFromModel.zip");
+            mlContext.Model.Save(model, data.Schema, modelPath);
+
+            // Check that the version can be extracted from the model.
+            var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + "Version.txt";
+            using (ZipArchive archive = ZipFile.OpenRead(modelPath))
+            {
+                // The model version is kept in this entry; GetEntry returns null (it does not throw) when the entry is absent.
+                var versionEntry = archive.GetEntry(versionFileName);
+                Assert.NotNull(versionEntry);
+                using (var stream = versionEntry.Open())
+                using (var reader = new StreamReader(stream))
+                {
+                    // The only line in the file is the version of the model.
+                    var line = reader.ReadLine();
+                    Assert.Equal(@"1.0.0.0", line);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Model Files: Save a model, including all transforms, then load and make predictions.
+        /// </summary>
+        /// <remarks>
+        /// Serves two scenarios:
+        ///  1. I can train a model and save it to a file, including transforms.
+        ///  2. Training and prediction happen in different processes (or even different machines).
+        ///     The actual test will not run in different processes, but will simulate the idea that the
+        ///     "communication pipe" is just a serialized model of some form.
+        /// </remarks>
+        [Fact]
+        public void FitPipelineSaveModelAndPredict()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Regression.Trainers.FastTree(
+                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Save model to a file.
+            var modelPath = DeleteOutputPath("fitPipelineSaveModelAndPredict.zip");
+            mlContext.Model.Save(model, data.Schema, modelPath);
+
+            // Load model from a file.
+            ITransformer serializedModel;
+            using (var file = File.OpenRead(modelPath))
+            {
+                serializedModel = mlContext.Model.Load(file, out var serializedSchema);
+                CheckSameSchemas(data.Schema, serializedSchema);
+            }
+
+            // Create prediction engine and test predictions.
+            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
+            var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(serializedModel);
+
+            // Take a handful of examples out of the dataset and compute predictions.
+            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
+            foreach (var row in dataEnumerator)
+            {
+                var originalPrediction = originalPredictionEngine.Predict(row);
+                var serializedPrediction = serializedPredictionEngine.Predict(row);
+                // Check that the predictions are identical.
+                Assert.Equal(originalPrediction.Score, serializedPrediction.Score);
+            }
+
+            Done();
+        }
+
         [Fact]
         public void LoadModelAndExtractPredictor()
         {
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
deleted file mode 100644
index faea0c9f70..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.IO;
-using System.Linq;
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
-    public partial class ApiScenariosTests
-    {
-        /// <summary>
-        /// Train, save/load model, predict:
-        /// Serve the scenario where training and prediction happen in different processes (or even different machines).
-        /// The actual test will not run in different processes, but will simulate the idea that the
-        /// "communication pipe" is just a serialized model of some form.
-        /// </summary>
-        [Fact]
-        public void TrainSaveModelAndPredict()
-        {
-            var ml = new MLContext(seed: 1);
-            var data = ml.Data.LoadFromTextFile<SentimentData>(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);
-
-            // Pipeline.
-            var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
-                .AppendCacheCheckpoint(ml)
-                .Append(ml.BinaryClassification.Trainers.SdcaNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
-
-            // Train.
-            var model = pipeline.Fit(data);
-
-            var modelPath = GetOutputPath("temp.zip");
-            // Save model.
-            ml.Model.Save(model, data.Schema, modelPath);
-
-            // Load model.
-            var loadedModel = ml.Model.Load(modelPath, out var inputSchema);
-
-            // Create prediction engine and test predictions.
-            var engine = ml.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(loadedModel, inputSchema);
-
-            // Take a couple examples out of the test data and run predictions on top.
-            var testData = ml.Data.CreateEnumerable<SentimentData>(
-                ml.Data.LoadFromTextFile<SentimentData>(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true), false);
-            foreach (var input in testData.Take(5))
-            {
-                var prediction = engine.Predict(input);
-                // Verify that predictions match and scores are separated from zero.
-                Assert.Equal(input.Sentiment, prediction.Sentiment);
-                Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
-            }
-        }
-    }
-}