diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index 58bbb730b2..6b6722cf5d 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -83,14 +83,15 @@ public static void AssertTestTypeDatasetsAreEqual(MLContext mlContext, IDataView
         /// </summary>
         /// <param name="array1">An array of floats.</param>
         /// <param name="array2">An array of floats.</param>
+        /// <param name="precision">The number of decimal places (0-15) to which each pair of elements is rounded before being compared.</param>
-        public static void AssertEqual(float[] array1, float[] array2)
+        public static void AssertEqual(float[] array1, float[] array2, int precision = 6)
         {
             Assert.NotNull(array1);
             Assert.NotNull(array2);
             Assert.Equal(array1.Length, array2.Length);
 
             for (int i = 0; i < array1.Length; i++)
-                Assert.Equal(array1[i], array2[i]);
+                Assert.Equal(array1[i], array2[i], precision: precision);
         }
 
         /// <summary>
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
new file mode 100644
index 0000000000..348d2563f9
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs
@@ -0,0 +1,38 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+    /// <summary>
+    /// A class to hold a feature column.
+    /// </summary>
+    internal sealed class FeatureColumn
+    {
+        public float[] Features { get; set; }
+    }
+
+    /// <summary>
+    /// A class to hold the output of FeatureContributionCalculator.
+    /// </summary>
+    internal sealed class FeatureContributionOutput
+    {
+        public float[] FeatureContributions { get; set; }
+    }
+
+    /// <summary>
+    /// A class to hold the Score column.
+    /// </summary>
+    internal sealed class ScoreColumn
+    {
+        public float Score { get; set; }
+    }
+
+    /// <summary>
+    /// A class to hold a vector Score column.
+    /// </summary>
+    internal sealed class VectorScoreColumn
+    {
+        public float[] Score { get; set; }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs b/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs
deleted file mode 100644
index 090ad23646..0000000000
--- a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-namespace Microsoft.ML.Functional.Tests.Datasets
-{
-    /// <summary>
-    /// A class to hold a feature column.
-    /// </summary>
-    internal sealed class FeatureColumn
-    {
-        public float[] Features { get; set; }
-    }
-}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs b/test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs
deleted file mode 100644
index 6aa8dcbb11..0000000000
--- a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-namespace Microsoft.ML.Functional.Tests.Datasets
-{
-    /// <summary>
-    /// A class to hold the output of FeatureContributionCalculator
-    /// </summary>
-    internal sealed class FeatureContributionOutput
-    {
-        public float[] FeatureContributions { get; set; }
-    }
-}
diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs
new file mode 100644
index 0000000000..3ece5658b8
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs
@@ -0,0 +1,179 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.IO;
+using Microsoft.ML.Functional.Tests.Datasets;
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Microsoft.ML.TestFramework.Attributes;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Microsoft.ML.Functional.Tests
+{
+    public class ONNX : BaseTestClass
+    {
+        public ONNX(ITestOutputHelper output) : base(output)
+        {
+        }
+
+        /// <summary>
+        /// ONNX: A FastTree regression model can be serialized to ONNX, loaded back into ML.NET, and used in a pipeline.
+        /// </summary>
+        [OnnxFactAttribute]
+        public void SaveOnnxModelLoadAndScoreFastTree()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Transforms.Normalize("Features"))
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.Regression.Trainers.FastTree(
+                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Serialize the fitted model to an ONNX file.
+            var modelFileName = "SaveOnnxLoadAndScoreFastTreeModel.onnx";
+            var modelPath = DeleteOutputPath(modelFileName);
+            using (var file = File.Create(modelPath))
+                mlContext.Model.ConvertToOnnx(model, data, file);
+
+            // Load the model as a transform.
+            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
+            var onnxModel = onnxEstimator.Fit(data);
+
+            // TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
+            // TODO #2981: ONNX models cannot be fit as part of a pipeline, so we must use a workaround like this.
+            var onnxWorkaroundPipeline = onnxModel.Append(
+                mlContext.Transforms.CopyColumns("Score", "Score0").Fit(onnxModel.Transform(data)));
+
+            // Create prediction engines for the original and the ONNX model so their predictions can be compared.
+            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
+            // TODO #2982: ONNX produces vector types and not the original output type.
+            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxWorkaroundPipeline);
+
+            // Take a handful of examples out of the dataset and compute predictions.
+            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
+            foreach (var row in dataEnumerator)
+            {
+                var originalPrediction = originalPredictionEngine.Predict(row);
+                var onnxPrediction = onnxPredictionEngine.Predict(row);
+                // Check that the predictions agree to 4 decimal places; the ONNX score is the first element of a vector.
+                Assert.Equal(originalPrediction.Score, onnxPrediction.Score[0], precision: 4); // Note the low-precision equality!
+            }
+        }
+
+        /// <summary>
+        /// ONNX: A KMeans clustering model can be serialized to ONNX, loaded back into ML.NET, and used in a pipeline.
+        /// </summary>
+        [OnnxFactAttribute]
+        public void SaveOnnxModelLoadAndScoreKMeans()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Transforms.Normalize("Features"))
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.Clustering.Trainers.KMeans(
+                    new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Serialize the fitted model to an ONNX file.
+            var modelFileName = "SaveOnnxLoadAndScoreKMeansModel.onnx";
+            var modelPath = DeleteOutputPath(modelFileName);
+            using (var file = File.Create(modelPath))
+                mlContext.Model.ConvertToOnnx(model, data, file);
+
+            // Load the model as a transform.
+            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
+            var onnxModel = onnxEstimator.Fit(data);
+
+            // TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
+            // TODO #2981: ONNX models cannot be fit as part of a pipeline, so we must use a workaround like this.
+            var onnxWorkaroundPipeline = onnxModel.Append(
+                mlContext.Transforms.CopyColumns("Score", "Score0").Fit(onnxModel.Transform(data)));
+
+            // Create prediction engines for the original and the ONNX model so their predictions can be compared.
+            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(model);
+            // TODO #2982: ONNX produces vector types and not the original output type.
+            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxWorkaroundPipeline);
+
+            // Take a handful of examples out of the dataset and compute predictions.
+            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
+            foreach (var row in dataEnumerator)
+            {
+                var originalPrediction = originalPredictionEngine.Predict(row);
+                var onnxPrediction = onnxPredictionEngine.Predict(row);
+                // Check that the score vectors agree element-wise to 4 decimal places.
+                Common.AssertEqual(originalPrediction.Score, onnxPrediction.Score, precision: 4); // Note the low precision!
+            }
+        }
+
+        /// <summary>
+        /// ONNX: An SDCA regression model can be serialized to ONNX, loaded back into ML.NET, and used in a pipeline.
+        /// </summary>
+        [OnnxFactAttribute]
+        public void SaveOnnxModelLoadAndScoreSDCA()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Transforms.Normalize("Features"))
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.Regression.Trainers.Sdca(
+                    new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
+
+            // Fit the pipeline.
+            var model = pipeline.Fit(data);
+
+            // Serialize the fitted model to an ONNX file.
+            var modelFileName = "SaveOnnxLoadAndScoreSdcaModel.onnx";
+            var modelPath = DeleteOutputPath(modelFileName);
+            using (var file = File.Create(modelPath))
+                mlContext.Model.ConvertToOnnx(model, data, file);
+
+            // Load the model as a transform.
+            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
+            var onnxModel = onnxEstimator.Fit(data);
+
+            // TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
+            // TODO #2981: ONNX models cannot be fit as part of a pipeline, so we must use a workaround like this.
+            var onnxWorkaroundPipeline = onnxModel.Append(
+                mlContext.Transforms.CopyColumns("Score", "Score0").Fit(onnxModel.Transform(data)));
+
+            // Create prediction engines for the original and the ONNX model so their predictions can be compared.
+            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
+            // TODO #2982: ONNX produces vector types and not the original output type.
+            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxWorkaroundPipeline);
+
+            // Take a handful of examples out of the dataset and compute predictions.
+            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
+            foreach (var row in dataEnumerator)
+            {
+                var originalPrediction = originalPredictionEngine.Predict(row);
+                var onnxPrediction = onnxPredictionEngine.Predict(row);
+                // Check that the predictions agree to 4 decimal places; the ONNX score is the first element of a vector.
+                Assert.Equal(originalPrediction.Score, onnxPrediction.Score[0], precision: 4); // Note the low-precision equality!
+            }
+        }
+    }
+}