diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportanceLoadFromDisk.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportanceLoadFromDisk.cs new file mode 100644 index 0000000000..d0c41484d4 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportanceLoadFromDisk.cs @@ -0,0 +1,106 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Calibrators; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; + +namespace Samples.Dynamic.Trainers.BinaryClassification +{ + public static class PermutationFeatureImportanceLoadFromDisk + { + public static void Example() + { + + var mlContext = new MLContext(seed: 1); + var samples = GenerateData(); + var data = mlContext.Data.LoadFromEnumerable(samples); + + // Create pipeline + var featureColumns = + new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; + var pipeline = mlContext.Transforms + .Concatenate("Features", featureColumns) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) + .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression() + ); + + // Create and save model + var model0 = pipeline.Fit(data); + var lt = model0.LastTransformer; + var modelPath = "./model.zip"; + mlContext.Model.Save(model0, data.Schema, modelPath); + + // Load model + var model = mlContext.Model.Load(modelPath, out var schema); + + // Transform the dataset. 
+ var transformedData = model.Transform(data); + + var linearPredictor = (model as TransformerChain).LastTransformer as BinaryPredictionTransformer>; + + // Execute PFI with the linearPredictor + var permutationMetrics = mlContext.BinaryClassification + .PermutationFeatureImportance(linearPredictor, transformedData, + permutationCount: 30); + + // Sort indices according to PFI results + var sortedIndices = permutationMetrics + .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve }) + .OrderByDescending( + feature => Math.Abs(feature.AreaUnderRocCurve.Mean)) + .Select(feature => feature.index); + + Console.WriteLine("Feature\tModel Weight\tChange in AUC" + + "\t95% Confidence in the Mean Change in AUC"); + var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray(); + foreach (int i in sortedIndices) + { + Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}", + featureColumns[i], + linearPredictor.Model.SubModel.Weights[i], // this way we can access the weights inside the submodel + auc[i].Mean, + 1.96 * auc[i].StandardError); + } + + // Expected output: + // Feature Model Weight Change in AUC 95% Confidence in the Mean Change in AUC + // Feature2 35.15 -0.387 0.002015 + // Feature1 17.94 -0.1514 0.0008963 + } + + private class Data + { + public bool Label { get; set; } + + public float Feature1 { get; set; } + + public float Feature2 { get; set; } + } + + /// Generate Data + private static IEnumerable GenerateData(int nExamples = 10000, + double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1) + { + var rng = new Random(seed); + for (int i = 0; i < nExamples; i++) + { + var data = new Data + { + Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)), + Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)), + }; + + // Create a noisy label. 
+ var value = (float)(bias + weight1 * data.Feature1 + weight2 * + data.Feature2 + rng.NextDouble() - 0.5); + + data.Label = Sigmoid(value) > 0.5; + yield return data; + } + } + + private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x)); + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index c4cc248dc7..44f3d66849 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -8,6 +8,7 @@ using System.Collections.Immutable; using System.IO; using System.Linq; +using System.Reflection; using Microsoft.ML; using Microsoft.ML.Calibrators; using Microsoft.ML.CommandLine; @@ -396,6 +397,7 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] outputNames, string } [BestFriend] + [PredictionTransformerLoadType(typeof(CalibratedModelParametersBase<,>))] internal sealed class ValueMapperCalibratedModelParameters : ValueMapperCalibratedModelParametersBase, ICanSaveModel where TSubModel : class @@ -430,8 +432,8 @@ private static VersionInfo GetVersionInfoBulk() loaderAssemblyName: typeof(ValueMapperCalibratedModelParameters).Assembly.FullName); } - private ValueMapperCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx) - : base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx)) + private ValueMapperCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx, TSubModel predictor, TCalibrator calibrator) + : base(env, RegistrationName, predictor, calibrator) { } @@ -443,7 +445,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL var ver2 = GetVersionInfoBulk(); var ver = ctx.Header.ModelSignature == ver2.ModelSignature ? 
ver2 : ver1; ctx.CheckAtModel(ver); - return new ValueMapperCalibratedModelParameters(env, ctx); + + // Load first the predictor and calibrator + var predictor = GetPredictor(env, ctx); + var calibrator = GetCalibrator(env, ctx); + + // Create a generic type using the correct parameter types of predictor and calibrator + Type genericType = typeof(ValueMapperCalibratedModelParameters<,>); + var genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType); + + return (CalibratedModelParametersBase)genericInstance; } void ICanSaveModel.Save(ModelSaveContext ctx) @@ -456,6 +467,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx) } [BestFriend] + [PredictionTransformerLoadType(typeof(CalibratedModelParametersBase<,>))] internal sealed class FeatureWeightsCalibratedModelParameters : ValueMapperCalibratedModelParametersBase, IPredictorWithFeatureWeights, @@ -487,8 +499,9 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(FeatureWeightsCalibratedModelParameters).Assembly.FullName); } - private FeatureWeightsCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx) - : base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx)) + private FeatureWeightsCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx, + TSubModel predictor, TCalibrator calibrator) + : base(env, RegistrationName, predictor, calibrator) { Host.Check(SubModel is IPredictorWithFeatureWeights, "Predictor does not implement " + nameof(IPredictorWithFeatureWeights)); _featureWeights = (IPredictorWithFeatureWeights)SubModel; @@ -499,7 +512,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); - return new FeatureWeightsCalibratedModelParameters(env, ctx); + + // Load first the predictor and calibrator + var predictor = GetPredictor(env, ctx); + var calibrator = 
GetCalibrator(env, ctx); + + // Create a generic type using the correct parameter types of predictor and calibrator + Type genericType = typeof(FeatureWeightsCalibratedModelParameters<,>); + var genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType); + + return (CalibratedModelParametersBase) genericInstance; } void ICanSaveModel.Save(ModelSaveContext ctx) @@ -520,6 +542,7 @@ public void GetFeatureWeights(ref VBuffer weights) /// Encapsulates a predictor and a calibrator that implement . /// Its implementation of combines both the predictors and the calibrators. /// + [PredictionTransformerLoadType(typeof(CalibratedModelParametersBase <,>))] internal sealed class ParameterMixingCalibratedModelParameters : ValueMapperCalibratedModelParametersBase, IParameterMixer, @@ -553,8 +576,8 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(ParameterMixingCalibratedModelParameters).Assembly.FullName); } - private ParameterMixingCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx) - : base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx)) + private ParameterMixingCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx, TSubModel predictor, TCalibrator calibrator) + : base(env, RegistrationName, predictor, calibrator) { Host.Check(SubModel is IParameterMixer, "Predictor does not implement " + nameof(IParameterMixer)); Host.Check(SubModel is IPredictorWithFeatureWeights, "Predictor does not implement " + nameof(IPredictorWithFeatureWeights)); @@ -566,7 +589,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); - return new ParameterMixingCalibratedModelParameters(env, ctx); + + // Load first the predictor and calibrator + var predictor = GetPredictor(env, ctx); + var calibrator = GetCalibrator(env, ctx); + + // Create 
a generic type using the correct parameter types of predictor and calibrator + Type genericType = typeof(ParameterMixingCalibratedModelParameters<,>); + object genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType); + + return (CalibratedModelParametersBase) genericInstance; } void ICanSaveModel.Save(ModelSaveContext ctx) @@ -777,6 +809,28 @@ ValueMapper> IFeatureContributionMapper.GetFeatureContribut } } + internal static class CreateCalibratedModelParameters + { + internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object predictor, ICalibrator calibrator, Type calibratedModelParametersType) + { + Type[] genericTypeArgs = { predictor.GetType(), calibrator.GetType() }; + Type constructed = calibratedModelParametersType.MakeGenericType(genericTypeArgs); + + Type[] constructorArgs = { + typeof(IHostEnvironment), + typeof(ModelLoadContext), + predictor.GetType(), + calibrator.GetType() + }; + + // Call the appropriate constructor of the created generic type passing on the previously loaded predictor and calibrator + var genericCtor = constructed.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, constructorArgs, null); + object genericInstance = genericCtor.Invoke(new object[] { env, ctx, predictor, calibrator }); + + return genericInstance; + } + } + [BestFriend] internal static class CalibratorUtils { diff --git a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs index 2e4fff8b58..31933dd680 100644 --- a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs +++ b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs @@ -690,12 +690,13 @@ internal static class BinaryPredictionTransformer public static ISingleFeaturePredictionTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { - // Load internal model to be used as TModel of BinaryPredictionTransformer + // Load internal model var host = 
Contracts.CheckRef(env, nameof(env)).Register(nameof(BinaryPredictionTransformer>)); ctx.LoadModel, SignatureLoadModel>(host, out IPredictorProducing model, DirModel); - Type generic = typeof(BinaryPredictionTransformer<>); - return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, generic); + // Returns prediction transformer using the right TModel from the previously loaded model + Type predictionTransformerType = typeof(BinaryPredictionTransformer<>); + return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, predictionTransformerType); } } @@ -706,12 +707,13 @@ internal static class MulticlassPredictionTransformer public static ISingleFeaturePredictionTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { - // Load internal model to be used as TModel of MulticlassPredictionTransformer + // Load internal model var host = Contracts.CheckRef(env, nameof(env)).Register(nameof(MulticlassPredictionTransformer>>)); ctx.LoadModel>, SignatureLoadModel>(host, out IPredictorProducing> model, DirModel); - Type generic = typeof(MulticlassPredictionTransformer<>); - return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, generic); + // Returns prediction transformer using the right TModel from the previously loaded model + Type predictionTransformerType = typeof(MulticlassPredictionTransformer<>); + return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, predictionTransformerType); } } @@ -722,12 +724,13 @@ internal static class RegressionPredictionTransformer public static ISingleFeaturePredictionTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { - // Load internal model to be used as TModel of RegressionPredictionTransformer + // Load internal model var host = Contracts.CheckRef(env, nameof(env)).Register(nameof(RegressionPredictionTransformer>)); ctx.LoadModel, 
SignatureLoadModel>(host, out IPredictorProducing model, DirModel); - Type generic = typeof(RegressionPredictionTransformer<>); - return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, generic); + // Returns prediction transformer using the right TModel from the previously loaded model + Type predictionTransformerType = typeof(RegressionPredictionTransformer<>); + return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, predictionTransformerType); } } @@ -739,58 +742,90 @@ internal static class RankingPredictionTransformer public static ISingleFeaturePredictionTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { - // Load internal model to be used as TModel of RankingPredictionTransformer + // Load internal model var host = Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer>)); ctx.LoadModel, SignatureLoadModel>(host, out IPredictorProducing model, DirModel); - Type generic = typeof(RankingPredictionTransformer<>); - return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, generic); + // Returns prediction transformer using the right TModel from the previously loaded model + Type predictionTransformerType = typeof(RankingPredictionTransformer<>); + return (ISingleFeaturePredictionTransformer) CreatePredictionTransformer.Create(env, ctx, host, model, predictionTransformerType); } } internal static class CreatePredictionTransformer { - internal static object Create(IHostEnvironment env, ModelLoadContext ctx, IHost host, IPredictorProducing model, Type generic) + internal static object Create(IHostEnvironment env, ModelLoadContext ctx, IHost host, IPredictorProducing model, Type predictionTransformerType) { // Create generic type of the prediction transformer using the correct TModel. 
// Return an instance of that type, passing the previously loaded model to the constructor - Type[] genericTypeArgs = { model.GetType() }; - Type constructed = generic.MakeGenericType(genericTypeArgs); - Type[] constructorArgs = { - typeof(IHostEnvironment), - typeof(ModelLoadContext), - typeof(IHost), - model.GetType() - }; - - var genericCtor = constructed.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, constructorArgs, null); + var genericCtor = CreateConstructor(model.GetType(), predictionTransformerType); var genericInstance = genericCtor.Invoke(new object[] { env, ctx, host, model }); return genericInstance; } - internal static object Create(IHostEnvironment env, ModelLoadContext ctx, IHost host, IPredictorProducing> model, Type generic) + internal static object Create(IHostEnvironment env, ModelLoadContext ctx, IHost host, IPredictorProducing> model, Type predictionTransformerType) { // Create generic type of the prediction transformer using the correct TModel. 
// Return an instance of that type, passing the previously loaded model to the constructor - Type[] genericTypeArgs = { model.GetType() }; - Type constructed = generic.MakeGenericType(genericTypeArgs); + + var genericCtor = CreateConstructor(model.GetType(), predictionTransformerType); + var genericInstance = genericCtor.Invoke(new object[] { env, ctx, host, model }); + + return genericInstance; + } + + private static ConstructorInfo CreateConstructor(Type modelType, Type predictionTransformerType) + { + Type modelLoadType = GetLoadType(modelType); + Type[] genericTypeArgs = { modelLoadType }; + Type constructedType = predictionTransformerType.MakeGenericType(genericTypeArgs); Type[] constructorArgs = { typeof(IHostEnvironment), typeof(ModelLoadContext), typeof(IHost), - model.GetType() + modelLoadType }; - var genericCtor = constructed.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, constructorArgs, null); - var genericInstance = genericCtor.Invoke(new object[] { env, ctx, host, model }); + var genericCtor = constructedType.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, constructorArgs, null); + return genericCtor; + } - return genericInstance; + private static Type GetLoadType(Type modelType) + { + // Returns the type that should be assigned as TModel of the Prediction Transformer being loaded + var att = modelType.GetCustomAttribute(typeof(PredictionTransformerLoadTypeAttribute)) as PredictionTransformerLoadTypeAttribute; + if (att != null) + { + if (att.LoadType.IsGenericType && att.LoadType.GetGenericArguments().Length == modelType.GetGenericArguments().Length) + { + // This assumes that if att.LoadType and modelType have the same number of type parameters + // Then they should get the same type parameters. + // This is the case for CalibratedModelParametersBase and its children generic classes. + // But might break if other classes begin using the PredictionTransformerLoadTypeAttribute in the future. 
+ Type[] typeArguments = modelType.GetGenericArguments(); + Type genericType = att.LoadType; + return genericType.MakeGenericType(typeArguments); + } + } + + return modelType; } } + [AttributeUsage(AttributeTargets.Class)] + internal class PredictionTransformerLoadTypeAttribute : Attribute + { + internal Type LoadType { get; } + internal PredictionTransformerLoadTypeAttribute(Type loadtype) + { + LoadType = loadtype; + } + + } + internal static class AnomalyPredictionTransformer { public const string LoaderSignature = "AnomalyPredXfer"; diff --git a/test/Microsoft.ML.Tests/CalibratedModelParametersTests.cs b/test/Microsoft.ML.Tests/CalibratedModelParametersTests.cs new file mode 100644 index 0000000000..5f23d26573 --- /dev/null +++ b/test/Microsoft.ML.Tests/CalibratedModelParametersTests.cs @@ -0,0 +1,161 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using Microsoft.ML.Calibrators; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.Utilities; +using Microsoft.ML.RunTests; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Tests +{ + public class CalibratedModelParametersTests : TestDataPipeBase + { + public CalibratedModelParametersTests(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void TestParameterMixingCalibratedModelParametersLoading() + { + var data = GetDenseDataset(); + var model = ML.BinaryClassification.Trainers.LbfgsLogisticRegression( + new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 }).Fit(data); + + var modelAndSchemaPath = GetOutputPath("TestParameterMixingCalibratedModelParametersLoading.zip"); + ML.Model.Save(model, data.Schema, modelAndSchemaPath); + + var loadedModel = ML.Model.Load(modelAndSchemaPath, out var schema); + var castedModel = loadedModel as BinaryPredictionTransformer>; + + Assert.NotNull(castedModel); + + Type expectedInternalType = typeof(ParameterMixingCalibratedModelParameters); + Assert.Equal(expectedInternalType, castedModel.Model.GetType()); + Assert.Equal(model.Model.GetType(), castedModel.Model.GetType()); + Done(); + } + + [Fact] + public void TestValueMapperCalibratedModelParametersLoading() + { + var data = GetDenseDataset(); + + var model = ML.BinaryClassification.Trainers.Gam( + new GamBinaryTrainer.Options { NumberOfThreads = 1}).Fit(data); + + var modelAndSchemaPath = GetOutputPath("TestValueMapperCalibratedModelParametersLoading.zip"); + ML.Model.Save(model, data.Schema, modelAndSchemaPath); + + var loadedModel = ML.Model.Load(modelAndSchemaPath, out var schema); + var castedModel = loadedModel as BinaryPredictionTransformer>; + + Assert.NotNull(castedModel); + + Type expectedInternalType = typeof(ValueMapperCalibratedModelParameters); + Assert.Equal(expectedInternalType, castedModel.Model.GetType()); + 
Assert.Equal(model.Model.GetType(), castedModel.Model.GetType()); + Done(); + } + + + [Fact] + public void TestFeatureWeightsCalibratedModelParametersLoading() + { + var data = GetDenseDataset(); + + var model = ML.BinaryClassification.Trainers.FastTree( + new FastTreeBinaryTrainer.Options { NumberOfThreads = 1}).Fit(data); + + var modelAndSchemaPath = GetOutputPath("TestFeatureWeightsCalibratedModelParametersLoading.zip"); + ML.Model.Save(model, data.Schema, modelAndSchemaPath); + + var loadedModel = ML.Model.Load(modelAndSchemaPath, out var schema); + var castedModel = loadedModel as BinaryPredictionTransformer>; + + Assert.NotNull(castedModel); + + Type expectedInternalType = typeof(FeatureWeightsCalibratedModelParameters); + Assert.Equal(expectedInternalType, castedModel.Model.GetType()); + Assert.Equal(model.Model.GetType(), castedModel.Model.GetType()); + Done(); + } + + #region Helpers + /// + /// Features: x1, x2, x3, xRand; y = 10*x1 + 20*x2 + 5.5*x3 + e, xRand is random and Label y does not depend on xRand. + /// xRand has the least importance: Evaluation metrics do not change a lot when xRand is permuted. + /// x2 has the biggest importance. + /// + private IDataView GetDenseDataset() + { + // Setup synthetic dataset. + const int numberOfInstances = 1000; + var rand = new Random(10); + float[] yArray = new float[numberOfInstances], + x1Array = new float[numberOfInstances], + x2Array = new float[numberOfInstances], + x3Array = new float[numberOfInstances], + x4RandArray = new float[numberOfInstances]; + + for (var i = 0; i < numberOfInstances; i++) + { + var x1 = rand.Next(1000); + x1Array[i] = x1; + var x2Important = rand.Next(10000); + x2Array[i] = x2Important; + var x3 = rand.Next(5000); + x3Array[i] = x3; + var x4Rand = rand.Next(1000); + x4RandArray[i] = x4Rand; + + var noise = rand.Next(50); + + yArray[i] = (float)(10 * x1 + 20 * x2Important + 5.5 * x3 + noise); + } + + GetBinaryClassificationLabels(yArray); + + // Create data view. 
+ var bldr = new ArrayDataViewBuilder(Env); + bldr.AddColumn("X1", NumberDataViewType.Single, x1Array); + bldr.AddColumn("X2Important", NumberDataViewType.Single, x2Array); + bldr.AddColumn("X3", NumberDataViewType.Single, x3Array); + bldr.AddColumn("X4Rand", NumberDataViewType.Single, x4RandArray); + bldr.AddColumn("Label", NumberDataViewType.Single, yArray); + + var srcDV = bldr.GetDataView(); + var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand") + .Append(ML.Transforms.NormalizeMinMax("Features")); + + return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) + .Fit(srcDV).Transform(srcDV); + } + + private void GetBinaryClassificationLabels(float[] rawScores) + { + float averageScore = GetArrayAverage(rawScores); + + // Center the response and then take the sigmoid to generate the classes + for (int i = 0; i < rawScores.Length; i++) + rawScores[i] = MathUtils.Sigmoid(rawScores[i] - averageScore) > 0.5 ? 1 : 0; + } + + private float GetArrayAverage(float[] scores) + { + // Compute the average so we can center the response + float averageScore = 0.0f; + for (int i = 0; i < scores.Length; i++) + averageScore += scores[i]; + averageScore /= scores.Length; + + return averageScore; + } + #endregion + } +} diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index ac04e5452a..3feca9421b 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -4,8 +4,8 @@ using System; using System.Collections.Immutable; -using System.IO; using System.Linq; +using Microsoft.ML.Calibrators; using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.RunTests; @@ -199,13 +199,29 @@ public void TestPfiRegressionOnSparseFeatures(bool saveModel) /// /// Test PFI Binary Classification for Dense Features /// - [Fact] - 
public void TestPfiBinaryClassificationOnDenseFeatures() + [Theory] + [InlineData(true)] + [InlineData(false)] + public void TestPfiBinaryClassificationOnDenseFeatures(bool saveModel) { var data = GetDenseDataset(TaskType.BinaryClassification); var model = ML.BinaryClassification.Trainers.LbfgsLogisticRegression( new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 }).Fit(data); - var pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + + ImmutableArray pfi; + if (saveModel) + { + var modelAndSchemaPath = GetOutputPath("TestPfiBinaryClassificationOnDenseFeatures.zip"); + ML.Model.Save(model, data.Schema, modelAndSchemaPath); + + var loadedModel = ML.Model.Load(modelAndSchemaPath, out var schema); + var castedModel = loadedModel as BinaryPredictionTransformer>; + pfi = ML.BinaryClassification.PermutationFeatureImportance(castedModel, data); + } + else + { + pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + } // Pfi Indices: // X1: 0 @@ -237,13 +253,29 @@ public void TestPfiBinaryClassificationOnDenseFeatures() /// /// Test PFI Binary Classification for Sparse Features /// - [Fact] - public void TestPfiBinaryClassificationOnSparseFeatures() + [Theory] + [InlineData(true)] + [InlineData(false)] + public void TestPfiBinaryClassificationOnSparseFeatures(bool saveModel) { var data = GetSparseDataset(TaskType.BinaryClassification); var model = ML.BinaryClassification.Trainers.LbfgsLogisticRegression( new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 }).Fit(data); - var pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + + ImmutableArray pfi; + if (saveModel) + { + var modelAndSchemaPath = GetOutputPath("TestPfiBinaryClassificationOnSparseFeatures.zip"); + ML.Model.Save(model, data.Schema, modelAndSchemaPath); + + var loadedModel = ML.Model.Load(modelAndSchemaPath, out var schema); + var castedModel = loadedModel as BinaryPredictionTransformer>; + pfi = 
ML.BinaryClassification.PermutationFeatureImportance(castedModel, data); + } + else + { + pfi = ML.BinaryClassification.PermutationFeatureImportance(model, data); + } // Pfi Indices: // X1: 0 @@ -437,7 +469,7 @@ public void TestPfiRankingOnDenseFeatures(bool saveModel) /// - /// Test PFI Multiclass Classification for Sparse Features + /// Test PFI Ranking Classification for Sparse Features /// [Theory] [InlineData(true)] diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index f80ee68d2d..ea7a84de43 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -125,7 +125,7 @@ public void TestLRWithStats() using (var fs = File.OpenRead(modelAndSchemaPath)) transformerChain = ML.Model.Load(fs, out var schema); - var lastTransformer = ((TransformerChain)transformerChain).LastTransformer as BinaryPredictionTransformer, ICalibrator>>; + var lastTransformer = ((TransformerChain)transformerChain).LastTransformer as BinaryPredictionTransformer>; var model = lastTransformer.Model; linearModel = model.SubModel as LinearBinaryModelParameters; @@ -145,7 +145,7 @@ public void TestLRWithStatsBackCompatibility() using (FileStream fs = File.OpenRead(dropModelPath)) { - var result = ModelFileUtils.LoadPredictorOrNull(Env, fs) as ParameterMixingCalibratedModelParameters, ICalibrator>; + var result = ModelFileUtils.LoadPredictorOrNull(Env, fs) as CalibratedModelParametersBase; var subPredictor = result?.SubModel as LinearBinaryModelParameters; var stats = subPredictor?.Statistics;