diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs
deleted file mode 100644
index 6cf402bb6f..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-using System;
-using System.Linq;
-using Microsoft.ML.Trainers;
-using Microsoft.ML.SamplesUtils;
-using Microsoft.ML;
-
-namespace Samples.Dynamic.PermutationFeatureImportance
-{
-    public static class PfiHelper
-    {
-        public static IDataView GetHousingRegressionIDataView(MLContext mlContext, out string labelName, out string[] featureNames, bool binaryPrediction = false)
-        {
-            // Read the Housing regression dataset.
-            var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);
-
-            // Define the label column.
-            var labelColumn = "MedianHomeValue";
-
-            if (binaryPrediction)
-            {
-                labelColumn = nameof(BinaryOutputRow.AboveAverage);
-                data = mlContext.Transforms.CustomMapping(GreaterThanAverage, null).Fit(data).Transform(data);
-                data = mlContext.Transforms.DropColumns("MedianHomeValue").Fit(data).Transform(data);
-            }
-
-            labelName = labelColumn;
-            featureNames = data.Schema.AsEnumerable()
-                .Select(column => column.Name) // Get the column names.
-                .Where(name => name != labelColumn) // Drop the label.
-                .ToArray();
-
-            return data;
-        }
-
-        // Define a class for all the input columns that we intend to consume.
-        private class ContinuousInputRow
-        {
-            public float MedianHomeValue { get; set; }
-        }
-
-        // Define a class for all the output columns that we intend to produce.
-        private class BinaryOutputRow
-        {
-            public bool AboveAverage { get; set; }
-        }
-
-        // Define an Action to apply a custom mapping from one object to another.
-        private readonly static Action<ContinuousInputRow, BinaryOutputRow> GreaterThanAverage = (input, output)
-            => output.AboveAverage = input.MedianHomeValue > 22.6;
-
-        public static float[] GetLinearModelWeights(OlsModelParameters linearModel)
-        {
-            return linearModel.Weights.ToArray();
-        }
-
-        public static float[] GetLinearModelWeights(LinearBinaryModelParameters linearModel)
-        {
-            return linearModel.Weights.ToArray();
-        }
-    }
-}
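For orientation, a minimal sketch of how the deleted helper above was consumed (the calling code is illustrative and not part of this patch): with binaryPrediction: true, the CustomMapping adds the boolean AboveAverage column, MedianHomeValue is dropped, and the out parameters describe the binarized dataset.

    using Microsoft.ML;
    using Samples.Dynamic.PermutationFeatureImportance;

    var mlContext = new MLContext();

    // Regression flavor: the label stays "MedianHomeValue".
    var regressionData = PfiHelper.GetHousingRegressionIDataView(
        mlContext, out string labelName, out string[] featureNames);

    // Binary flavor: labelName comes back as "AboveAverage" and the feature list
    // excludes the dropped "MedianHomeValue" column.
    var binaryData = PfiHelper.GetHousingRegressionIDataView(
        mlContext, out labelName, out featureNames, binaryPrediction: true);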
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs
deleted file mode 100644
index 3b7f3d112b..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs
+++ /dev/null
@@ -1,78 +0,0 @@
-using System;
-using System.Linq;
-using Microsoft.ML;
-
-namespace Samples.Dynamic.PermutationFeatureImportance
-{
-    public static class PfiRegression
-    {
-        public static void Example()
-        {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            var mlContext = new MLContext();
-
-            // Step 1: Read the data.
-            var data = PfiHelper.GetHousingRegressionIDataView(mlContext, out string labelName, out string[] featureNames);
-
-            // Step 2: Pipeline.
-            // Concatenate the features to create a Features vector.
-            // Normalize the dataset so that for each feature, its maximum value is 1 and its minimum value is 0.
-            // Then append a linear regression trainer.
-            var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
-                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
-                .Append(mlContext.Regression.Trainers.Ols(
-                    labelColumnName: labelName, featureColumnName: "Features"));
-            var model = pipeline.Fit(data);
-
-            // Extract the trained model from the pipeline.
-            var linearPredictor = model.LastTransformer;
-            var weights = PfiHelper.GetLinearModelWeights(linearPredictor.Model);
-
-            // Compute the permutation metrics using the properly normalized data.
-            var transformedData = model.Transform(data);
-            var permutationMetrics = mlContext.Regression.PermutationFeatureImportance(
-                linearPredictor, transformedData, labelColumnName: labelName, permutationCount: 3);
-
-            // Now let's look at which features are most important to the model overall.
-            // Get the feature indices sorted by their impact on R-Squared.
-            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.RSquared })
-                .OrderByDescending(feature => Math.Abs(feature.RSquared.Mean))
-                .Select(feature => feature.index);
-
-            // Print out the permutation results, with the model weights, in order of their impact.
-            // Expected console output for 100 permutations:
-            //    Feature               Model Weight    Change in R-Squared    95% Confidence Interval of the Mean
-            //    RoomsPerDwelling         53.35           -0.4298                0.005705
-            //    EmploymentDistance      -19.21           -0.2609                0.004591
-            //    NitricOxides            -19.32           -0.1569                0.003701
-            //    HighwayDistance           6.11           -0.1173                0.0025
-            //    TeacherRatio            -21.92           -0.1106                0.002207
-            //    TaxRate                  -8.68           -0.1008                0.002083
-            //    CrimesPerCapita         -16.37           -0.05988               0.00178
-            //    PercentPre40s            -4.52           -0.03836               0.001432
-            //    PercentResidental         3.91           -0.02006               0.001079
-            //    CharlesRiver              3.49           -0.01839               0.000841
-            //    PercentNonRetail         -1.17           -0.002111              0.0003176
-            //
-            // Let's dig into these results a little bit. First, if you look at the weights of the model, they generally correlate
-            // with the results of PFI, but there are some significant misorderings. For example, "TaxRate" and "HighwayDistance"
-            // have relatively small model weights, but the permutation analysis shows these features to have a larger effect
-            // on the accuracy of the model than higher-weighted features. To understand why the weights don't reflect the same
-            // feature importance as PFI, we need to go back to the basics of linear models: one of the assumptions of a linear
-            // model is that the features are uncorrelated. The features in this dataset, however, are clearly correlated: the tax rate
-            // for a house and the student-to-teacher ratio at the nearest school, for example, are often coupled through school
-            // levies. The tax rate, the distance to a highway, and the crime rate would also seem to be correlated through social
-            // dynamics. We could draw out similar relationships for all the variables in this dataset. The reason the linear
-            // model weights don't reflect the same feature importance as PFI is that the solution to the linear model redistributes
-            // weights between correlated variables in unpredictable ways, so the weights themselves are no longer a good
-            // measure of feature importance.
-            Console.WriteLine("Feature\tModel Weight\tChange in R-Squared\t95% Confidence Interval of the Mean");
-            var rSquared = permutationMetrics.Select(x => x.RSquared).ToArray(); // Fetch R-Squared as an array.
-            foreach (int i in sortedIndices)
-            {
-                Console.WriteLine($"{featureNames[i]}\t{weights[i]:0.00}\t{rSquared[i].Mean:G4}\t{1.96 * rSquared[i].StandardError:G4}");
-            }
-        }
-    }
-}
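The closing comment in the deleted regression sample above argues that correlated features make linear weights unreliable as importance scores. That claim is easy to reproduce with synthetic data. Below is a minimal, self-contained sketch (all names and data invented for illustration; Ols requires the Microsoft.ML.Mkl.Components package): two nearly identical features share one signal, so OLS is free to split the weight between them almost arbitrarily, while permuting either one degrades the fit by a similar amount.

    using System;
    using System.Linq;
    using Microsoft.ML;

    public static class CorrelatedWeightsSketch
    {
        private class Row
        {
            public float Label { get; set; }
            public float FeatureA { get; set; }
            public float FeatureB { get; set; }
        }

        public static void Run()
        {
            var mlContext = new MLContext(seed: 1);
            var rng = new Random(1);

            // FeatureB is FeatureA plus a little noise, so the two are almost perfectly correlated.
            var rows = Enumerable.Range(0, 1000).Select(_ =>
            {
                var signal = (float)rng.NextDouble();
                return new Row
                {
                    FeatureA = signal,
                    FeatureB = signal + 0.01f * (float)rng.NextDouble(),
                    Label = 2f * signal + 0.1f * (float)rng.NextDouble()
                };
            }).ToList();

            var data = mlContext.Data.LoadFromEnumerable(rows);
            var model = mlContext.Transforms.Concatenate("Features", nameof(Row.FeatureA), nameof(Row.FeatureB))
                .Append(mlContext.Regression.Trainers.Ols())
                .Fit(data);

            // The split between the two weights is essentially arbitrary; PFI on the
            // transformed data would report a similar drop in R-Squared for each feature.
            var weights = model.LastTransformer.Model.Weights;
            Console.WriteLine($"FeatureA weight: {weights[0]:0.00}, FeatureB weight: {weights[1]:0.00}");
        }
    }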
- Console.WriteLine("Feature\tModel Weight\tChange in R-Squared\t95% Confidence Interval of the Mean"); - var rSquared = permutationMetrics.Select(x => x.RSquared).ToArray(); // Fetch r-squared as an array - foreach (int i in sortedIndices) - { - Console.WriteLine($"{featureNames[i]}\t{weights[i]:0.00}\t{rSquared[i].Mean:G4}\t{1.96 * rSquared[i].StandardError:G4}"); - } - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs deleted file mode 100644 index b3b646c35f..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs +++ /dev/null @@ -1,77 +0,0 @@ -using System; -using System.Linq; -using Microsoft.ML; -using Microsoft.ML.Trainers; - -namespace Samples.Dynamic.PermutationFeatureImportance -{ - public static class PfiBinaryClassification - { - public static void Example() - { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(seed:999123); - - // Step 1: Read the data - var data = PfiHelper.GetHousingRegressionIDataView(mlContext, - out string labelName, out string[] featureNames, binaryPrediction: true); - - // Step 2: Pipeline - // Concatenate the features to create a Feature vector. - // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0. - // Then append a logistic regression trainer. - var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression( - labelColumnName: labelName, featureColumnName: "Features")); - var model = pipeline.Fit(data); - - // Extract the model from the pipeline - var linearPredictor = model.LastTransformer; - // Linear models for binary classification are wrapped by a calibrator as a generic predictor - // To access it directly, we must extract it out and cast it to the proper class - var weights = PfiHelper.GetLinearModelWeights(linearPredictor.Model.SubModel as LinearBinaryModelParameters); - - // Compute the permutation metrics using the properly normalized data. - var transformedData = model.Transform(data); - var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance( - linearPredictor, transformedData, labelColumnName: labelName, permutationCount: 3); - - // Now let's look at which features are most important to the model overall. - // Get the feature indices sorted by their impact on AreaUnderRocCurve. 
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs
new file mode 100644
index 0000000000..55d6c54cc4
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs
@@ -0,0 +1,105 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class PermutationFeatureImportance
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext(seed: 1);
+
+            // Create sample data.
+            var samples = GenerateData();
+
+            // Load the sample data as an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Define a training pipeline that concatenates features into a vector, normalizes them, and then
+            // trains a linear model.
+            var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
+            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
+                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
+                .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression());
+
+            // Fit the pipeline to the data.
+            var model = pipeline.Fit(data);
+
+            // Transform the dataset.
+            var transformedData = model.Transform(data);
+
+            // Extract the predictor.
+            var linearPredictor = model.LastTransformer;
+
+            // Compute the permutation metrics for the linear model using the normalized data.
+            var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(
+                linearPredictor, transformedData, permutationCount: 30);
+
+            // Now let's look at which features are most important to the model overall.
+            // Get the feature indices sorted by their impact on AUC.
+            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
+                .OrderByDescending(feature => Math.Abs(feature.AreaUnderRocCurve.Mean))
+                .Select(feature => feature.index);
+
+            Console.WriteLine("Feature\tModel Weight\tChange in AUC\t95% Confidence in the Mean Change in AUC");
+            var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray();
+            foreach (int i in sortedIndices)
+            {
+                Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}",
+                    featureColumns[i],
+                    linearPredictor.Model.SubModel.Weights[i],
+                    auc[i].Mean,
+                    1.96 * auc[i].StandardError);
+            }
+
+            // Expected output:
+            //    Feature    Model Weight    Change in AUC    95% Confidence in the Mean Change in AUC
+            //    Feature2      35.15          -0.387           0.002015
+            //    Feature1      17.94          -0.1514          0.0008963
+        }
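The 1.96 * StandardError term printed above is the half-width of a 95% confidence interval on the mean change in AUC, under a normal approximation (mean ± 1.96 standard errors). If explicit interval bounds are preferred in a report, a small variation on the same loop (an illustrative fragment reusing the locals above):

    foreach (int i in sortedIndices)
    {
        var mean = auc[i].Mean;
        var halfWidth = 1.96 * auc[i].StandardError; // 95% CI half-width under a normal approximation
        Console.WriteLine($"{featureColumns[i]}: {mean:G4} [{mean - halfWidth:G4}, {mean + halfWidth:G4}]");
    }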
+
+        private class Data
+        {
+            public bool Label { get; set; }
+
+            public float Feature1 { get; set; }
+
+            public float Feature2 { get; set; }
+        }
+
+        /// <summary>
+        /// Generate an enumerable of Data objects, creating the label as a simple
+        /// linear combination of the features.
+        /// </summary>
+        /// <param name="nExamples">The number of examples.</param>
+        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
+        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
+        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
+        /// <param name="seed">The seed for generating feature values and label noise.</param>
+        /// <returns>An enumerable of Data objects.</returns>
+        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
+            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
+        {
+            var rng = new Random(seed);
+            for (int i = 0; i < nExamples; i++)
+            {
+                var data = new Data
+                {
+                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                };
+
+                // Create a noisy label.
+                var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
+                data.Label = Sigmoid(value) > 0.5;
+                yield return data;
+            }
+        }
+
+        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x));
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs
new file mode 100644
index 0000000000..963fd238ca
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs
@@ -0,0 +1,109 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+
+namespace Samples.Dynamic.Trainers.MulticlassClassification
+{
+    public static class PermutationFeatureImportance
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext(seed: 1);
+
+            // Create sample data.
+            var samples = GenerateData();
+
+            // Load the sample data as an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Define a training pipeline that concatenates features into a vector, normalizes them, and then
+            // trains a linear model.
+            var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
+            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
+                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
+                .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy());
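One pipeline difference from the binary sample: multiclass trainers require a key-typed label, hence the MapValueToKey("Label") step before the trainer. When predictions will be read by people, the key can be mapped back afterwards; a hedged sketch reusing the locals above (column names assume the trainer defaults):

    // Append after the trainer to map key-typed predictions back to the
    // original label values.
    var pipelineWithReadableLabels = pipeline
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));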
+
+            // Fit the pipeline to the data.
+            var model = pipeline.Fit(data);
+
+            // Transform the dataset.
+            var transformedData = model.Transform(data);
+
+            // Extract the predictor.
+            var linearPredictor = model.LastTransformer;
+
+            // Compute the permutation metrics for the linear model using the normalized data.
+            var permutationMetrics = mlContext.MulticlassClassification.PermutationFeatureImportance(
+                linearPredictor, transformedData, permutationCount: 30);
+
+            // Now let's look at which features are most important to the model overall.
+            // Get the feature indices sorted by their impact on MicroAccuracy.
+            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.MicroAccuracy })
+                .OrderByDescending(feature => Math.Abs(feature.MicroAccuracy.Mean))
+                .Select(feature => feature.index);
+
+            Console.WriteLine("Feature\tChange in MicroAccuracy\t95% Confidence in the Mean Change in MicroAccuracy");
+            var microAccuracy = permutationMetrics.Select(x => x.MicroAccuracy).ToArray();
+            foreach (int i in sortedIndices)
+            {
+                Console.WriteLine("{0}\t{1:G4}\t{2:G4}",
+                    featureColumns[i],
+                    microAccuracy[i].Mean,
+                    1.96 * microAccuracy[i].StandardError);
+            }
+
+            // Expected output:
+            //    Feature    Change in MicroAccuracy    95% Confidence in the Mean Change in MicroAccuracy
+            //    Feature2      -0.1395                    0.0006567
+            //    Feature1      -0.05367                   0.0006908
+        }
+
+        private class Data
+        {
+            public float Label { get; set; }
+
+            public float Feature1 { get; set; }
+
+            public float Feature2 { get; set; }
+        }
+
+        /// <summary>
+        /// Generate an enumerable of Data objects, creating the label as a simple
+        /// linear combination of the features.
+        /// </summary>
+        /// <param name="nExamples">The number of examples.</param>
+        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
+        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
+        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
+        /// <param name="seed">The seed for generating feature values and label noise.</param>
+        /// <returns>An enumerable of Data objects.</returns>
+        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
+            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
+        {
+            var rng = new Random(seed);
+            var max = bias + 4.5 * weight1 + 4.5 * weight2 + 0.5;
+            for (int i = 0; i < nExamples; i++)
+            {
+                var data = new Data
+                {
+                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                };
+
+                // Create a noisy label.
+                var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
+                if (value < max / 3)
+                    data.Label = 0;
+                else if (value < 2 * max / 3)
+                    data.Label = 1;
+                else
+                    data.Label = 2;
+                yield return data;
+            }
+        }
+    }
+}
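The max constant in GenerateData above comes directly from the feature construction: rng.Next(10) * (rng.NextDouble() - 0.5) lies in (-4.5, 4.5), so with the positive default weights the noisy score is bounded above by bias + 4.5 * weight1 + 4.5 * weight2 + 0.5. Splitting that bound into thirds yields the three class labels; the same thresholding pulled out as a helper for clarity (an illustrative refactoring, not part of the patch):

    // Class 0 below max / 3, class 1 below 2 * max / 3, class 2 otherwise;
    // identical to the if/else chain in GenerateData.
    private static float ToClass(double value, double max)
    {
        if (value < max / 3)
            return 0;
        if (value < 2 * max / 3)
            return 1;
        return 2;
    }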
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs
new file mode 100644
index 0000000000..41928a70ee
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs
@@ -0,0 +1,113 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+
+namespace Samples.Dynamic.Trainers.Ranking
+{
+    public static class PermutationFeatureImportance
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext(seed: 1);
+
+            // Create sample data.
+            var samples = GenerateData();
+
+            // Load the sample data as an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Define a training pipeline that concatenates features into a vector, normalizes them, and then
+            // trains a FastTree ranking model.
+            var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
+            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("GroupId"))
+                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
+                .Append(mlContext.Ranking.Trainers.FastTree());
+
+            // Fit the pipeline to the data.
+            var model = pipeline.Fit(data);
+
+            // Transform the dataset.
+            var transformedData = model.Transform(data);
+
+            // Extract the predictor.
+            var predictor = model.LastTransformer;
+
+            // Compute the permutation metrics for the model using the normalized data.
+            var permutationMetrics = mlContext.Ranking.PermutationFeatureImportance(
+                predictor, transformedData, permutationCount: 30);
+
+            // Now let's look at which features are most important to the model overall.
+            // Get the feature indices sorted by their impact on NDCG@1.
+            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.NormalizedDiscountedCumulativeGains })
+                .OrderByDescending(feature => Math.Abs(feature.NormalizedDiscountedCumulativeGains[0].Mean))
+                .Select(feature => feature.index);
+
+            Console.WriteLine("Feature\tChange in NDCG@1\t95% Confidence in the Mean Change in NDCG@1");
+            var ndcg = permutationMetrics.Select(x => x.NormalizedDiscountedCumulativeGains).ToArray();
+            foreach (int i in sortedIndices)
+            {
+                Console.WriteLine("{0}\t{1:G4}\t{2:G4}",
+                    featureColumns[i],
+                    ndcg[i][0].Mean,
+                    1.96 * ndcg[i][0].StandardError);
+            }
+
+            // Expected output:
+            //    Feature    Change in NDCG@1    95% Confidence in the Mean Change in NDCG@1
+            //    Feature2      -0.2421             0.001748
+            //    Feature1      -0.0513             0.001184
+        }
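NormalizedDiscountedCumulativeGains is an array of statistics, one entry per truncation level, so the [0] above reads NDCG@1. Reading a deeper cut is the same pattern at a different index; an illustrative fragment reusing the locals above (it assumes the evaluator computed at least three truncation levels):

    // NDCG@3 instead of NDCG@1: index 2 of the per-truncation array.
    foreach (int i in sortedIndices)
    {
        Console.WriteLine("{0}\t{1:G4}\t{2:G4}",
            featureColumns[i],
            ndcg[i][2].Mean,
            1.96 * ndcg[i][2].StandardError);
    }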
+
+        private class Data
+        {
+            public float Label { get; set; }
+
+            public int GroupId { get; set; }
+
+            public float Feature1 { get; set; }
+
+            public float Feature2 { get; set; }
+        }
+
+        /// <summary>
+        /// Generate an enumerable of Data objects, creating the label as a simple
+        /// linear combination of the features.
+        /// </summary>
+        /// <param name="nExamples">The number of examples.</param>
+        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
+        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
+        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
+        /// <param name="seed">The seed for generating feature values and label noise.</param>
+        /// <param name="groupSize">The number of examples per query group.</param>
+        /// <returns>An enumerable of Data objects.</returns>
+        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
+            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1, int groupSize = 5)
+        {
+            var rng = new Random(seed);
+            var max = bias + 4.5 * weight1 + 4.5 * weight2 + 0.5;
+            for (int i = 0; i < nExamples; i++)
+            {
+                var data = new Data
+                {
+                    GroupId = i / groupSize,
+                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                };
+
+                // Create a noisy label.
+                var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
+                if (value < max / 3)
+                    data.Label = 0;
+                else if (value < 2 * max / 3)
+                    data.Label = 1;
+                else
+                    data.Label = 2;
+                yield return data;
+            }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs
new file mode 100644
index 0000000000..90cf94db2a
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs
@@ -0,0 +1,102 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+
+namespace Samples.Dynamic.Trainers.Regression
+{
+    public static class PermutationFeatureImportance
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext(seed: 1);
+
+            // Create sample data.
+            var samples = GenerateData();
+
+            // Load the sample data as an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Define a training pipeline that concatenates features into a vector, normalizes them, and then
+            // trains a linear model.
+            var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
+            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
+                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
+                .Append(mlContext.Regression.Trainers.Ols());
+
+            // Fit the pipeline to the data.
+            var model = pipeline.Fit(data);
+
+            // Transform the dataset.
+            var transformedData = model.Transform(data);
+
+            // Extract the predictor.
+            var linearPredictor = model.LastTransformer;
+
+            // Compute the permutation metrics for the linear model using the normalized data.
+            var permutationMetrics = mlContext.Regression.PermutationFeatureImportance(
+                linearPredictor, transformedData, permutationCount: 30);
+
+            // Now let's look at which features are most important to the model overall.
+            // Get the feature indices sorted by their impact on RMSE.
+            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.RootMeanSquaredError })
+                .OrderByDescending(feature => Math.Abs(feature.RootMeanSquaredError.Mean))
+                .Select(feature => feature.index);
+
+            Console.WriteLine("Feature\tModel Weight\tChange in RMSE\t95% Confidence in the Mean Change in RMSE");
+            var rmse = permutationMetrics.Select(x => x.RootMeanSquaredError).ToArray();
+            foreach (int i in sortedIndices)
+            {
+                Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}",
+                    featureColumns[i],
+                    linearPredictor.Model.Weights[i],
+                    rmse[i].Mean,
+                    1.96 * rmse[i].StandardError);
+            }
+
+            // Expected output:
+            //    Feature    Model Weight    Change in RMSE    95% Confidence in the Mean Change in RMSE
+            //    Feature2      9.00            4.009             0.008304
+            //    Feature1      4.48            1.901             0.003351
+        }
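Because permutationMetrics comes back in the same order as the feature slots, the index bookkeeping above can also be folded into a single LINQ pairing; an equivalent illustrative fragment reusing the locals above:

    // Pair each feature name with its mean change in RMSE, largest absolute change first.
    var ranked = featureColumns
        .Zip(rmse, (name, metric) => (Name: name, Delta: metric.Mean))
        .OrderByDescending(pair => Math.Abs(pair.Delta));
    foreach (var (name, delta) in ranked)
        Console.WriteLine($"{name}: {delta:G4}");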
+
+        private class Data
+        {
+            public float Label { get; set; }
+
+            public float Feature1 { get; set; }
+
+            public float Feature2 { get; set; }
+        }
+
+        /// <summary>
+        /// Generate an enumerable of Data objects, creating the label as a simple
+        /// linear combination of the features.
+        /// </summary>
+        /// <param name="nExamples">The number of examples.</param>
+        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
+        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
+        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
+        /// <param name="seed">The seed for generating feature values and label noise.</param>
+        /// <returns>An enumerable of Data objects.</returns>
+        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
+            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
+        {
+            var rng = new Random(seed);
+            for (int i = 0; i < nExamples; i++)
+            {
+                var data = new Data
+                {
+                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
+                };
+
+                // Create a noisy label.
+                data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
+                yield return data;
+            }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
index 9e111dac5a..1c9cebd602 100644
--- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
+++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
@@ -41,7 +41,7 @@ public static class PermutationFeatureImportanceExtensions
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs)]
+        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -117,7 +117,7 @@ private static RegressionMetrics RegressionDelta(
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs)]
+        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -194,6 +194,13 @@ private static BinaryClassificationMetrics BinaryClassifierDelta(
         /// example of working with these results to analyze the feature importance of a model.
         /// </para>
         /// </remarks>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         /// <param name="catalog">The clustering catalog.</param>
         /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
         /// <param name="data">The evaluation data set.</param>
@@ -272,6 +279,13 @@ private static MulticlassClassificationMetrics MulticlassClassificationDelta(
         /// example of working with these results to analyze the feature importance of a model.
         /// </para>
         /// </remarks>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[PermutationFeatureImportance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         /// <param name="catalog">The clustering catalog.</param>
         /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
         /// <param name="data">The evaluation data set.</param>