-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Fixes #4292 about using Permutation Feature Importance (PFI) with a BinaryPredictionTransformer (BPT) and CalibratedModelParametersBase (CMPB) #4306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+437
−49
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
a016aa6
Added working sample to use PFI with binary class loaded from disk
antoniovs1029 bb97226
Remove comments from sample example of PFI with BPT loaded from disk.
antoniovs1029 30ed977
Added static class ParameterMixingCalibratedModelParameters to fix pr…
antoniovs1029 0b4bcd6
Load the calibrator and submodel first to generate a generic type at …
antoniovs1029 4eff967
Fixed the problem by means of adding an extra member to the Parameter…
antoniovs1029 cd5c612
Fixed problem in previous commit
antoniovs1029 c48347b
Cleaned up the sample of PFI with BPT loaded from disk
antoniovs1029 ec617e4
Fixed tests that were using a cast that now returns null. Notice that…
antoniovs1029 e085f77
Clean up some comments
antoniovs1029 c93802f
Used class attributes to solve the problem
antoniovs1029 3a9285f
Fixed empty spaces in code
antoniovs1029 013fb58
Remove hardcode of type CalibratedModelParametersBase
antoniovs1029 7a76cb7
Removed unused 'using Calibrators'
antoniovs1029 dd40190
Added Attribute suffix
antoniovs1029 37d97aa
Removed duplicated strings
antoniovs1029 6ab644c
Updated PredictionTransformerLoadTypeAttribute to use property instea…
antoniovs1029 8f687cb
Added tests for using PFI with Binary Classification loaded from disk
antoniovs1029 c1d11a1
Removed unused 'using System.IO'
antoniovs1029 4cf02dc
Fixed other CalibratedModelParameters classes, and added tests. Still…
antoniovs1029 7714f2f
Removed unused 'using' statements
antoniovs1029 bcd112f
Change in the FeatureWeightCalibratedModelParameters constructor to b…
antoniovs1029 89534b6
Removed comments and non-generic PMCMP class
antoniovs1029 c02507e
Corrected 2 access modifiers
antoniovs1029 7e7505d
Merge remote-tracking branch 'upstream/master' into myissue05
antoniovs1029 8263941
Merge remote-tracking branch 'upstream/master' into myissue05
antoniovs1029 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
106 changes: 106 additions & 0 deletions
106
...Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportanceLoadFromDisk.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Calibrators; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Trainers; | ||
|
||
namespace Samples.Dynamic.Trainers.BinaryClassification | ||
{ | ||
/// <summary>
/// Sample that runs Permutation Feature Importance (PFI) on a binary
/// classification model after the model has been saved to disk and loaded
/// back.
/// </summary>
public static class PermutationFeatureImportanceLoadFromDisk
{
    /// <summary>
    /// Trains a logistic regression pipeline on generated data, saves it to
    /// "./model.zip", reloads it, runs PFI on the reloaded predictor and
    /// prints each feature's model weight and change in AUC.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The loaded model is not a transformer chain whose last transformer is
    /// a calibrated linear binary prediction transformer.
    /// </exception>
    public static void Example()
    {
        var mlContext = new MLContext(seed: 1);
        var samples = GenerateData();
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // Create pipeline
        var featureColumns =
            new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
        var pipeline = mlContext.Transforms
            .Concatenate("Features", featureColumns)
            .Append(mlContext.Transforms.NormalizeMinMax("Features"))
            .Append(mlContext.BinaryClassification.Trainers
                .SdcaLogisticRegression());

        // Create and save model
        var trainedModel = pipeline.Fit(data);
        var modelPath = "./model.zip";
        mlContext.Model.Save(trainedModel, data.Schema, modelPath);

        // Load model. The input schema returned by Load is not needed here.
        var model = mlContext.Model.Load(modelPath, out DataViewSchema _);

        // Transform the dataset.
        var transformedData = model.Transform(data);

        // PFI needs the typed predictor, so recover it from the loaded chain.
        // Fail with a descriptive error instead of a NullReferenceException
        // if the loaded model does not have the expected shape.
        var lastTransformer =
            (model as TransformerChain<ITransformer>)?.LastTransformer;
        if (!(lastTransformer is BinaryPredictionTransformer<
            CalibratedModelParametersBase<LinearBinaryModelParameters,
                PlattCalibrator>> linearPredictor))
        {
            throw new InvalidOperationException(
                "The loaded model does not end with a calibrated linear " +
                "binary prediction transformer.");
        }

        // Execute PFI with the linearPredictor
        var permutationMetrics = mlContext.BinaryClassification
            .PermutationFeatureImportance(linearPredictor, transformedData,
                permutationCount: 30);

        // Sort indices according to PFI results (largest absolute mean
        // change in AUC first).
        var sortedIndices = permutationMetrics
            .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
            .OrderByDescending(
                feature => Math.Abs(feature.AreaUnderRocCurve.Mean))
            .Select(feature => feature.index);

        Console.WriteLine("Feature\tModel Weight\tChange in AUC"
            + "\t95% Confidence in the Mean Change in AUC");
        var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray();
        foreach (int i in sortedIndices)
        {
            Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}",
                featureColumns[i],
                // The weights live on the submodel inside the calibrated model.
                linearPredictor.Model.SubModel.Weights[i],
                auc[i].Mean,
                1.96 * auc[i].StandardError);
        }

        // Expected output:
        //  Feature     Model Weight Change in AUC  95% Confidence in the Mean Change in AUC
        //  Feature2        35.15     -0.387        0.002015
        //  Feature1        17.94     -0.1514       0.0008963
    }

    /// <summary>
    /// One generated example: a boolean label and two numeric features.
    /// </summary>
    private class Data
    {
        public bool Label { get; set; }

        public float Feature1 { get; set; }

        public float Feature2 { get; set; }
    }

    /// <summary>
    /// Lazily generates examples whose label is derived from a noisy linear
    /// combination of the two features.
    /// </summary>
    /// <param name="nExamples">Number of examples to yield.</param>
    /// <param name="bias">Constant term of the linear combination.</param>
    /// <param name="weight1">Coefficient applied to Feature1.</param>
    /// <param name="weight2">Coefficient applied to Feature2.</param>
    /// <param name="seed">Seed for the random number generator.</param>
    /// <returns>A deferred sequence of <paramref name="nExamples"/> items.</returns>
    private static IEnumerable<Data> GenerateData(int nExamples = 10000,
        double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
    {
        var rng = new Random(seed);
        for (int i = 0; i < nExamples; i++)
        {
            // The order of the rng calls determines the generated values;
            // keep Feature1 before Feature2 before the label noise.
            var data = new Data
            {
                Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
            };

            // Create a noisy label.
            var value = (float)(bias + weight1 * data.Feature1 + weight2 *
                data.Feature2 + rng.NextDouble() - 0.5);

            data.Label = Sigmoid(value) > 0.5;
            yield return data;
        }
    }

    /// <summary>
    /// Logistic function: 1 / (1 + e^(-x)).
    /// </summary>
    private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.