diff --git a/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs index cc861c0b9b..d8d99cbe8f 100644 --- a/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs @@ -14,10 +14,10 @@ namespace Microsoft.ML public static class ExplainabilityCatalog { /// - /// Feature Contribution Calculation computes model-specific contribution scores for each feature. - /// Note that this functionality is not supported by all the models. See for a list of the suported models. + /// Create a <see cref="FeatureContributionCalculatingEstimator"/> that computes model-specific contribution scores for + /// each feature of the input vector. /// - /// The model explainability operations catalog. + /// The transforms catalog. /// A that supports Feature Contribution Calculation, /// and which will also be used for scoring. /// The number of positive contributions to report, sorted from highest magnitude to lowest magnitude. @@ -40,10 +40,10 @@ public static FeatureContributionCalculatingEstimator CalculateFeatureContributi => new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), predictionTransformer.Model, numberOfPositiveContributions, numberOfNegativeContributions, predictionTransformer.FeatureColumnName, normalize); /// - /// Feature Contribution Calculation computes model-specific contribution scores for each feature. - /// Note that this functionality is not supported by all the models. See for a list of the suported models. + /// Create a <see cref="FeatureContributionCalculatingEstimator"/> that computes model-specific contribution scores for + /// each feature of the input vector. - /// The model explainability operations catalog. + /// The transforms catalog. /// A that supports Feature Contribution Calculation, /// and which will also be used for scoring. /// The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
diff --git a/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs b/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs index ec44099fca..cde47e8ce0 100644 --- a/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs @@ -26,47 +26,8 @@ namespace Microsoft.ML.Transforms { /// - /// The FeatureContributionCalculationTransformer computes model-specific per-feature contributions to the score of each example. - /// See the list of currently supported models below. + /// resulting from fitting a . /// - /// - /// - /// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions - /// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculationTransformer computes a model-specific - /// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative - /// (they make the score lower). - /// - /// - /// Feature Contribution Calculation is currently supported for the following models: - /// Regression: - /// OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression, - /// GeneralizedAdditiveModels (GAM), LightGbm, FastTree, FastForest, FastTreeTweedie - /// Binary Classification: - /// AveragedPerceptron, LinearSupportVectorMachines, LogisticRegression, StochasticDualCoordinateAscent (SDCA), - /// StochasticGradientDescent (SGD), SymbolicStochasticGradientDescent, GeneralizedAdditiveModels (GAM), - /// FastForest, FastTree, LightGbm - /// Ranking: - /// FastTree, LightGbm - /// - /// - /// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. 
Similarly, - /// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at - /// the feature value. - /// - /// - /// For tree-based models, the calculation of feature contribution essentially consists in determining which splits in the tree have the most impact - /// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature - /// is equal to the change in score produced by exploring the opposite sub-tree every time a decision node for the given feature is encountered. - /// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1 - /// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false - /// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score - /// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with - /// many decision trees. - /// - /// - /// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer. - /// - /// public sealed class FeatureContributionCalculatingTransformer : OneToOneTransformerBase { internal sealed class Options : TransformInputBase @@ -266,9 +227,67 @@ private Delegate GetValueGetter(DataViewRow input, int colSrc) } /// - /// Estimator producing a FeatureContributionCalculatingTransformer which scores the model on an input dataset and - /// computes model-specific contribution scores for each feature. + /// Computes model-specific per-feature contributions to the score of each input vector. 
/// + /// + /// | + /// | Output column data type | Known-sized vector of <xref:System.Single> | + /// + /// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions + /// it can be useful to inspect which features influenced them most significantly. This transformer computes a model-specific + /// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative + /// (they make the score lower). + /// + /// Feature Contribution Calculation is currently supported for the following models: + /// - Regression: + /// - OlsTrainer + /// - SdcaRegressionTrainer + /// - OnlineGradientDescentTrainer + /// - LbfgsPoissonRegressionTrainer + /// - GamRegressionTrainer + /// - LightGbmRegressionTrainer + /// - FastTreeRegressionTrainer + /// - FastForestRegressionTrainer + /// - FastTreeTweedieTrainer + /// - Binary Classification: + /// - AveragedPerceptronTrainer + /// - LinearSvmTrainer + /// - LbfgsLogisticRegressionBinaryTrainer + /// - SdcaNonCalibratedBinaryTrainer + /// - SdcaLogisticRegressionBinaryTrainer + /// - SgdCalibratedTrainer + /// - SgdNonCalibratedTrainer + /// - SymbolicSgdLogisticRegressionBinaryTrainer + /// - GamBinaryTrainer + /// - FastForestBinaryTrainer + /// - FastTreeBinaryTrainer + /// - LightGbmBinaryTrainer + /// - Ranking: + /// - FastTreeRankingTrainer + /// - LightGbmRankingTrainer + /// + /// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. Similarly, + /// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at + /// the feature value. 
+ /// + /// For tree-based models, the calculation of feature contribution essentially consists of determining which splits in the tree have the most impact + /// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature + /// is equal to the change in score produced by exploring the opposite subtree every time a decision node for the given feature is encountered. + /// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1 + /// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false + /// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score + /// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with + /// many decision trees. + /// ]]> + /// + /// + /// public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator { private readonly string _featureColumn;