Skip to content

Commit db84060

Browse files
authored
Handle NaN optimization metric in AutoML (#5031)
* Handle all folds returning NaN optimization metric in CrossValSummaryRunner * Handle NaN in calculation of average scores and index of closest fold * Handle all metrics being NaN in finding Best Run * nit * nit * nit * Handle all NaNs in best model selection * Return average metrics instead of metrics from the fold with optimizing metric closest to average * nit * Add PerClassLogLoss and ConfusionMatrix from the fold closest to average score * feedback
1 parent 062be28 commit db84060

File tree

5 files changed

+108
-9
lines changed

5 files changed

+108
-9
lines changed

src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ public CrossValRunner(MLContext context,
6666

6767
private static double CalcAverageScore(IEnumerable<double> scores)
6868
{
69-
if (scores.Any(s => double.IsNaN(s)))
70-
{
69+
var newScores = scores.Where(r => !double.IsNaN(r));
70+
// Return NaN iff all scores are NaN
71+
if (newScores.Count() == 0)
7172
return double.NaN;
72-
}
73-
return scores.Average();
73+
return newScores.Average();
7474
}
7575
}
7676
}

src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs

+84-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Collections.Generic;
77
using System.IO;
88
using System.Linq;
9+
using Microsoft.ML.Data;
910
using Microsoft.ML.Runtime;
1011

1112
namespace Microsoft.ML.AutoML
@@ -70,27 +71,105 @@ public CrossValSummaryRunner(MLContext context,
7071

7172
// Get the model from the best fold
7273
var bestFoldIndex = BestResultUtil.GetIndexOfBestScore(trainResults.Select(r => r.score), _optimizingMetricInfo.IsMaximizing);
74+
// bestFoldIndex will be -1 if the optimization metric for all folds is NaN.
75+
// In this case, return model from the first fold.
76+
bestFoldIndex = bestFoldIndex != -1 ? bestFoldIndex : 0;
7377
var bestModel = trainResults.ElementAt(bestFoldIndex).model;
7478

75-
// Get the metrics from the fold whose score is closest to avg of all fold scores
76-
var avgScore = trainResults.Average(r => r.score);
79+
// Get the average metrics across all folds
80+
var avgScore = GetAverageOfNonNaNScores(trainResults.Select(x => x.score));
7781
var indexClosestToAvg = GetIndexClosestToAverage(trainResults.Select(r => r.score), avgScore);
7882
var metricsClosestToAvg = trainResults[indexClosestToAvg].metrics;
83+
var avgMetrics = GetAverageMetrics(trainResults.Select(x => x.metrics), metricsClosestToAvg);
7984

8085
// Build result objects
81-
var suggestedPipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, avgScore, allRunsSucceeded, metricsClosestToAvg, bestModel, null);
86+
var suggestedPipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, avgScore, allRunsSucceeded, avgMetrics, bestModel, null);
8287
var runDetail = suggestedPipelineRunDetail.ToIterationResult(_preFeaturizer);
8388
return (suggestedPipelineRunDetail, runDetail);
8489
}
8590

91+
private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetrics metricsClosestToAvg)
92+
{
93+
if (typeof(TMetrics) == typeof(BinaryClassificationMetrics))
94+
{
95+
var newMetrics = metrics.Select(x => x as BinaryClassificationMetrics);
96+
Contracts.Assert(newMetrics != null);
97+
98+
var result = new BinaryClassificationMetrics(
99+
auc: GetAverageOfNonNaNScores(newMetrics.Select(x => x.AreaUnderRocCurve)),
100+
accuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.Accuracy)),
101+
positivePrecision: GetAverageOfNonNaNScores(newMetrics.Select(x => x.PositivePrecision)),
102+
positiveRecall: GetAverageOfNonNaNScores(newMetrics.Select(x => x.PositiveRecall)),
103+
negativePrecision: GetAverageOfNonNaNScores(newMetrics.Select(x => x.NegativePrecision)),
104+
negativeRecall: GetAverageOfNonNaNScores(newMetrics.Select(x => x.NegativeRecall)),
105+
f1Score: GetAverageOfNonNaNScores(newMetrics.Select(x => x.F1Score)),
106+
auprc: GetAverageOfNonNaNScores(newMetrics.Select(x => x.AreaUnderPrecisionRecallCurve)),
107+
// Return ConfusionMatrix from the fold closest to average score
108+
confusionMatrix: (metricsClosestToAvg as BinaryClassificationMetrics).ConfusionMatrix);
109+
return result as TMetrics;
110+
}
111+
112+
if (typeof(TMetrics) == typeof(MulticlassClassificationMetrics))
113+
{
114+
var newMetrics = metrics.Select(x => x as MulticlassClassificationMetrics);
115+
Contracts.Assert(newMetrics != null);
116+
117+
var result = new MulticlassClassificationMetrics(
118+
accuracyMicro: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MicroAccuracy)),
119+
accuracyMacro: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MacroAccuracy)),
120+
logLoss: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLoss)),
121+
logLossReduction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLossReduction)),
122+
topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
123+
topKAccuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.TopKAccuracy)),
124+
// Return PerClassLogLoss and ConfusionMatrix from the fold closest to average score
125+
perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
126+
confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix);
127+
return result as TMetrics;
128+
}
129+
130+
if (typeof(TMetrics) == typeof(RegressionMetrics))
131+
{
132+
var newMetrics = metrics.Select(x => x as RegressionMetrics);
133+
Contracts.Assert(newMetrics != null);
134+
135+
var result = new RegressionMetrics(
136+
l1: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MeanAbsoluteError)),
137+
l2: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MeanSquaredError)),
138+
rms: GetAverageOfNonNaNScores(newMetrics.Select(x => x.RootMeanSquaredError)),
139+
lossFunction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LossFunction)),
140+
rSquared: GetAverageOfNonNaNScores(newMetrics.Select(x => x.RSquared)));
141+
return result as TMetrics;
142+
}
143+
144+
throw new NotImplementedException($"Metric {typeof(TMetrics)} not implemented");
145+
}
146+
147+
private static double GetAverageOfNonNaNScores(IEnumerable<double> results)
148+
{
149+
var newResults = results.Where(r => !double.IsNaN(r));
150+
// Return NaN iff all scores are NaN
151+
if (newResults.Count() == 0)
152+
return double.NaN;
153+
// Return average of non-NaN scores otherwise
154+
return newResults.Average(r => r);
155+
}
156+
86157
private static int GetIndexClosestToAverage(IEnumerable<double> values, double average)
87158
{
159+
// Average will be NaN iff all values are NaN.
160+
// Return the first index in this case.
161+
if (double.IsNaN(average))
162+
return 0;
163+
88164
int avgFoldIndex = -1;
89165
var smallestDistFromAvg = double.PositiveInfinity;
90166
for (var i = 0; i < values.Count(); i++)
91167
{
92-
var distFromAvg = Math.Abs(values.ElementAt(i) - average);
93-
if (distFromAvg < smallestDistFromAvg || smallestDistFromAvg == double.PositiveInfinity)
168+
var value = values.ElementAt(i);
169+
if (double.IsNaN(value))
170+
continue;
171+
var distFromAvg = Math.Abs(value - average);
172+
if (distFromAvg < smallestDistFromAvg)
94173
{
95174
smallestDistFromAvg = distFromAvg;
96175
avgFoldIndex = i;

src/Microsoft.ML.AutoML/Utils/BestResultUtil.cs

+6
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ public static RunDetail<TMetrics> GetBestRun<TMetrics>(IEnumerable<RunDetail<TMe
4141
if (!results.Any()) { return null; }
4242
var scores = results.Select(r => metricsAgent.GetScore(r.ValidationMetrics));
4343
var indexOfBestScore = GetIndexOfBestScore(scores, isMetricMaximizing);
44+
// indexOfBestScore will be -1 if the optimization metric for all models is NaN.
45+
// In this case, return the first model.
46+
indexOfBestScore = indexOfBestScore != -1 ? indexOfBestScore : 0;
4447
return results.ElementAt(indexOfBestScore);
4548
}
4649

@@ -51,6 +54,9 @@ public static CrossValidationRunDetail<TMetrics> GetBestRun<TMetrics>(IEnumerabl
5154
if (!results.Any()) { return null; }
5255
var scores = results.Select(r => r.Results.Average(x => metricsAgent.GetScore(x.ValidationMetrics)));
5356
var indexOfBestScore = GetIndexOfBestScore(scores, isMetricMaximizing);
57+
// indexOfBestScore will be -1 if the optimization metric for all models is NaN.
58+
// In this case, return the first model.
59+
indexOfBestScore = indexOfBestScore != -1 ? indexOfBestScore : 0;
5460
return results.ElementAt(indexOfBestScore);
5561
}
5662

src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs

+7
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,12 @@ internal BinaryClassificationMetrics(double auc, double accuracy, double positiv
122122
F1Score = f1Score;
123123
AreaUnderPrecisionRecallCurve = auprc;
124124
}
125+
126+
internal BinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall,
127+
double negativePrecision, double negativeRecall, double f1Score, double auprc, ConfusionMatrix confusionMatrix)
128+
: this(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc)
129+
{
130+
ConfusionMatrix = confusionMatrix;
131+
}
125132
}
126133
}

src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs

+7
Original file line numberDiff line numberDiff line change
@@ -134,5 +134,12 @@ internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMa
134134
TopKAccuracy = topKAccuracy;
135135
PerClassLogLoss = perClassLogLoss.ToImmutableArray();
136136
}
137+
138+
internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
139+
int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
140+
: this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracy, perClassLogLoss)
141+
{
142+
ConfusionMatrix = confusionMatrix;
143+
}
137144
}
138145
}

0 commit comments

Comments
 (0)