From 0c9f66f48429c5f1796276214de5d2e57568d5ca Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 16 Apr 2025 20:24:48 -0700
Subject: [PATCH 1/5] Add a shorter alias for --format

---
 .../Microsoft.Extensions.AI.Evaluation.Console/Program.cs       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Program.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Program.cs
index bdae87d9d53..8150f3a4d8c 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Program.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Program.cs
@@ -90,7 +90,7 @@ private static async Task<int> Main(string[] args)
 
         var formatOpt =
             new Option<ReportCommand.Format>(
-                "--format",
+                ["-f", "--format"],
                 () => ReportCommand.Format.html,
                 "Specify the format for the generated report.");
 

From d42ccecdfa4127ab73321f53f23fc987dda1728d Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 16 Apr 2025 16:51:15 -0700
Subject: [PATCH 2/5] Remove extension methods for adding single diagnostic

Since the same can be achieved using the params overload
---
 .../ChatConversationEvaluator.cs              |  6 +-
 .../RelevanceTruthAndCompletenessEvaluator.cs |  8 +-
 .../SingleNumericMetricEvaluator.cs           |  4 +-
 .../EvaluationMetricExtensions.cs             |  2 +-
 .../CompositeEvaluator.cs                     |  2 +-
 .../EvaluationMetricExtensions.cs             | 23 ++----
 .../EvaluationResultExtensions.cs             | 18 -----
 .../ResultsTests.cs                           | 78 +++++++++----------
 .../ResultStoreTester.cs                      |  2 +-
 .../ScenarioRunResultTests.cs                 | 20 ++---
 10 files changed, 68 insertions(+), 95 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
index cbc904277ab..6550454806f 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
@@ -49,7 +49,7 @@ public async ValueTask<EvaluationResult> EvaluateAsync(
 
         if (string.IsNullOrWhiteSpace(modelResponse.Text))
         {
-            result.AddDiagnosticToAllMetrics(
+            result.AddDiagnosticsToAllMetrics(
                 EvaluationDiagnostic.Error(
                     "Evaluation failed because the model response supplied for evaluation was null or empty."));
 
@@ -73,7 +73,7 @@ void OnTokenBudgetExceeded()
                     EvaluationDiagnostic.Error(
                         $"Evaluation failed because the specified limit of {inputTokenLimit} input tokens was exceeded.");
 
-                result.AddDiagnosticToAllMetrics(tokenBudgetExceeded);
+                result.AddDiagnosticsToAllMetrics(tokenBudgetExceeded);
             }
 
             if (!string.IsNullOrWhiteSpace(SystemPrompt))
@@ -176,7 +176,7 @@ await PerformEvaluationAsync(
         if (inputTokenLimit > 0 && ignoredMessagesCount > 0)
         {
 #pragma warning disable S103 // Lines should not be too long
-            result.AddDiagnosticToAllMetrics(
+            result.AddDiagnosticsToAllMetrics(
                 EvaluationDiagnostic.Warning(
                     $"The evaluation may be inconclusive because the oldest {ignoredMessagesCount} messages in the supplied conversation history were ignored in order to stay under the specified limit of {inputTokenLimit} input tokens."));
 #pragma warning restore S103
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
index c6c38cf583a..ee586b9b242 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
@@ -145,7 +145,7 @@ await chatConfiguration.ChatClient.GetResponseAsync(
             if (string.IsNullOrEmpty(evaluationResponseText))
             {
                 rating = Rating.Inconclusive;
-                result.AddDiagnosticToAllMetrics(
+                result.AddDiagnosticsToAllMetrics(
                     EvaluationDiagnostic.Error(
                         "Evaluation failed because the model failed to produce a valid evaluation response."));
             }
@@ -168,7 +168,7 @@ await JsonOutputFixer.RepairJsonAsync(
                         if (string.IsNullOrEmpty(repairedJson))
                         {
                             rating = Rating.Inconclusive;
-                            result.AddDiagnosticToAllMetrics(
+                            result.AddDiagnosticsToAllMetrics(
                                 EvaluationDiagnostic.Error(
                                     $"""
                                     Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
@@ -183,7 +183,7 @@ await JsonOutputFixer.RepairJsonAsync(
                     catch (JsonException ex)
                     {
                         rating = Rating.Inconclusive;
-                        result.AddDiagnosticToAllMetrics(
+                        result.AddDiagnosticsToAllMetrics(
                             EvaluationDiagnostic.Error(
                                 $"""
                                 Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
@@ -281,7 +281,7 @@ void UpdateResult()
 
             if (!string.IsNullOrWhiteSpace(rating.Error))
             {
-                result.AddDiagnosticToAllMetrics(EvaluationDiagnostic.Error(rating.Error!));
+                result.AddDiagnosticsToAllMetrics(EvaluationDiagnostic.Error(rating.Error!));
             }
         }
     }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs
index 6c81250ed1c..d4abf52a9e3 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs
@@ -105,7 +105,7 @@ await chatConfiguration.ChatClient.GetResponseAsync(
 
             if (string.IsNullOrEmpty(evaluationResponseText))
             {
-                metric.AddDiagnostic(
+                metric.AddDiagnostics(
                     EvaluationDiagnostic.Error(
                         "Evaluation failed because the model failed to produce a valid evaluation response."));
             }
@@ -115,7 +115,7 @@ await chatConfiguration.ChatClient.GetResponseAsync(
             }
             else
             {
-                metric.AddDiagnostic(
+                metric.AddDiagnostics(
                     EvaluationDiagnostic.Error(
                         $"Failed to parse '{evaluationResponseText!}' as an integer score for '{MetricName}'."));
             }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/EvaluationMetricExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/EvaluationMetricExtensions.cs
index 20246e3aaa2..8a0ffcbd31b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/EvaluationMetricExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/EvaluationMetricExtensions.cs
@@ -97,6 +97,6 @@ internal static void LogJsonData(this EvaluationMetric metric, string data)
     internal static void LogJsonData(this EvaluationMetric metric, JsonNode data)
     {
         string serializedData = data.ToJsonString(new JsonSerializerOptions { WriteIndented = true });
-        metric.AddDiagnostic(EvaluationDiagnostic.Informational(serializedData));
+        metric.AddDiagnostics(EvaluationDiagnostic.Informational(serializedData));
     }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs
index 7dc544c66c8..6feba92d6c3 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs
@@ -159,7 +159,7 @@ async ValueTask<EvaluationResult> EvaluateAsync(IEvaluator e)
                 foreach (string metricName in e.EvaluationMetricNames)
                 {
                     var metric = new EvaluationMetric(metricName);
-                    metric.AddDiagnostic(EvaluationDiagnostic.Error(message));
+                    metric.AddDiagnostics(EvaluationDiagnostic.Error(message));
                     result.Metrics.Add(metric.Name, metric);
                 }
 
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
index 29a22a2bd60..e4980d39f9f 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
@@ -40,20 +40,6 @@ metric.Diagnostics is not null &&
                 : metric.Diagnostics.Any(predicate));
     }
 
-    /// <summary>
-    /// Adds the supplied <see cref="EvaluationDiagnostic"/> to the supplied <see cref="EvaluationMetric"/>'s
-    /// <see cref="EvaluationMetric.Diagnostics"/> collection.
-    /// </summary>
-    /// <param name="metric">The <see cref="EvaluationMetric"/>.</param>
-    /// <param name="diagnostic">The <see cref="EvaluationDiagnostic"/> to be added.</param>
-    public static void AddDiagnostic(this EvaluationMetric metric, EvaluationDiagnostic diagnostic)
-    {
-        _ = Throw.IfNull(metric);
-
-        metric.Diagnostics ??= new List<EvaluationDiagnostic>();
-        metric.Diagnostics.Add(diagnostic);
-    }
-
     /// <summary>
     /// Adds the supplied <see cref="EvaluationDiagnostic"/>s to the supplied <see cref="EvaluationMetric"/>'s
     /// <see cref="EvaluationMetric.Diagnostics"/> collection.
@@ -65,9 +51,14 @@ public static void AddDiagnostics(this EvaluationMetric metric, IEnumerable<Eval
         _ = Throw.IfNull(metric);
         _ = Throw.IfNull(diagnostics);
 
-        foreach (EvaluationDiagnostic diagnostic in diagnostics)
+        if (diagnostics.Any())
         {
-            metric.AddDiagnostic(diagnostic);
+            metric.Diagnostics ??= new List<EvaluationDiagnostic>();
+
+            foreach (EvaluationDiagnostic diagnostic in diagnostics)
+            {
+                metric.Diagnostics.Add(diagnostic);
+            }
         }
     }
 
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
index 5ca59b16584..bbe0f1bc0a2 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
@@ -13,24 +13,6 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </summary>
 public static class EvaluationResultExtensions
 {
-    /// <summary>
-    /// Adds the supplied <paramref name="diagnostic"/> to all <see cref="EvaluationMetric"/>s contained in the
-    /// supplied <paramref name="result"/>.
-    /// </summary>
-    /// <param name="result">
-    /// The <see cref="EvaluationResult"/> containing the <see cref="EvaluationMetric"/>s that are to be altered.
-    /// </param>
-    /// <param name="diagnostic">The <see cref="EvaluationDiagnostic"/> that is to be added.</param>
-    public static void AddDiagnosticToAllMetrics(this EvaluationResult result, EvaluationDiagnostic diagnostic)
-    {
-        _ = Throw.IfNull(result);
-
-        foreach (EvaluationMetric metric in result.Metrics.Values)
-        {
-            metric.AddDiagnostic(diagnostic);
-        }
-    }
-
     /// <summary>
     /// Adds the supplied <paramref name="diagnostics"/> to all <see cref="EvaluationMetric"/>s contained in the
     /// supplied <paramref name="result"/>.
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
index 8ce98133586..1b52ee9d6d5 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
@@ -419,32 +419,32 @@ public async Task ResultWithDiagnosticsOnUninterpretedMetrics()
         ReportingConfiguration reportingConfiguration = CreateReportingConfiguration(evaluator);
 
         var metric1 = new BooleanMetric("Metric with all diagnostic severities");
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics");
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric2.Reason = "Reason for metric 2";
 
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
         metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only");
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
         metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only");
-        metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
         metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
@@ -472,32 +472,32 @@ public async Task ResultWithDiagnosticsOnFailingMetrics()
         ReportingConfiguration reportingConfiguration = CreateReportingConfiguration(evaluator);
 
         var metric1 = new BooleanMetric("Metric with all diagnostic severities");
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics");
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric2.Reason = "Reason for metric 2";
 
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
         metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only");
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
         metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only");
-        metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
         metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
@@ -531,32 +531,32 @@ public async Task ResultWithDiagnosticsOnPassingMetrics()
         ReportingConfiguration reportingConfiguration = CreateReportingConfiguration(evaluator);
 
         var metric1 = new BooleanMetric("Metric with all diagnostic severities", value: true);
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
-        metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
+        metric1.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics", value: true);
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
-        metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
+        metric2.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 2"));
         metric2.Reason = "Reason for metric 2";
 
         var metric3 = new NumericMetric("Metric with error diagnostics only", value: 5);
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
-        metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 1"));
+        metric3.AddDiagnostics(EvaluationDiagnostic.Error("Error 2"));
         metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only", value: "A");
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
-        metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 1"));
+        metric4.AddDiagnostics(EvaluationDiagnostic.Warning("Warning 2"));
         metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only", value: 4);
-        metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.AddDiagnostics(EvaluationDiagnostic.Informational("Informational 1"));
         metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ResultStoreTester.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ResultStoreTester.cs
index c2547dea47e..1ce033b3cd7 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ResultStoreTester.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ResultStoreTester.cs
@@ -22,7 +22,7 @@ private static ScenarioRunResult CreateTestResult(string scenarioName, string it
         BooleanMetric booleanMetric = new BooleanMetric("boolean", value: true);
 
         NumericMetric numericMetric = new NumericMetric("numeric", value: 3);
-        numericMetric.AddDiagnostic(EvaluationDiagnostic.Informational("Informational Message"));
+        numericMetric.AddDiagnostics(EvaluationDiagnostic.Informational("Informational Message"));
 
         StringMetric stringMetric = new StringMetric("string", value: "Good");
 
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ScenarioRunResultTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ScenarioRunResultTests.cs
index d31e966f096..e5aa47e9fc8 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ScenarioRunResultTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Tests/ScenarioRunResultTests.cs
@@ -18,17 +18,17 @@ public class ScenarioRunResultTests
     public void SerializeScenarioRunResult()
     {
         var booleanMetric = new BooleanMetric("boolean", value: true);
-        booleanMetric.AddDiagnostic(EvaluationDiagnostic.Error("error"));
-        booleanMetric.AddDiagnostic(EvaluationDiagnostic.Warning("warning"));
+        booleanMetric.AddDiagnostics(EvaluationDiagnostic.Error("error"));
+        booleanMetric.AddDiagnostics(EvaluationDiagnostic.Warning("warning"));
 
         var numericMetric = new NumericMetric("numeric", value: 3);
-        numericMetric.AddDiagnostic(EvaluationDiagnostic.Informational("info"));
+        numericMetric.AddDiagnostics(EvaluationDiagnostic.Informational("info"));
 
         var stringMetric = new StringMetric("string", value: "A");
 
         var metricWithNoValue = new EvaluationMetric("none");
-        metricWithNoValue.AddDiagnostic(EvaluationDiagnostic.Error("error"));
-        metricWithNoValue.AddDiagnostic(EvaluationDiagnostic.Informational("info"));
+        metricWithNoValue.AddDiagnostics(EvaluationDiagnostic.Error("error"));
+        metricWithNoValue.AddDiagnostics(EvaluationDiagnostic.Informational("info"));
 
         var turn1 =
             new ChatTurnDetails(
@@ -82,17 +82,17 @@ public void SerializeScenarioRunResult()
     public void SerializeDatasetCompact()
     {
         var booleanMetric = new BooleanMetric("boolean", value: true);
-        booleanMetric.AddDiagnostic(EvaluationDiagnostic.Error("error"));
-        booleanMetric.AddDiagnostic(EvaluationDiagnostic.Warning("warning"));
+        booleanMetric.AddDiagnostics(EvaluationDiagnostic.Error("error"));
+        booleanMetric.AddDiagnostics(EvaluationDiagnostic.Warning("warning"));
 
         var numericMetric = new NumericMetric("numeric", value: 3);
-        numericMetric.AddDiagnostic(EvaluationDiagnostic.Informational("info"));
+        numericMetric.AddDiagnostics(EvaluationDiagnostic.Informational("info"));
 
         var stringMetric = new StringMetric("string", value: "A");
 
         var metricWithNoValue = new EvaluationMetric("none");
-        metricWithNoValue.AddDiagnostic(EvaluationDiagnostic.Error("error"));
-        metricWithNoValue.AddDiagnostic(EvaluationDiagnostic.Informational("info"));
+        metricWithNoValue.AddDiagnostics(EvaluationDiagnostic.Error("error"));
+        metricWithNoValue.AddDiagnostics(EvaluationDiagnostic.Informational("info"));
 
         var turn1 =
             new ChatTurnDetails(

From a9d3710fc561f5d7e9fcf29345083a78ad3ccbb6 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 16 Apr 2025 18:42:44 -0700
Subject: [PATCH 3/5] Introduce Context property on EvaluationMetric

---
 .../TypeScript/components/EvalTypes.d.ts      |  3 +
 .../EvaluationMetric.cs                       | 14 ++++
 .../EvaluationMetricExtensions.cs             | 29 ++++++-
 .../EvaluationResultExtensions.cs             | 76 +++++++++++++++++++
 4 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
index c9accb7a90d..4de977ddc4b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
@@ -94,6 +94,9 @@ type BaseEvaluationMetric = {
     $type: string;
     name: string;
     interpretation?: EvaluationMetricInterpretation;
+    context?: {
+        [K: string]: AIContent[]
+    };
     diagnostics?: EvaluationDiagnostic[];
     metadata: { 
         [K: string]: string 
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
index 7ff604347ba..f9256ca125c 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
@@ -44,6 +44,20 @@ public class EvaluationMetric(string name, string? reason = null)
     public EvaluationMetricInterpretation? Interpretation { get; set; }
 
 #pragma warning disable CA2227
+    /// <summary>
+    /// Gets or sets any contextual information that was considered by the <see cref="IEvaluator"/> as part of the
+    /// evaluation that produced the current <see cref="EvaluationMetric"/>.
+    /// </summary>
+    /// <remarks>
+    /// Each entry in the returned dictionary has a name (key), and a collection of <see cref="AIContent"/> objects
+    /// (value). An <see cref="IEvaluator"/> can use this dictionary to record one or more
+    /// <see cref="EvaluationContext"/>s that it considered as part of the evaluation that produced this
+    /// <see cref="EvaluationMetric"/>. For example, it can do so by including an entry with a name for the considered
+    /// <see cref="EvaluationContext"/> as the key, and the <see cref="AIContent"/> objects returned from
+    /// <see cref="EvaluationContext.GetContents"/> as the value.
+    /// </remarks>
+    public IDictionary<string, IList<AIContent>>? Context { get; set; }
+
     // CA2227: Collection properties should be read only.
     // We disable this warning because we want this type to be fully mutable for serialization purposes and for general
     // convenience.
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
index e4980d39f9f..f5b6bb4c33b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetricExtensions.cs
@@ -13,6 +13,31 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </summary>
 public static class EvaluationMetricExtensions
 {
+    /// <summary>
+    /// Adds or updates contextual information with the specified <paramref name="name"/> and <paramref name="value"/>
+    /// in the supplied <paramref name="metric"/>'s <see cref="EvaluationMetric.Context"/> collection.
+    /// </summary>
+    /// <param name="metric">The <see cref="EvaluationMetric"/>.</param>
+    /// <param name="name">The name for the contextual information to be added or updated.</param>
+    /// <param name="value">The contextual information to be added or updated.</param>
+    public static void AddOrUpdateContext(this EvaluationMetric metric, string name, params AIContent[] value)
+        => metric.AddOrUpdateContext(name, value as IEnumerable<AIContent>);
+
+    /// <summary>
+    /// Adds or updates contextual information with the specified <paramref name="name"/> and <paramref name="value"/>
+    /// in the supplied <paramref name="metric"/>'s <see cref="EvaluationMetric.Context"/> collection.
+    /// </summary>
+    /// <param name="metric">The <see cref="EvaluationMetric"/>.</param>
+    /// <param name="name">The name for the contextual information to be added or updated.</param>
+    /// <param name="value">The contextual information to be added or updated.</param>
+    public static void AddOrUpdateContext(this EvaluationMetric metric, string name, IEnumerable<AIContent> value)
+    {
+        _ = Throw.IfNull(metric);
+
+        metric.Context ??= new Dictionary<string, IList<AIContent>>();
+        metric.Context[name] = [.. value];
+    }
+
     /// <summary>
     /// Determines if the supplied <paramref name="metric"/> contains any
     /// <see cref="EvaluationDiagnostic"/> matching the supplied <paramref name="predicate"/>.
@@ -73,7 +98,7 @@ public static void AddDiagnostics(this EvaluationMetric metric, params Evaluatio
 
     /// <summary>
     /// Adds or updates metadata with the specified <paramref name="name"/> and <paramref name="value"/> in the
-    /// supplied <see cref="EvaluationMetric"/>'s <see cref="EvaluationMetric.Metadata"/> collection.
+    /// supplied <paramref name="metric"/>'s <see cref="EvaluationMetric.Metadata"/> collection.
     /// </summary>
     /// <param name="metric">The <see cref="EvaluationMetric"/>.</param>
     /// <param name="name">The name of the metadata.</param>
@@ -87,7 +112,7 @@ public static void AddOrUpdateMetadata(this EvaluationMetric metric, string name
     }
 
     /// <summary>
-    /// Adds or updates the supplied <paramref name="metadata"/> to the supplied <see cref="EvaluationMetric"/>'s
+    /// Adds or updates the supplied <paramref name="metadata"/> in the supplied <paramref name="metric"/>'s
     /// <see cref="EvaluationMetric.Metadata"/> collection.
     /// </summary>
     /// <param name="metric">The <see cref="EvaluationMetric"/>.</param>
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
index bbe0f1bc0a2..2e737434ea6 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationResultExtensions.cs
@@ -13,6 +13,43 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </summary>
 public static class EvaluationResultExtensions
 {
+    /// <summary>
+    /// Adds or updates contextual information with the specified <paramref name="name"/> and <paramref name="value"/>
+    /// in all <see cref="EvaluationMetric"/>s contained in the supplied <paramref name="result"/>.
+    /// </summary>
+    /// <param name="result">
+    /// The <see cref="EvaluationResult"/> containing the <see cref="EvaluationMetric"/>s that are to be altered.
+    /// </param>
+    /// <param name="name">The name for the contextual information to be added or updated.</param>
+    /// <param name="value">The contextual information to be added or updated, supplied as individual items.</param>
+    public static void AddOrUpdateContextInAllMetrics(
+        this EvaluationResult result,
+        string name,
+        params AIContent[] value)
+            => result.AddOrUpdateContextInAllMetrics(name, value as IEnumerable<AIContent>);
+
+    /// <summary>
+    /// Adds or updates contextual information with the specified <paramref name="name"/> and <paramref name="value"/>
+    /// in all <see cref="EvaluationMetric"/>s contained in the supplied <paramref name="result"/>.
+    /// </summary>
+    /// <param name="result">
+    /// The <see cref="EvaluationResult"/> containing the <see cref="EvaluationMetric"/>s that are to be altered.
+    /// </param>
+    /// <param name="name">The name for the contextual information to be added or updated.</param>
+    /// <param name="value">The non-null contextual information to be added or updated; enumerated only once.</param>
+    public static void AddOrUpdateContextInAllMetrics(
+        this EvaluationResult result,
+        string name,
+        IEnumerable<AIContent> value)
+    {
+        _ = Throw.IfNull(result);
+        List<AIContent> contents = [.. Throw.IfNull(value)];
+        foreach (EvaluationMetric metric in result.Metrics.Values)
+        {
+            metric.AddOrUpdateContext(name, contents);
+        }
+    }
+
     /// <summary>
     /// Adds the supplied <paramref name="diagnostics"/> to all <see cref="EvaluationMetric"/>s contained in the
     /// supplied <paramref name="result"/>.
@@ -92,4 +129,43 @@ public static void Interpret(
             }
         }
     }
+
+    /// <summary>
+    /// Adds or updates metadata with the specified <paramref name="name"/> and <paramref name="value"/> in all
+    /// <see cref="EvaluationMetric"/>s contained in the supplied <paramref name="result"/>.
+    /// </summary>
+    /// <param name="result">
+    /// The <see cref="EvaluationResult"/> containing the <see cref="EvaluationMetric"/>s that are to be altered.
+    /// </param>
+    /// <param name="name">The name of the metadata to be added or updated.</param>
+    /// <param name="value">The value of the metadata to be added or updated.</param>
+    public static void AddOrUpdateMetadataInAllMetrics(this EvaluationResult result, string name, string value)
+    {
+        _ = Throw.IfNull(result);
+
+        foreach (EvaluationMetric metric in result.Metrics.Values)
+        {
+            metric.AddOrUpdateMetadata(name, value);
+        }
+    }
+
+    /// <summary>
+    /// Adds or updates the supplied <paramref name="metadata"/> in all <see cref="EvaluationMetric"/>s contained in
+    /// the supplied <paramref name="result"/>.
+    /// </summary>
+    /// <param name="result">
+    /// The <see cref="EvaluationResult"/> containing the <see cref="EvaluationMetric"/>s that are to be altered.
+    /// </param>
+    /// <param name="metadata">The name/value metadata pairs to be added or updated in each metric.</param>
+    public static void AddOrUpdateMetadataInAllMetrics(
+        this EvaluationResult result,
+        IDictionary<string, string> metadata)
+    {
+        _ = Throw.IfNull(result);
+
+        foreach (EvaluationMetric metric in result.Metrics.Values)
+        {
+            metric.AddOrUpdateMetadata(metadata);
+        }
+    }
 }

From 223e08754f69682d6c2eb2110236e41dbfb13ddb Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 16 Apr 2025 20:02:11 -0700
Subject: [PATCH 4/5] Update Quality and Safety evaluators that use context to
 record the context used in the metrics they produce

---
 .../ChatConversationEvaluator.cs              |  3 +-
 .../EquivalenceEvaluator.cs                   | 48 ++++++++++++++-----
 .../GroundednessEvaluator.cs                  | 38 ++++++++++++++-
 .../GroundednessProEvaluator.cs               | 20 +++++---
 .../UngroundedAttributesEvaluator.cs          | 20 +++++---
 5 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
index 6550454806f..63e17ebe1ac 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs
@@ -1,7 +1,6 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
@@ -35,7 +34,7 @@ public abstract class ChatConversationEvaluator : IEvaluator
     protected virtual string? SystemPrompt => null;
 
     /// <inheritdoc/>
-    public async ValueTask<EvaluationResult> EvaluateAsync(
+    public virtual async ValueTask<EvaluationResult> EvaluateAsync(
         IEnumerable<ChatMessage> messages,
         ChatResponse modelResponse,
         ChatConfiguration? chatConfiguration = null,
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/EquivalenceEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/EquivalenceEvaluator.cs
index 15b7f8b3f31..4a15f3640ee 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/EquivalenceEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/EquivalenceEvaluator.cs
@@ -49,6 +49,28 @@ public sealed class EquivalenceEvaluator : SingleNumericMetricEvaluator
     /// <inheritdoc/>
     protected override bool IgnoresHistory => true;
 
+    /// <inheritdoc/>
+    public override async ValueTask<EvaluationResult> EvaluateAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatResponse modelResponse,
+        ChatConfiguration? chatConfiguration = null,
+        IEnumerable<EvaluationContext>? additionalContext = null,
+        CancellationToken cancellationToken = default)
+    {
+        EvaluationResult result =
+            await base.EvaluateAsync(
+                messages,
+                modelResponse,
+                chatConfiguration,
+                additionalContext,
+                cancellationToken).ConfigureAwait(false);
+
+        EquivalenceEvaluatorContext context = GetRelevantContext(additionalContext);
+        result.AddOrUpdateContextInAllMetrics("Ground Truth", context.GetContents());
+
+        return result;
+    }
+
     /// <inheritdoc/>
     protected override async ValueTask<string> RenderEvaluationPromptAsync(
         ChatMessage? userRequest,
@@ -66,18 +88,8 @@ userRequest is not null
                 ? await RenderAsync(userRequest, cancellationToken).ConfigureAwait(false)
                 : string.Empty;
 
-        string groundTruth;
-
-        if (additionalContext?.OfType<EquivalenceEvaluatorContext>().FirstOrDefault()
-                is EquivalenceEvaluatorContext context)
-        {
-            groundTruth = context.GroundTruth;
-        }
-        else
-        {
-            throw new InvalidOperationException(
-                $"A value of type '{nameof(EquivalenceEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
-        }
+        EquivalenceEvaluatorContext context = GetRelevantContext(additionalContext);
+        string groundTruth = context.GroundTruth;
 
         string prompt =
             $$"""
@@ -149,4 +161,16 @@ alleviating stress and augmenting general mood.
 
         return prompt;
     }
+
+    /// <summary>
+    /// Finds the first <see cref="EquivalenceEvaluatorContext"/> in <paramref name="additionalContext"/>.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">No such context was supplied.</exception>
+    private static EquivalenceEvaluatorContext GetRelevantContext(IEnumerable<EvaluationContext>? additionalContext)
+    {
+        EquivalenceEvaluatorContext? match = additionalContext?.OfType<EquivalenceEvaluatorContext>().FirstOrDefault();
+
+        return match ?? throw new InvalidOperationException(
+            $"A value of type '{nameof(EquivalenceEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
+    }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/GroundednessEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/GroundednessEvaluator.cs
index bd09774150e..c8881864d40 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/GroundednessEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/GroundednessEvaluator.cs
@@ -49,6 +49,30 @@ public sealed class GroundednessEvaluator : SingleNumericMetricEvaluator
     /// <inheritdoc/>
     protected override bool IgnoresHistory => false;
 
+    /// <inheritdoc/>
+    public override async ValueTask<EvaluationResult> EvaluateAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatResponse modelResponse,
+        ChatConfiguration? chatConfiguration = null,
+        IEnumerable<EvaluationContext>? additionalContext = null,
+        CancellationToken cancellationToken = default)
+    {
+        EvaluationResult result =
+            await base.EvaluateAsync(
+                messages,
+                modelResponse,
+                chatConfiguration,
+                additionalContext,
+                cancellationToken).ConfigureAwait(false);
+
+        if (GetRelevantContext(additionalContext) is GroundednessEvaluatorContext context)
+        {
+            result.AddOrUpdateContextInAllMetrics("Grounding Context", context.GetContents());
+        }
+
+        return result;
+    }
+
     /// <inheritdoc/>
     protected override async ValueTask<string> RenderEvaluationPromptAsync(
         ChatMessage? userRequest,
@@ -68,8 +92,7 @@ userRequest is not null
 
         var builder = new StringBuilder();
 
-        if (additionalContext?.OfType<GroundednessEvaluatorContext>().FirstOrDefault()
-                is GroundednessEvaluatorContext context)
+        if (GetRelevantContext(additionalContext) is GroundednessEvaluatorContext context)
         {
             _ = builder.Append(context.GroundingContext);
             _ = builder.AppendLine();
@@ -162,4 +185,15 @@ is not French.
 
         return prompt;
     }
+
+    /// <summary>
+    /// Looks up the first <see cref="GroundednessEvaluatorContext"/> in <paramref name="additionalContext"/>.
+    /// </summary>
+    /// <returns>The matching context, or <see langword="null"/> when none was supplied.</returns>
+    private static GroundednessEvaluatorContext? GetRelevantContext(
+        IEnumerable<EvaluationContext>? additionalContext)
+    {
+        // FirstOrDefault() already yields null when no element of the requested type exists.
+        return additionalContext?.OfType<GroundednessEvaluatorContext>().FirstOrDefault();
+    }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/GroundednessProEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/GroundednessProEvaluator.cs
index 6af681d751f..c857e263763 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/GroundednessProEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/GroundednessProEvaluator.cs
@@ -63,22 +63,30 @@ await EvaluateContentSafetyAsync(
                 contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.QuestionAnswer.ToString(),
                 cancellationToken: cancellationToken).ConfigureAwait(false);
 
+        GroundednessProEvaluatorContext context = GetRelevantContext(additionalContext);
+        result.AddOrUpdateContextInAllMetrics("Grounding Context", context.GetContents());
+
         return result;
     }
 
     /// <inheritdoc/>
     protected override IReadOnlyList<EvaluationContext>? FilterAdditionalContext(
         IEnumerable<EvaluationContext>? additionalContext)
+    {
+        GroundednessProEvaluatorContext context = GetRelevantContext(additionalContext);
+        return [context];
+    }
+
+    private static GroundednessProEvaluatorContext GetRelevantContext(
+        IEnumerable<EvaluationContext>? additionalContext)
     {
         if (additionalContext?.OfType<GroundednessProEvaluatorContext>().FirstOrDefault()
                 is GroundednessProEvaluatorContext context)
         {
-            return [context];
-        }
-        else
-        {
-            throw new InvalidOperationException(
-                $"A value of type '{nameof(GroundednessProEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
+            return context;
         }
+
+        throw new InvalidOperationException(
+            $"A value of type '{nameof(GroundednessProEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
     }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/UngroundedAttributesEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/UngroundedAttributesEvaluator.cs
index 79a5deb4888..656d5280c7c 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/UngroundedAttributesEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/UngroundedAttributesEvaluator.cs
@@ -67,22 +67,30 @@ await EvaluateContentSafetyAsync(
                 contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.QueryResponse.ToString(),
                 cancellationToken: cancellationToken).ConfigureAwait(false);
 
+        UngroundedAttributesEvaluatorContext context = GetRelevantContext(additionalContext);
+        result.AddOrUpdateContextInAllMetrics("Grounding Context", context.GetContents());
+
         return result;
     }
 
     /// <inheritdoc/>
     protected override IReadOnlyList<EvaluationContext>? FilterAdditionalContext(
         IEnumerable<EvaluationContext>? additionalContext)
+    {
+        UngroundedAttributesEvaluatorContext context = GetRelevantContext(additionalContext);
+        return [context];
+    }
+
+    private static UngroundedAttributesEvaluatorContext GetRelevantContext(
+        IEnumerable<EvaluationContext>? additionalContext)
     {
         if (additionalContext?.OfType<UngroundedAttributesEvaluatorContext>().FirstOrDefault()
                 is UngroundedAttributesEvaluatorContext context)
         {
-            return [context];
-        }
-        else
-        {
-            throw new InvalidOperationException(
-                $"A value of type '{nameof(UngroundedAttributesEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
+            return context;
         }
+
+        throw new InvalidOperationException(
+            $"A value of type '{nameof(UngroundedAttributesEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection.");
     }
 }

From 38f4bedf1e5a6d7137d12c573e991d8c09e8a415 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 16 Apr 2025 21:20:15 -0700
Subject: [PATCH 5/5] Update report to display context bubbles under the
 conversation details for selected metrics

---
 .../components/ConversationDetails.tsx        | 39 ++++++++++++++++++-
 .../TypeScript/components/ScoreDetail.tsx     |  2 +-
 .../TypeScript/components/Styles.ts           |  8 ++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ConversationDetails.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ConversationDetails.tsx
index 325f66ee7e2..eddc15b598f 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ConversationDetails.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ConversationDetails.tsx
@@ -8,11 +8,13 @@ import ReactMarkdown from "react-markdown";
 import { useReportContext } from "./ReportContext";
 import { useStyles } from "./Styles";
 import { ChatMessageDisplay, isTextContent, isImageContent } from "./Summary";
+import type { MetricType } from "./MetricCard";
 
-export const ConversationDetails = ({ messages, model, usage }: {
+export const ConversationDetails = ({ messages, model, usage, selectedMetric }: {
     messages: ChatMessageDisplay[];
     model?: string;
     usage?: UsageDetails;
+    selectedMetric?: MetricType | null;
 }) => {
     const classes = useStyles();
     const [isExpanded, setIsExpanded] = useState(true);
@@ -59,7 +61,27 @@ export const ConversationDetails = ({ messages, model, usage }: {
         return result;
     };
 
+    // Converts the selected metric's context dictionary into a list of display groups,
+    // one per entry that actually has content.
+    const getContextGroups = () => {
+        const groups: { key: string, contents: AIContent[] }[] = [];
+
+        if (!selectedMetric || !selectedMetric.context) {
+            return groups;
+        }
+
+        Object.entries(selectedMetric.context).forEach(([name, items]) => {
+            // Entries with a missing or empty content list produce no group.
+            if (items && items.length > 0) {
+                groups.push({ key: name.toLowerCase(), contents: items });
+            }
+        });
+
+        return groups;
+    };
+
     const messageGroups = groupMessages();
+    const contextGroups = getContextGroups();
 
     return (
         <div className={classes.section}>
@@ -79,7 +101,7 @@ export const ConversationDetails = ({ messages, model, usage }: {
                         );
 
                         return (
-                            <div key={index} className={messageRowClass}>
+                            <div key={`msg-${index}`} className={messageRowClass}>
                                 <div className={classes.messageParticipantName}>{group.participantName}</div>
                                 <div className={classes.messageBubble}>
                                     {group.contents.map((content, contentIndex) => (
@@ -91,6 +113,19 @@ export const ConversationDetails = ({ messages, model, usage }: {
                             </div>
                         );
                     })}
+                    
+                    {contextGroups.map((group, index) => (
+                        <div key={`context-${index}`} className={mergeClasses(classes.messageRow, classes.userMessageRow)}>
+                            <div className={classes.messageParticipantName}>{`supplied evaluation context (${group.key})`}</div>
+                            <div className={classes.contextBubble}>
+                                {group.contents.map((content, contentIndex) => (
+                                    <div key={contentIndex}>
+                                        {renderContent(content)}
+                                    </div>
+                                ))}
+                            </div>
+                        </div>
+                    ))}
                 </div>
             )}
         </div>
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScoreDetail.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScoreDetail.tsx
index 1147cce6669..7d614abea33 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScoreDetail.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScoreDetail.tsx
@@ -32,7 +32,7 @@ export const ScoreDetail = ({ scenario, scoreSummary }: { scenario: ScenarioRunR
             onMetricSelect={setSelectedMetric}
             selectedMetric={selectedMetric} />
         {selectedMetric && <MetricDetailsSection metric={selectedMetric} />}
-        <ConversationDetails messages={messages} model={model} usage={usage} />
+        <ConversationDetails messages={messages} model={model} usage={usage} selectedMetric={selectedMetric} />
         {scenario.chatDetails && scenario.chatDetails.turnDetails.length > 0 && <ChatDetailsSection chatDetails={scenario.chatDetails} />}
     </div>);
 };
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Styles.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Styles.ts
index 7ddc980a372..89867593479 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Styles.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Styles.ts
@@ -127,6 +127,14 @@ export const useStyles = makeStyles({
         backgroundColor: tokens.colorNeutralBackground3,
         border: '1px solid ' + tokens.colorNeutralStroke2,
     },
+    contextBubble: {
+        padding: '0.75rem 1rem',
+        borderRadius: '12px',
+        overflow: 'hidden',
+        wordBreak: 'break-word',
+        backgroundColor: tokens.colorBrandBackground2,
+        border: '1px solid ' + tokens.colorNeutralStroke2,
+    },
     cacheHitIcon: {
         color: tokens.colorPaletteGreenForeground1,
     },