Enable Serving Calibration metric to visualize segment calibration of serving traffic.

jiayixu64 · facebook-github-bot · commit 7777565a62cb · 2024-07-08T12:10:31.000-07:00
Summary: * Pull Request resolved: #2201 * Problem: * After data consolidation, Calibration metrics reflect the full consolidated data, so the Calibration of each individual serving traffic segment is not visible. * Solution: * Enable segment Calibration visualization, which plots the Calibration across examples for each serving task separately instead of over the total volume of the consolidated data. * Enable it on APS, following the PyPer implementation in D49698301. * Usage: Add `SERVING_CALIBRATION` and the task indices to `rec_metrics` in the model config. {F1741788763} Differential Revision: D59296724 fbshipit-source-id: a92c12af915c728cb49d6bbcf7925359e255d9a6
diff --git a/torchrec/metrics/metric_module.py b/torchrec/metrics/metric_module.py
@@ -48,6 +48,7 @@
 from torchrec.metrics.recall_session import RecallSessionMetric
 from torchrec.metrics.scalar import ScalarMetric
 from torchrec.metrics.segmented_ne import SegmentedNEMetric
+from torchrec.metrics.serving_calibration import ServingCalibrationMetric
 from torchrec.metrics.serving_ne import ServingNEMetric
 from torchrec.metrics.throughput import ThroughputMetric
 from torchrec.metrics.tower_qps import TowerQPSMetric
@@ -78,6 +79,7 @@
     RecMetricEnum.PRECISION: PrecisionMetric,
     RecMetricEnum.RECALL: RecallMetric,
     RecMetricEnum.SERVING_NE: ServingNEMetric,
+    RecMetricEnum.SERVING_CALIBRATION: ServingCalibrationMetric,
 }
 
 
diff --git a/torchrec/metrics/metrics_config.py b/torchrec/metrics/metrics_config.py
@@ -40,6 +40,7 @@ class RecMetricEnum(RecMetricEnumBase):
     PRECISION = "precision"
     RECALL = "recall"
     SERVING_NE = "serving_ne"
+    SERVING_CALIBRATION = "serving_calibration"
 
 
 @dataclass(unsafe_hash=True, eq=True)
diff --git a/torchrec/metrics/metrics_namespace.py b/torchrec/metrics/metrics_namespace.py
@@ -70,6 +70,7 @@ class MetricName(MetricNameBase):
     RECALL = "recall"
 
     SERVING_NE = "serving_ne"
+    SERVING_CALIBRATION = "serving_calibration"
 
 
 class MetricNamespaceBase(StrValueMixin, Enum):
@@ -109,6 +110,7 @@ class MetricNamespace(MetricNamespaceBase):
     RECALL = "recall"
 
     SERVING_NE = "serving_ne"
+    SERVING_CALIBRATION = "serving_calibration"
 
 
 class MetricPrefix(StrValueMixin, Enum):
diff --git a/torchrec/metrics/serving_calibration.py b/torchrec/metrics/serving_calibration.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, cast, Dict, List, Optional, Type
+
+import torch
+from torchrec.metrics.calibration import compute_calibration, get_calibration_states
+from torchrec.metrics.metrics_namespace import MetricName, MetricNamespace, MetricPrefix
+from torchrec.metrics.rec_metric import (
+    MetricComputationReport,
+    RecMetric,
+    RecMetricComputation,
+    RecMetricException,
+)
+
+CALIBRATION_NUM = "calibration_num"  # state name of the calibration numerator
+CALIBRATION_DENOM = "calibration_denom"  # state name of the calibration denominator
+NUM_EXAMPLES = "num_examples"  # state name of the nonzero-weight example counter
+
+
+class ServingCalibrationMetricComputation(RecMetricComputation):
+    r"""
+    This class implements the RecMetricComputation for Serving Calibration, the
+    ratio between the predictions and the labels (conversions). States hold one
+    entry per task, alongside a count of the examples with a nonzero weight.
+
+    The constructor arguments are defined in RecMetricComputation.
+    See the docstring of RecMetricComputation for more detail.
+    """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        # Numerator and denominator are registered with a window state as well.
+        for state_name in (CALIBRATION_NUM, CALIBRATION_DENOM):
+            self._add_state(
+                state_name,
+                torch.zeros(self._n_tasks, dtype=torch.double),
+                add_window_state=True,
+                dist_reduce_fx="sum",
+                persistent=True,
+            )
+        # The example counter is registered without a window state.
+        self._add_state(
+            NUM_EXAMPLES,
+            torch.zeros(self._n_tasks, dtype=torch.long),
+            add_window_state=False,
+            dist_reduce_fx="sum",
+            persistent=True,
+        )
+
+    def update(
+        self,
+        *,
+        predictions: Optional[torch.Tensor],
+        labels: torch.Tensor,
+        weights: Optional[torch.Tensor],
+        **kwargs: Dict[str, Any],
+    ) -> None:
+        """Accumulate one batch; raises RecMetricException on missing inputs."""
+        if predictions is None or weights is None:
+            raise RecMetricException(
+                "Inputs 'predictions' and 'weights' should not be None for ServingCalibrationMetricComputation update"
+            )
+        num_samples = predictions.shape[-1]
+        for state_name, state_value in get_calibration_states(
+            labels, predictions, weights
+        ).items():
+            state = getattr(self, state_name)
+            state += state_value
+            self._aggregate_window_state(state_name, state_value, num_samples)
+
+        # Only examples with a nonzero weight are added to the example counter.
+        num_examples_delta = torch.count_nonzero(weights, dim=-1)
+        state_num_examples = getattr(self, NUM_EXAMPLES)
+        state_num_examples += num_examples_delta
+
+    def _compute(self) -> List[MetricComputationReport]:
+        """Report lifetime calibration, window calibration, and total examples."""
+        return [
+            MetricComputationReport(
+                name=MetricName.CALIBRATION,
+                metric_prefix=MetricPrefix.LIFETIME,
+                value=compute_calibration(
+                    cast(torch.Tensor, self.calibration_num),
+                    cast(torch.Tensor, self.calibration_denom),
+                ),
+            ),
+            MetricComputationReport(
+                name=MetricName.CALIBRATION,
+                metric_prefix=MetricPrefix.WINDOW,
+                value=compute_calibration(
+                    self.get_window_state(CALIBRATION_NUM),
+                    self.get_window_state(CALIBRATION_DENOM),
+                ),
+            ),
+            MetricComputationReport(
+                name=MetricName.TOTAL_EXAMPLES,
+                metric_prefix=MetricPrefix.DEFAULT,
+                value=cast(torch.Tensor, self.num_examples).detach(),
+            ),
+        ]
+
+
+class ServingCalibrationMetric(RecMetric):  # ties namespace to computation class
+    _namespace: MetricNamespace = MetricNamespace.SERVING_CALIBRATION
+    _computation_class: Type[RecMetricComputation] = ServingCalibrationMetricComputation
diff --git a/torchrec/metrics/tests/test_serving_calibration.py b/torchrec/metrics/tests/test_serving_calibration.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+# pyre-strict
+
+import unittest
+from typing import Dict, Type
+
+import torch
+from torchrec.metrics.rec_metric import RecComputeMode, RecMetric
+from torchrec.metrics.serving_calibration import ServingCalibrationMetric
+from torchrec.metrics.test_utils import (
+    metric_test_helper,
+    rec_metric_value_test_launcher,
+    TestMetric,
+)
+
+
+class TestServingCalibrationMetric(TestMetric):
+    """Reference calibration used to check ServingCalibrationMetric values."""
+
+    @staticmethod
+    def _get_states(
+        labels: torch.Tensor, predictions: torch.Tensor, weights: torch.Tensor
+    ) -> Dict[str, torch.Tensor]:
+        """Return weighted prediction and label sums plus labels' leading size."""
+        return {
+            "calibration_num": torch.sum(predictions * weights),
+            "calibration_denom": torch.sum(labels * weights),
+            "num_samples": torch.tensor(labels.size()[0]).double(),
+        }
+
+    @staticmethod
+    def _compute(states: Dict[str, torch.Tensor]) -> torch.Tensor:
+        """Return num / denom, or 0.0 where the denominator is not positive."""
+        numer = states["calibration_num"]
+        denom = states["calibration_denom"]
+        ratio = torch.where(denom <= 0.0, 0.0, numer / denom)
+        return ratio.double()
+
+
+WORLD_SIZE = 4  # world_size handed to rec_metric_value_test_launcher
+
+
+class ServingCalibrationMetricTest(unittest.TestCase):
+    """Value tests for ServingCalibrationMetric in both task compute modes."""
+
+    clazz: Type[RecMetric] = ServingCalibrationMetric
+    task_name: str = "calibration"
+
+    def _launch(self, compute_mode: RecComputeMode) -> None:
+        """Run the value-test launcher for one task compute mode.
+
+        Every other launcher argument is shared by both test cases.
+        """
+        rec_metric_value_test_launcher(
+            target_clazz=ServingCalibrationMetric,
+            target_compute_mode=compute_mode,
+            test_clazz=TestServingCalibrationMetric,
+            metric_name=ServingCalibrationMetricTest.task_name,
+            task_names=["t1", "t2", "t3"],
+            fused_update_limit=0,
+            compute_on_all_ranks=False,
+            should_validate_update=False,
+            world_size=WORLD_SIZE,
+            entry_point=metric_test_helper,
+        )
+
+    def test_unfused_calibration(self) -> None:
+        """Check metric values with unfused task computation."""
+        self._launch(RecComputeMode.UNFUSED_TASKS_COMPUTATION)
+
+    def test_fused_calibration(self) -> None:
+        """Check metric values with fused task computation."""
+        self._launch(RecComputeMode.FUSED_TASKS_COMPUTATION)