From 9bb7e1831b0cb11717a7f6dc67a9ba5fbe46d163 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Mon, 27 Feb 2023 13:29:19 +0100
Subject: [PATCH 1/3] add docstring for dataset wrapper (#7333)

Co-authored-by: Nicolas Hug
---
 docs/source/datasets.rst                   |  9 +++
 torchvision/datapoints/_dataset_wrapper.py | 65 +++++++++++++++++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
index 68c72e7af8c..35e5eaf2a9f 100644
--- a/docs/source/datasets.rst
+++ b/docs/source/datasets.rst
@@ -169,3 +169,12 @@ Base classes for custom datasets
     DatasetFolder
     ImageFolder
     VisionDataset
+
+Transforms v2
+-------------
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    wrap_dataset_for_transforms_v2
diff --git a/torchvision/datapoints/_dataset_wrapper.py b/torchvision/datapoints/_dataset_wrapper.py
index e358c83d9d1..87ce3ba93a1 100644
--- a/torchvision/datapoints/_dataset_wrapper.py
+++ b/torchvision/datapoints/_dataset_wrapper.py
@@ -14,8 +14,71 @@

 __all__ = ["wrap_dataset_for_transforms_v2"]


-# TODO: naming!
 def wrap_dataset_for_transforms_v2(dataset):
+    """[BETA] Wrap a ``torchvision.dataset`` for usage with :mod:`torchvision.transforms.v2`.
+
+    .. v2betastatus:: wrap_dataset_for_transforms_v2 function
+
+    Example:
+        >>> dataset = torchvision.datasets.CocoDetection(...)
+        >>> dataset = wrap_dataset_for_transforms_v2(dataset)
+
+    .. note::
+
+        For now, only the most popular datasets are supported. Furthermore, the wrapper only supports dataset
+        configurations that are fully supported by ``torchvision.transforms.v2``. If you encounter an error prompting
+        you to raise an issue with ``torchvision`` for a dataset or configuration that you need, please do so.
+
+    The dataset samples are wrapped according to the description below.
+
+    Special cases:
+
+    * :class:`~torchvision.datasets.CocoDetection`: Instead of returning the target as a list of dicts, the wrapper
+      returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` (in ``XYXY`` coordinate format),
+      ``"masks"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.datapoints``.
+      The original keys are preserved.
+    * :class:`~torchvision.datasets.VOCDetection`: The key-value-pairs ``"boxes"`` and ``"labels"`` are added to
+      the target and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are
+      preserved.
+    * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY``
+      coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBox` datapoint.
+    * :class:`~torchvision.datasets.Kitti`: Instead of returning the target as a list of dicts, the wrapper returns
+      a dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data
+      in the corresponding ``torchvision.datapoints``. The original keys are preserved.
+    * :class:`~torchvision.datasets.OxfordIIITPet`: The target for ``target_type="segmentation"`` is wrapped into a
+      :class:`~torchvision.datapoints.Mask` datapoint.
+    * :class:`~torchvision.datasets.Cityscapes`: The target for ``target_type="semantic"`` is wrapped into a
+      :class:`~torchvision.datapoints.Mask` datapoint. The target for ``target_type="instance"`` is *replaced* by
+      a dictionary with the key-value-pairs ``"masks"`` (as :class:`~torchvision.datapoints.Mask` datapoint) and
+      ``"labels"``.
+    * :class:`~torchvision.datasets.WIDERFace`: The value for key ``"bbox"`` in the target is converted to ``XYXY``
+      coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBox` datapoint.
+
+    Image classification datasets
+
+    This wrapper is a no-op for image classification datasets, since they were already fully supported by
+    :mod:`torchvision.transforms` and thus no change is needed for :mod:`torchvision.transforms.v2`.
+
+    Segmentation datasets
+
+    Segmentation datasets, e.g. :class:`~torchvision.datasets.VOCSegmentation`, return a two-tuple of
+    :class:`PIL.Image.Image`'s. This wrapper leaves the image as is (first item), while wrapping the
+    segmentation mask into a :class:`~torchvision.datapoints.Mask` (second item).
+
+    Video classification datasets
+
+    Video classification datasets, e.g. :class:`~torchvision.datasets.Kinetics`, return a three-tuple containing a
+    :class:`torch.Tensor` for the video and audio, and an :class:`int` as label. This wrapper wraps the video into a
+    :class:`~torchvision.datapoints.Video` while leaving the other items as is.
+
+    .. note::
+
+        Only datasets constructed with ``output_format="TCHW"`` are supported, since the alternative
+        ``output_format="THWC"`` is not supported by :mod:`torchvision.transforms.v2`.
+
+    Args:
+        dataset: the dataset instance to wrap for compatibility with transforms v2.
+    """
     return VisionDatasetDatapointWrapper(dataset)

From 5e78488bb37f349c18beca51ac101c78b75b5caf Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 1 Mar 2023 14:25:11 +0000
Subject: [PATCH 2/3] Revert "[Cherry-pick for 0.15] Fix flaky test + add smoke test (#7367)"

This reverts commit 6eb3efd879fd0e9d98f89609b9e4fc3cbe325350.
---
 test/common_utils.py                  | 2 +-
 test/smoke_test.py                    | 2 --
 test/test_transforms_v2_functional.py | 8 ++++----
 test/transforms_v2_kernel_infos.py    | 4 ++--
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/test/common_utils.py b/test/common_utils.py
index b321df9f3a5..2f74f3686c3 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -351,7 +351,7 @@ def assert_close(

 def parametrized_error_message(*args, **kwargs):
     def to_str(obj):
-        if isinstance(obj, torch.Tensor) and obj.numel() > 30:
+        if isinstance(obj, torch.Tensor) and obj.numel() > 10:
             return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
         elif isinstance(obj, enum.Enum):
             return f"{type(obj).__name__}.{obj.name}"
diff --git a/test/smoke_test.py b/test/smoke_test.py
index e8ee178d95e..728c9440fd3 100644
--- a/test/smoke_test.py
+++ b/test/smoke_test.py
@@ -59,8 +59,6 @@ def main() -> None:
     smoke_test_torchvision_resnet50_classify()
     if torch.cuda.is_available():
         smoke_test_torchvision_resnet50_classify("cuda")
-    if torch.backends.mps.is_available():
-        smoke_test_torchvision_resnet50_classify("mps")


 if __name__ == "__main__":
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index ee9576b6487..e648b35d441 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -146,7 +146,7 @@ def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device),
-            msg=parametrized_error_message(input, other_args, **kwargs),
+            msg=parametrized_error_message(*([actual, expected] + other_args), **kwargs),
         )

     def _unbatch(self, batch, *, data_dims):
@@ -204,7 +204,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=batched_input.dtype, device=batched_input.device),
-            msg=parametrized_error_message(batched_input, *other_args, **kwargs),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )

     @sample_inputs
@@ -236,7 +236,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
             output_cpu,
             check_device=False,
             **info.get_closeness_kwargs(test_id, dtype=input_cuda.dtype, device=input_cuda.device),
-            msg=parametrized_error_message(input_cpu, *other_args, **kwargs),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )

     @sample_inputs
@@ -294,7 +294,7 @@ def test_float32_vs_uint8(self, test_id, info, args_kwargs):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=torch.float32, device=input.device),
-            msg=parametrized_error_message(input, *other_args, **kwargs),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )

diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index 6fea2513712..3c3611cb8cc 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -860,8 +860,8 @@ def sample_inputs_rotate_video():
         reference_fn=reference_rotate_bounding_box,
         reference_inputs_fn=reference_inputs_rotate_bounding_box,
         closeness_kwargs={
-            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-4, rtol=1e-4),
-            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-4, rtol=1e-4),
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
         },
     ),
     KernelInfo(

From 73fb3a8ad2fe6eff5cca8ad081b5fe20c96389fc Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Wed, 1 Mar 2023 15:02:53 +0100
Subject: [PATCH 3/3] fix flaky test for rotate_bounding_box (#7362)

Co-authored-by: Nicolas Hug
---
 test/common_utils.py                  | 2 +-
 test/test_transforms_v2_functional.py | 8 ++++----
 test/transforms_v2_kernel_infos.py    | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/common_utils.py b/test/common_utils.py
index 2f74f3686c3..b321df9f3a5 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -351,7 +351,7 @@ def assert_close(

 def parametrized_error_message(*args, **kwargs):
     def to_str(obj):
-        if isinstance(obj, torch.Tensor) and obj.numel() > 10:
+        if isinstance(obj, torch.Tensor) and obj.numel() > 30:
             return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
         elif isinstance(obj, enum.Enum):
             return f"{type(obj).__name__}.{obj.name}"
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index e648b35d441..ee9576b6487 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -146,7 +146,7 @@ def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device),
-            msg=parametrized_error_message(*([actual, expected] + other_args), **kwargs),
+            msg=parametrized_error_message(input, other_args, **kwargs),
         )

     def _unbatch(self, batch, *, data_dims):
@@ -204,7 +204,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=batched_input.dtype, device=batched_input.device),
-            msg=parametrized_error_message(*other_args, **kwargs),
+            msg=parametrized_error_message(batched_input, *other_args, **kwargs),
         )

     @sample_inputs
@@ -236,7 +236,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
             output_cpu,
             check_device=False,
             **info.get_closeness_kwargs(test_id, dtype=input_cuda.dtype, device=input_cuda.device),
-            msg=parametrized_error_message(*other_args, **kwargs),
+            msg=parametrized_error_message(input_cpu, *other_args, **kwargs),
         )

     @sample_inputs
@@ -294,7 +294,7 @@ def test_float32_vs_uint8(self, test_id, info, args_kwargs):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=torch.float32, device=input.device),
-            msg=parametrized_error_message(*other_args, **kwargs),
+            msg=parametrized_error_message(input, *other_args, **kwargs),
         )

diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index 3c3611cb8cc..6fea2513712 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -860,8 +860,8 @@ def sample_inputs_rotate_video():
         reference_fn=reference_rotate_bounding_box,
         reference_inputs_fn=reference_inputs_rotate_bounding_box,
         closeness_kwargs={
-            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
-            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
+            **scripted_vs_eager_float64_tolerances("cpu", atol=1e-4, rtol=1e-4),
+            **scripted_vs_eager_float64_tolerances("cuda", atol=1e-4, rtol=1e-4),
         },
     ),
     KernelInfo(
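
For reference, the wrapper documented in PATCH 1/3 is used roughly as in the sketch below. This is an illustration only, not part of the patch series: the COCO paths are placeholders, and RandomHorizontalFlip stands in for any torchvision.transforms.v2 transform.

# Minimal usage sketch of wrap_dataset_for_transforms_v2 (see the docstring added in PATCH 1/3).
# Assumptions: a local COCO download at the placeholder paths below; any v2 transform works.
import torchvision
from torchvision import datapoints
from torchvision.transforms import v2

# Plain dataset: CocoDetection returns the target as a list of dicts.
dataset = torchvision.datasets.CocoDetection(
    root="path/to/coco/train2017",  # placeholder path
    annFile="path/to/coco/annotations/instances_train2017.json",  # placeholder path
)

# After wrapping, the target is a dict of lists whose "boxes" (XYXY), "masks" and "labels"
# entries are wrapped as torchvision.datapoints, so v2 transforms can update them with the image.
dataset = datapoints.wrap_dataset_for_transforms_v2(dataset)

transform = v2.Compose([v2.RandomHorizontalFlip(p=0.5)])

image, target = dataset[0]
image, target = transform(image, target)
# target["boxes"] stays a BoundingBox datapoint, flipped consistently with the image.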