pytorch · NicolasHug · Feb 24, 2023 · Feb 24, 2023 · Feb 24, 2023 · Feb 24, 2023
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -34,6 +34,7 @@
 sys.path.append(os.path.abspath("."))
 
 torchvision.disable_beta_transforms_warning()
+import torchvision.datapoints  # Don't remove, otherwise the docs for datapoints aren't linked properly
 
 # -- General configuration ------------------------------------------------
 

diff --git a/docs/source/datapoints.rst b/docs/source/datapoints.rst
@@ -2,6 +2,12 @@ Datapoints
 ==========
 
 .. currentmodule:: torchvision.datapoints
+
+Datapoints are tensor subclasses which the :mod:`~torchvision.transforms.v2` transforms use under the hood to
+dispatch their inputs to the appropriate lower-level kernels. Most users do not
+need to manipulate datapoints directly and can simply rely on dataset wrapping -
+see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.
+
 .. autosummary::
     :toctree: generated/
     :template: class.rst

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -31,8 +31,8 @@ architectures, and common image transformations for computer vision.
    :maxdepth: 2
    :caption: Package Reference
 
-   datapoints
    transforms
+   datapoints
    models
    datasets
    utils

diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
@@ -198,6 +198,12 @@ Miscellaneous
 Conversion
 ----------
 
+.. note::
+    Beware, some of the conversion transforms below will scale the values
+    while performing the conversion, whereas others do not do any scaling. By
+    scaling, we mean e.g. that a ``uint8`` -> ``float32`` conversion would map
+    the [0, 255] range into [0, 1] (and vice-versa).
+
 .. autosummary::
     :toctree: generated/
     :template: class.rst
@@ -211,8 +217,8 @@ Conversion
     v2.PILToTensor
     v2.ToImageTensor
     ConvertImageDtype
-    v2.ConvertImageDtype
     v2.ConvertDtype
+    v2.ConvertImageDtype
     v2.ToDtype
     v2.ConvertBoundingBoxFormat
 

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
@@ -105,7 +105,9 @@ def __repr__(self) -> str:
 
 
 class ToTensor:
-    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.
+    """Convert a PIL Image or ndarray to tensor and scale the values accordingly.
+
+    This transform does not support torchscript.
 
     Converts a PIL Image or numpy.ndarray (H x W x C) in the range
     [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
@@ -139,7 +141,9 @@ def __repr__(self) -> str:
 
 
 class PILToTensor:
-    """Convert a ``PIL Image`` to a tensor of the same type. This transform does not support torchscript.
+    """Convert a PIL Image to a tensor of the same type - this does not scale values.
+
+    This transform does not support torchscript.
 
     Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W).
     """
@@ -166,7 +170,8 @@ def __repr__(self) -> str:
 
 
 class ConvertImageDtype(torch.nn.Module):
-    """Convert a tensor image to the given ``dtype`` and scale the values accordingly
+    """Convert a tensor image to the given ``dtype`` and scale the values accordingly.
+
     This function does not support PIL Image.
 
     Args:
@@ -194,7 +199,9 @@ def forward(self, image):
 
 
 class ToPILImage:
-    """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript.
+    """Convert a tensor or an ndarray to PIL Image - this does not scale values.
+
+    This transform does not support torchscript.
 
     Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
     H x W x C to a PIL Image while preserving the value range.

diff --git a/torchvision/transforms/v2/_container.py b/torchvision/transforms/v2/_container.py
@@ -138,9 +138,7 @@ def __init__(
         if p is None:
             p = [1] * len(transforms)
         elif len(p) != len(transforms):
-            raise ValueError(
-                f"Length of p doesn't match the number of transforms: " f"{len(p)} != {len(transforms)}"
-            )
+            raise ValueError(f"Length of p doesn't match the number of transforms: {len(p)} != {len(transforms)}")
 
         super().__init__()
 

diff --git a/torchvision/transforms/v2/_deprecated.py b/torchvision/transforms/v2/_deprecated.py
@@ -10,7 +10,7 @@
 
 
 class ToTensor(Transform):
-    """[BETA] Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+    """[BETA] Convert a PIL Image or ndarray to tensor and scale the values accordingly.
 
     .. betastatus:: ToTensor transform
 

diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py
@@ -9,7 +9,7 @@
 
 
 class ConvertBoundingBoxFormat(Transform):
-    """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY".
+    """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY".
 
     .. betastatus:: ConvertBoundingBoxFormat transform
 
@@ -18,6 +18,7 @@ class ConvertBoundingBoxFormat(Transform):
             Possible values are defined by :class:`~torchvision.datapoints.BoundingBoxFormat` and
             string values match the enums, e.g. "XYXY" or "XYWH" etc.
     """
+
     _transformed_types = (datapoints.BoundingBox,)
 
     def __init__(self, format: Union[str, datapoints.BoundingBoxFormat]) -> None:
@@ -79,6 +80,7 @@ class ClampBoundingBox(Transform):
     .. betastatus:: ClampBoundingBox transform
 
     """
+
     _transformed_types = (datapoints.BoundingBox,)
 
     def _transform(self, inpt: datapoints.BoundingBox, params: Dict[str, Any]) -> datapoints.BoundingBox:

diff --git a/torchvision/transforms/v2/_misc.py b/torchvision/transforms/v2/_misc.py
@@ -223,13 +223,15 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
 
 
 class ToDtype(Transform):
-    """[BETA] Converts the input to a specific dtype.
+    """[BETA] Converts the input to a specific dtype - this does not scale values.
 
     .. betastatus:: ToDtype transform
 
     Args:
-        dtype (dtype or dict of Datapoint -> dtype): The dtype to convert to. A dict can be passed to specify
-            per-datapoint conversions, e.g. ``dtype={datapoints.Image: torch.float32, datapoints.Video: torch.float64}``.
+        dtype (``torch.dtype`` or dict of ``Datapoint`` -> ``torch.dtype``): The dtype to convert to.
+            A dict can be passed to specify per-datapoint conversions, e.g.
+            ``dtype={datapoints.Image: torch.float32, datapoints.Video:
+            torch.float64}``.
     """
 
     _transformed_types = (torch.Tensor,)

diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py
@@ -11,7 +11,7 @@
 
 
 class PILToTensor(Transform):
-    """[BETA] Convert a ``PIL Image`` to a tensor of the same type.
+    """[BETA] Convert a PIL Image to a tensor of the same type - this does not scale values.
 
     .. betastatus:: PILToTensor transform
 
@@ -27,7 +27,8 @@ def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Ten
 
 
 class ToImageTensor(Transform):
-    """[BETA] Convert a tensor or an ndarray or PIL Image to :class:`~torchvision.datapoints.Image`.
+    """[BETA] Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.datapoints.Image`
+    ; this does not scale values.
 
     .. betastatus:: ToImageTensor transform
 
@@ -43,7 +44,7 @@ def _transform(
 
 
 class ToImagePIL(Transform):
-    """[BETA] Convert a tensor or an ndarray to PIL Image.
+    """[BETA] Convert a tensor or an ndarray to PIL Image - this does not scale values.
 
     .. betastatus:: ToImagePIL transform