diff --git a/docs/source/datapoints.rst b/docs/source/datapoints.rst
new file mode 100644
index 00000000000..07e20b090e6
--- /dev/null
+++ b/docs/source/datapoints.rst
@@ -0,0 +1,13 @@
+Datapoints
+==========
+
+.. currentmodule:: torchvision.datapoints
+.. autosummary::
+    :toctree: generated/
+    :template: class.rst
+
+    Image
+    Video
+    BoundingBoxFormat
+    BoundingBox
+    Mask
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 79dbebdd047..ac047ff5869 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -31,6 +31,7 @@ architectures, and common image transformations for computer vision.
    :maxdepth: 2
    :caption: Package Reference
 
+   datapoints
    transforms
    models
    datasets
diff --git a/torchvision/datapoints/_bounding_box.py b/torchvision/datapoints/_bounding_box.py
index 75e779f0b21..d8441823c3e 100644
--- a/torchvision/datapoints/_bounding_box.py
+++ b/torchvision/datapoints/_bounding_box.py
@@ -10,12 +10,35 @@
 
 
 class BoundingBoxFormat(Enum):
+    """[BETA] Coordinate format of a bounding box.
+
+    Available formats are
+
+    * ``XYXY``
+    * ``XYWH``
+    * ``CXCYWH``
+    """
+
     XYXY = "XYXY"
     XYWH = "XYWH"
     CXCYWH = "CXCYWH"
 
 
 class BoundingBox(Datapoint):
+    """[BETA] :class:`torch.Tensor` subclass for bounding boxes.
+
+    Args:
+        data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
+        format (BoundingBoxFormat, str): Format of the bounding box.
+        spatial_size (two-tuple of ints): Height and width of the corresponding image or video.
+        dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from
+            ``data``.
+        device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a
+            :class:`torch.Tensor`, the device is taken from it. Otherwise, the bounding box is constructed on the CPU.
+        requires_grad (bool, optional): Whether autograd should record operations on the bounding box. If omitted and
+            ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``.
+    """
+
     format: BoundingBoxFormat
     spatial_size: Tuple[int, int]
 
@@ -52,6 +75,20 @@ def wrap_like(
         format: Optional[BoundingBoxFormat] = None,
         spatial_size: Optional[Tuple[int, int]] = None,
     ) -> BoundingBox:
+        """Wrap a :class:`torch.Tensor` as :class:`BoundingBox` from a reference.
+
+        Args:
+            other (BoundingBox): Reference bounding box.
+            tensor (Tensor): Tensor to be wrapped as :class:`BoundingBox`.
+            format (BoundingBoxFormat, str, optional): Format of the bounding box. If omitted, it is taken from the
+                reference.
+            spatial_size (two-tuple of ints, optional): Height and width of the corresponding image or video. If
+                omitted, it is taken from the reference.
+
+        """
+        if isinstance(format, str):
+            format = BoundingBoxFormat.from_str(format.upper())
+
         return cls._wrap(
             tensor,
             format=format if format is not None else other.format,
diff --git a/torchvision/datapoints/_image.py b/torchvision/datapoints/_image.py
index 21dfe5a5cd6..e47a6c10fc3 100644
--- a/torchvision/datapoints/_image.py
+++ b/torchvision/datapoints/_image.py
@@ -10,6 +10,19 @@
 
 
 class Image(Datapoint):
+    """[BETA] :class:`torch.Tensor` subclass for images.
+
+    Args:
+        data (tensor-like, PIL.Image.Image): Any data that can be turned into a tensor with :func:`torch.as_tensor` as
+            well as PIL images.
+        dtype (torch.dtype, optional): Desired data type of the image. If omitted, will be inferred from
+            ``data``.
+        device (torch.device, optional): Desired device of the image. If omitted and ``data`` is a
+            :class:`torch.Tensor`, the device is taken from it. Otherwise, the image is constructed on the CPU.
+        requires_grad (bool, optional): Whether autograd should record operations on the image. If omitted and
+            ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``.
+    """
+
     @classmethod
     def _wrap(cls, tensor: torch.Tensor) -> Image:
         image = tensor.as_subclass(cls)
diff --git a/torchvision/datapoints/_mask.py b/torchvision/datapoints/_mask.py
index bb70ec12224..0135d793d32 100644
--- a/torchvision/datapoints/_mask.py
+++ b/torchvision/datapoints/_mask.py
@@ -10,6 +10,19 @@
 
 
 class Mask(Datapoint):
+    """[BETA] :class:`torch.Tensor` subclass for segmentation and detection masks.
+
+    Args:
+        data (tensor-like, PIL.Image.Image): Any data that can be turned into a tensor with :func:`torch.as_tensor` as
+            well as PIL images.
+        dtype (torch.dtype, optional): Desired data type of the mask. If omitted, will be inferred from
+            ``data``.
+        device (torch.device, optional): Desired device of the mask. If omitted and ``data`` is a
+            :class:`torch.Tensor`, the device is taken from it. Otherwise, the mask is constructed on the CPU.
+        requires_grad (bool, optional): Whether autograd should record operations on the mask. If omitted and
+            ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``.
+    """
+
     @classmethod
     def _wrap(cls, tensor: torch.Tensor) -> Mask:
         return tensor.as_subclass(cls)
diff --git a/torchvision/datapoints/_video.py b/torchvision/datapoints/_video.py
index ab51c10233d..a6fbe2bd473 100644
--- a/torchvision/datapoints/_video.py
+++ b/torchvision/datapoints/_video.py
@@ -9,6 +9,18 @@
 
 
 class Video(Datapoint):
+    """[BETA] :class:`torch.Tensor` subclass for videos.
+
+    Args:
+        data (tensor-like): Any data that can be turned into a tensor with :func:`torch.as_tensor`.
+        dtype (torch.dtype, optional): Desired data type of the video. If omitted, will be inferred from
+            ``data``.
+        device (torch.device, optional): Desired device of the video. If omitted and ``data`` is a
+            :class:`torch.Tensor`, the device is taken from it. Otherwise, the video is constructed on the CPU.
+        requires_grad (bool, optional): Whether autograd should record operations on the video. If omitted and
+            ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``.
+    """
+
     @classmethod
     def _wrap(cls, tensor: torch.Tensor) -> Video:
         video = tensor.as_subclass(cls)
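For context on how the documented classes fit together, here is a minimal usage sketch based only on the constructors and :meth:`BoundingBox.wrap_like` signatures described in the docstrings above. It assumes the beta ``torchvision.datapoints`` namespace from this change; the variable names, tensor shapes, and coordinate values are illustrative placeholders.

```python
import torch
from torchvision import datapoints

# Wrap a plain CHW tensor as an Image; dtype and device are inferred from ``data``.
image = datapoints.Image(torch.rand(3, 224, 224))

# Bounding boxes carry their coordinate format and the height/width of the
# image or video they refer to.
boxes = datapoints.BoundingBox(
    [[10, 20, 40, 60], [15, 5, 80, 70]],
    format=datapoints.BoundingBoxFormat.XYXY,
    spatial_size=(224, 224),
)

# A segmentation mask is just another tensor subclass.
mask = datapoints.Mask(torch.zeros(224, 224, dtype=torch.uint8))

# ``wrap_like`` re-wraps a plain tensor as a BoundingBox, reusing the metadata
# (format, spatial_size) of the reference box unless overridden.
new_coords = torch.tensor([[12, 22, 42, 62], [17, 7, 82, 72]])
rewrapped = datapoints.BoundingBox.wrap_like(boxes, new_coords)

print(type(image).__name__, type(mask).__name__, type(rewrapped).__name__)
print(rewrapped.format, rewrapped.spatial_size)
```

Per the :meth:`wrap_like` docstring, ``format`` and ``spatial_size`` default to the reference box's values, which covers the common case of re-wrapping a plain :class:`torch.Tensor` result; the added ``from_str`` conversion also lets ``format`` be passed as a string.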