diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py
index f34e5daa063..4f655c6a95c 100644
--- a/torchvision/prototype/transforms/_geometry.py
+++ b/torchvision/prototype/transforms/_geometry.py
@@ -7,6 +7,10 @@ from torchvision.prototype.transforms import Transform
 
+# I recommend deleting all of the old classes from this branch to assist code reviews.
+# I know that you are already working on this (#5323), but I would try to keep your feature branch as clean as possible.
+# The issue here is that these implementations are incompatible with the functional approach on the same branch, which can lead to confusion.
+
 class HorizontalFlip(Transform):
     NO_OP_FEATURE_TYPES = {Label}
diff --git a/torchvision/prototype/transforms/functional/_augment.py b/torchvision/prototype/transforms/functional/_augment.py
index 814c34e5b00..a0b975b998b 100644
--- a/torchvision/prototype/transforms/functional/_augment.py
+++ b/torchvision/prototype/transforms/functional/_augment.py
@@ -24,6 +24,7 @@ def mixup_one_hot_label(one_hot_label_batch: torch.Tensor, *, lam: float, inplac
 
 def cutmix_image(image: torch.Tensor, *, box: Tuple[int, int, int, int], inplace: bool = False) -> torch.Tensor:
+    # Shouldn't we be using `image_batch`, similar to `mixup_image()`, to indicate that the input must be a batch?
     if not inplace:
         image = image.clone()
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index c8142742fa8..733da291aa0 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -3,7 +3,7 @@
 import torch
 from torchvision.prototype.features import BoundingBoxFormat
 from torchvision.transforms import (  # noqa: F401
-    functional as _F,
+    functional as _F,  # Shouldn't we be importing `functional_tensor`?
     InterpolationMode,
 )
 
@@ -11,7 +11,7 @@
 
 def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor:
-    return image.flip((-1,))
+    return image.flip((-1,))  # Why not use `_F.hflip()`?
 
 
 def horizontal_flip_bounding_box(bounding_box: torch.Tensor, *, image_size: Tuple[int, int]) -> torch.Tensor:
@@ -20,7 +20,7 @@ def horizontal_flip_bounding_box(bounding_box: torch.Tensor, *, image_size: Tupl
         old_format=BoundingBoxFormat.XYXY,
         new_format=BoundingBoxFormat.XYWH,
     ).unbind(-1)
-    x = image_size[1] - (x + w)
+    x = image_size[1] - (x + w)  # Why not avoid the format conversion and directly do `boxes[:, [0, 2]] = width - boxes[:, [2, 0]]`?
     return convert_bounding_box_format(
         torch.stack((x, y, w, h), dim=-1),
         old_format=BoundingBoxFormat.XYWH,
@@ -28,7 +28,7 @@
     )
 
 
-_resize_image = _F.resize
+_resize_image = _F.resize  # How about videos? The low-level transform supports them.
 
 
 def resize_image(
@@ -40,7 +40,7 @@
 ) -> torch.Tensor:
     new_height, new_width = size
     num_channels, old_height, old_width = image.shape[-3:]
-    batch_shape = image.shape[:-3]
+    batch_shape = image.shape[:-3]  # In some places you use `image` to denote batches and in others `image_batch`. Should we align the names?
     return _resize_image(
         image.reshape((-1, num_channels, old_height, old_width)),
         size=size,
@@ -71,7 +71,7 @@
 ) -> torch.Tensor:
     old_height, old_width = old_image_size
     new_height, new_width = new_image_size
-    return (
+    return (  # Shouldn't we be using a low-level kernel instead, similar to above?
         bounding_box.view(-1, 2, 2)
         .mul(torch.tensor([new_width / old_width, new_height / old_height]))
         .view(bounding_box.shape)
diff --git a/torchvision/prototype/transforms/functional/_meta_conversion.py b/torchvision/prototype/transforms/functional/_meta_conversion.py
index 484066a39ee..9df5899a38a 100644
--- a/torchvision/prototype/transforms/functional/_meta_conversion.py
+++ b/torchvision/prototype/transforms/functional/_meta_conversion.py
@@ -4,7 +4,7 @@
 
 def _xywh_to_xyxy(xywh: torch.Tensor) -> torch.Tensor:
-    xyxy = xywh.clone()
+    xyxy = xywh.clone()  # Nice. Do we have tests to ensure that no transform does an in-place modification of its input?
     xyxy[..., 2:] += xyxy[..., :2]
     return xyxy
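
To make the `horizontal_flip_bounding_box` suggestion above concrete, here is a rough sketch of the direct flip; the helper name is mine and not part of the PR, and it assumes XYXY boxes of shape (N, 4) with a known image width:

```python
import torch

def hflip_boxes_xyxy(boxes: torch.Tensor, image_width: int) -> torch.Tensor:
    # Hypothetical helper, not part of the PR. Mirroring swaps the x
    # coordinates: new x1 = width - old x2 and new x2 = width - old x1,
    # which is exactly what the fancy-indexing one-liner does.
    boxes = boxes.clone()  # keep the kernel out-of-place
    boxes[:, [0, 2]] = image_width - boxes[:, [2, 0]]
    return boxes
```

This should give the same result as the XYXY → XYWH → XYXY round trip (new x = width - (x + w) = width - x2, and adding w back yields width - x1), while avoiding two format conversions, at the cost of hard-coding the XYXY assumption into the kernel.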
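As for the in-place question on `_xywh_to_xyxy`, a minimal pytest-style sketch of the kind of guard I have in mind, assuming `horizontal_flip_image` is re-exported from the prototype functional namespace (ideally this would be parametrized over all kernels):

```python
import torch
from torchvision.prototype.transforms import functional as F

def test_horizontal_flip_image_does_not_mutate_input():
    image = torch.rand(3, 32, 32)
    original = image.clone()
    F.horizontal_flip_image(image)
    # The input must be bit-identical after the call.
    assert torch.equal(image, original)
```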