[proto] Added mid-level ops and feature-based ops #6219
Changes from all commits: 7d0057e, 2b3e916, d483b16, 8ef7b3c, c68afd6, 7b8d79b, 5501fd3
First file (the `BoundingBox` feature):

```diff
@@ -1,9 +1,10 @@
 from __future__ import annotations

-from typing import Any, Tuple, Union, Optional
+from typing import Any, List, Tuple, Union, Optional, Sequence

 import torch
 from torchvision._utils import StrEnum
 from torchvision.transforms import InterpolationMode

 from ._feature import _Feature
```
```diff
@@ -69,3 +70,142 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
         return BoundingBox.new_like(
             self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
         )
+
+    def horizontal_flip(self) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.horizontal_flip_bounding_box(self, format=self.format, image_size=self.image_size)
+        return BoundingBox.new_like(self, output)
+
+    def vertical_flip(self) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.vertical_flip_bounding_box(self, format=self.format, image_size=self.image_size)
+        return BoundingBox.new_like(self, output)
+
+    def resize(  # type: ignore[override]
+        self,
+        size: List[int],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        max_size: Optional[int] = None,
+        antialias: bool = False,
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.resize_bounding_box(self, size, image_size=self.image_size, max_size=max_size)
+        image_size = (size[0], size[0]) if len(size) == 1 else (size[0], size[1])
+        return BoundingBox.new_like(self, output, image_size=image_size, dtype=output.dtype)
+
+    def crop(self, top: int, left: int, height: int, width: int) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.crop_bounding_box(self, self.format, top, left)
+        return BoundingBox.new_like(self, output, image_size=(height, width))
+
+    def center_crop(self, output_size: List[int]) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.center_crop_bounding_box(
+            self, format=self.format, output_size=output_size, image_size=self.image_size
+        )
+        image_size = (output_size[0], output_size[0]) if len(output_size) == 1 else (output_size[0], output_size[1])
+        return BoundingBox.new_like(self, output, image_size=image_size)
+
+    def resized_crop(
+        self,
+        top: int,
+        left: int,
+        height: int,
+        width: int,
+        size: List[int],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        antialias: bool = False,
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.resized_crop_bounding_box(self, self.format, top, left, height, width, size=size)
+        image_size = (size[0], size[0]) if len(size) == 1 else (size[0], size[1])
+        return BoundingBox.new_like(self, output, image_size=image_size, dtype=output.dtype)
+
+    def pad(
+        self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant"
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        if padding_mode not in ["constant"]:
+            raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")
+
+        output = _F.pad_bounding_box(self, padding, format=self.format)
+
+        # Update output image size:
+        # TODO: remove the import below and make _parse_pad_padding available
+        from torchvision.transforms.functional_tensor import _parse_pad_padding
+
+        left, top, right, bottom = _parse_pad_padding(padding)
+        height, width = self.image_size
+        height += top + bottom
+        width += left + right
+
+        return BoundingBox.new_like(self, output, image_size=(height, width))
+
+    def rotate(
+        self,
+        angle: float,
+        interpolation: InterpolationMode = InterpolationMode.NEAREST,
+        expand: bool = False,
+        fill: Optional[List[float]] = None,
+        center: Optional[List[float]] = None,
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.rotate_bounding_box(
+            self, format=self.format, image_size=self.image_size, angle=angle, expand=expand, center=center
+        )
+        # TODO: update output image size if expand is True
+        if expand:
+            raise RuntimeError("Not yet implemented")
+        return BoundingBox.new_like(self, output, dtype=output.dtype)
+
+    def affine(
+        self,
+        angle: float,
+        translate: List[float],
+        scale: float,
+        shear: List[float],
+        interpolation: InterpolationMode = InterpolationMode.NEAREST,
+        fill: Optional[List[float]] = None,
+        center: Optional[List[float]] = None,
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.affine_bounding_box(
+            self,
+            self.format,
+            self.image_size,
+            angle,
+            translate=translate,
+            scale=scale,
+            shear=shear,
+            center=center,
+        )
+        return BoundingBox.new_like(self, output, dtype=output.dtype)
+
+    def perspective(
+        self,
+        perspective_coeffs: List[float],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        fill: Optional[List[float]] = None,
+    ) -> BoundingBox:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.perspective_bounding_box(self, self.format, perspective_coeffs)
+        return BoundingBox.new_like(self, output, dtype=output.dtype)
+
+    def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> BoundingBox:
+        raise TypeError("Erase transformation does not support bounding boxes")
+
+    def mixup(self, lam: float) -> BoundingBox:
+        raise TypeError("Mixup transformation does not support bounding boxes")
+
+    def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> BoundingBox:
+        raise TypeError("Cutmix transformation does not support bounding boxes")
```
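For orientation, here is roughly how these feature-based ops would be invoked. This is a sketch only; the exact `BoundingBox` constructor arguments are assumed from the diff above (the prototype API was in flux), not taken from documentation:

```python
from torchvision.prototype import features

# Assumed constructor shape, inferred from the diff: format and
# image_size are the metadata the bounding-box kernels need.
box = features.BoundingBox(
    [[10.0, 10.0, 20.0, 20.0]],
    format=features.BoundingBoxFormat.XYXY,
    image_size=(32, 32),
)

flipped = box.horizontal_flip()  # dispatches to horizontal_flip_bounding_box
resized = box.resize([64, 64])   # result carries the updated image_size metadata
```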
Review comment on lines +204 to +210 (the `erase` / `mixup` / `cutmix` stubs): I don't believe these operations should be kernels. Mixup, CutMix, etc. are augmentation strategies. I would even be inclined not to add `erase` and let people access the functional directly. The rationale is that we should keep the number of kernels low; as we keep adding augmentations, we do this on the Transforms side. Thoughts?
Second file (the `_Feature` base class):

```diff
@@ -1,8 +1,8 @@
-from typing import Any, cast, TypeVar, Union, Optional, Type, Callable, Tuple, Sequence, Mapping
+from typing import Any, cast, TypeVar, Union, Optional, Type, Callable, List, Tuple, Sequence, Mapping

 import torch
 from torch._C import _TensorBase, DisableTorchFunction

 from torchvision.transforms import InterpolationMode

 F = TypeVar("F", bound="_Feature")
```
```diff
@@ -83,3 +83,115 @@ def __torch_function__(
             return cls.new_like(args[0], output, dtype=output.dtype, device=output.device)
         else:
             return output
+
+    def horizontal_flip(self) -> Any:
+        return self
+
+    def vertical_flip(self) -> Any:
+        return self
+
+    # TODO: We have to ignore override mypy error as there is torch.Tensor built-in deprecated op: Tensor.resize
+    # https://github.com/pytorch/pytorch/blob/e8727994eb7cdb2ab642749d6549bc497563aa06/torch/_tensor.py#L588-L593
+    def resize(  # type: ignore[override]
+        self,
+        size: List[int],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        max_size: Optional[int] = None,
+        antialias: bool = False,
+    ) -> Any:
+        return self
```

Review thread on the `resize` override:

- Why the `# type: ignore[override]`?
- There is a torch.Tensor built-in deprecated op, Tensor.resize: https://github.com/pytorch/pytorch/blob/e8727994eb7cdb2ab642749d6549bc497563aa06/torch/_tensor.py#L588-L593
- Holy moly! That's not good. Does it make sense to rename the method? I can see that Tensor.resize_ still exists in the docs, but I can't find resize.
- The non-inplace Tensor.resize is deprecated, hence no docs. IMO, we can keep both.
- Add a big TODO to get feedback on this. cc @NicolasHug, thoughts?
- A bit late to the party: looks like the original Tensor.resize is deprecated anyway. Minor note: it's not super clear what the TODO above is about, i.e. it's not obvious what needs to be done about it?
- @NicolasHug I agree that the TODO note is unclear. I'll update it in a follow-up PR. The point is to fix the `# type: ignore[override]`.
- Will this actually be possible? As far as I understand, the signatures would remain incompatible.
- This is a good question. Maybe it is impossible. I was thinking about mypy overload (https://mypy.readthedocs.io/en/stable/more_types.html#function-overloading), if this could help...
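To make the thread concrete, here is a minimal reproduction of the mypy complaint (the `MyFeature` class is made up for illustration): `torch.Tensor` still carries the deprecated `resize(*sizes)` method, so a subclass redefining `resize` with a different signature is an incompatible override unless suppressed:

```python
from typing import Any, List

import torch


class MyFeature(torch.Tensor):
    # Without the ignore, mypy reports:
    #   Signature of "resize" incompatible with supertype "Tensor"  [override]
    def resize(self, size: List[int]) -> Any:  # type: ignore[override]
        return self
```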
The diff then continues with more no-op defaults:

```diff
+    def crop(self, top: int, left: int, height: int, width: int) -> Any:
+        return self
+
+    def center_crop(self, output_size: List[int]) -> Any:
+        return self
+
+    def resized_crop(
+        self,
+        top: int,
+        left: int,
+        height: int,
+        width: int,
+        size: List[int],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        antialias: bool = False,
+    ) -> Any:
+        return self
+
+    def pad(
+        self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant"
+    ) -> Any:
+        return self
```

Review thread on the `pad` signature:

- Why don't we support …?
- I think we can add ….
- Sounds good. Do you want to put a TODO in the code, or will you keep track of this elsewhere?
- I'll be dealing with this in the Transforms PR (the next one, not yet sent).
The hunk closes with the remaining no-op stubs:

```diff
+    def rotate(
+        self,
+        angle: float,
+        interpolation: InterpolationMode = InterpolationMode.NEAREST,
+        expand: bool = False,
+        fill: Optional[List[float]] = None,
+        center: Optional[List[float]] = None,
+    ) -> Any:
+        return self
+
+    def affine(
+        self,
+        angle: float,
+        translate: List[float],
+        scale: float,
+        shear: List[float],
+        interpolation: InterpolationMode = InterpolationMode.NEAREST,
+        fill: Optional[List[float]] = None,
+        center: Optional[List[float]] = None,
+    ) -> Any:
+        return self
+
+    def perspective(
+        self,
+        perspective_coeffs: List[float],
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+        fill: Optional[List[float]] = None,
+    ) -> Any:
+        return self
+
+    def adjust_brightness(self, brightness_factor: float) -> Any:
+        return self
+
+    def adjust_saturation(self, saturation_factor: float) -> Any:
+        return self
+
+    def adjust_contrast(self, contrast_factor: float) -> Any:
+        return self
+
+    def adjust_sharpness(self, sharpness_factor: float) -> Any:
+        return self
+
+    def adjust_hue(self, hue_factor: float) -> Any:
+        return self
+
+    def adjust_gamma(self, gamma: float, gain: float = 1) -> Any:
+        return self
+
+    def posterize(self, bits: int) -> Any:
+        return self
+
+    def solarize(self, threshold: float) -> Any:
+        return self
+
+    def autocontrast(self) -> Any:
+        return self
+
+    def equalize(self) -> Any:
+        return self
+
+    def invert(self) -> Any:
+        return self
+
+    def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> Any:
+        return self
+
+    def mixup(self, lam: float) -> Any:
+        return self
+
+    def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> Any:
+        return self
```
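The pattern behind all these `return self` stubs, as implied by the diff (a simplified, self-contained sketch; `Label` and the pixel-flip body are hypothetical): `_Feature` gives every op a no-op default, so a transform can call, say, `horizontal_flip()` on any feature without type checks, and only feature types for which the op is meaningful override it:

```python
import torch


class _Feature(torch.Tensor):
    def horizontal_flip(self) -> torch.Tensor:
        # Default: geometry ops leave non-spatial features unchanged.
        return self


class Label(_Feature):
    pass  # inherits the no-op; flipping an image does not change its label


class Image(_Feature):
    def horizontal_flip(self) -> "Image":
        # Spatial feature: actually flip the pixel data (last dim = width).
        return torch.flip(self, dims=(-1,)).as_subclass(Image)
```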
Review thread on the in-method `from torchvision.prototype.transforms import functional as _F` imports:

- Btw, if this happens on a hot path, this import (even if it's not the first run) may hurt perf (from my experience of a few years back).
- Yeah, this is not an ideal solution. Previously I tried to add the submodule as an attribute, and somehow the dataloader with multiple processes hangs because of that... If you have any better ideas, please share.
- Yeah, that's a fair point. This workaround is temporary for as long as we work on the API. We should seek a better solution prior to finalising it. Potentially a refactoring and reorganization of the modules might be a solution here, but that can be decided later.
- Here is an alternative approach for this at #6476.
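For reference, one common mitigation (a sketch under assumptions, not what the PR or #6476 does; the `_functional` helper is hypothetical): resolve the import once and cache it in a module-level global, so hot-path calls pay only a cheap global lookup instead of a `sys.modules` lookup plus attribute binding on every invocation:

```python
from types import ModuleType
from typing import Optional

_F: Optional[ModuleType] = None


def _functional() -> ModuleType:
    # First call performs the real import; later calls return the cached module.
    global _F
    if _F is None:
        from torchvision.prototype.transforms import functional as F

        _F = F
    return _F
```

A method would then call `_functional().horizontal_flip_bounding_box(...)` instead of importing inside its own body; whether this interacts safely with multi-process dataloaders is exactly the open question raised in the thread above.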