Skip to content

[proto] Added mid-level ops and feature-based ops #6219

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/prototype-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ jobs:

- name: Run prototype tests
shell: bash
run: pytest --durations=20 test/test_prototype_*.py
run: pytest -vvv --durations=20 test/test_prototype_*.py
56 changes: 56 additions & 0 deletions test/test_prototype_transforms_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,18 @@ def rotate_segmentation_mask():
)


@register_kernel_info_from_sample_inputs_fn
def crop_image_tensor():
    """Yield crop_image_tensor sample inputs covering negative, zero and
    positive top/left offsets combined with two output heights/widths."""
    tops = [-8, 0, 9]
    lefts = [-8, 0, 9]
    heights = [12, 20]
    widths = [12, 20]
    for image in make_images():
        for top, left, height, width in itertools.product(tops, lefts, heights, widths):
            yield SampleInput(
                image,
                top=top,
                left=left,
                height=height,
                width=width,
            )


@register_kernel_info_from_sample_inputs_fn
def crop_bounding_box():
for bounding_box, top, left in itertools.product(make_bounding_boxes(), [-8, 0, 9], [-8, 0, 9]):
Expand Down Expand Up @@ -414,6 +426,17 @@ def resized_crop_segmentation_mask():
yield SampleInput(mask, top=top, left=left, height=height, width=width, size=size)


@register_kernel_info_from_sample_inputs_fn
def pad_image_tensor():
    """Yield pad_image_tensor sample inputs: 1-, 2- and 4-element padding
    specs, int and float fill values, and every supported padding mode."""
    paddings = [[1], [1, 1], [1, 1, 2, 2]]
    fills = [12, 12.0]
    modes = ["constant", "symmetric", "edge", "reflect"]
    for image in make_images():
        for padding, fill, padding_mode in itertools.product(paddings, fills, modes):
            yield SampleInput(image, padding=padding, fill=fill, padding_mode=padding_mode)


@register_kernel_info_from_sample_inputs_fn
def pad_segmentation_mask():
for mask, padding, padding_mode in itertools.product(
Expand Down Expand Up @@ -499,6 +522,39 @@ def test_scriptable(kernel):
jit.script(kernel)


# Test below is intended to test mid-level op vs low-level ops it calls
# For example, resize -> resize_image_tensor, resize_bounding_boxes etc
# TODO: Rewrite this tests as sample args may include more or less params
# than needed by functions
def _is_mid_level_op(name, obj):
    # Public callables from F that are neither feature-specific kernels
    # (name contains a feature-type token) nor explicitly excluded.
    feature_types = {"image", "segmentation_mask", "bounding_box", "label", "pil"}
    excluded = {"to_image_tensor", "InterpolationMode", "decode_video_with_av", "crop", "rotate"}
    # We skip 'crop' due to missing 'height' and 'width'
    # We skip 'rotate' due to non implemented yet expand=True case for bboxes
    if name.startswith("_") or not callable(obj):
        return False
    if any(feature_type in name for feature_type in feature_types):
        return False
    return name not in excluded


@pytest.mark.parametrize(
    "func",
    [pytest.param(func, id=name) for name, func in F.__dict__.items() if _is_mid_level_op(name, func)],
)
def test_functional_mid_level(func):
    # For each kernel info whose name starts with the mid-level op's name,
    # check the mid-level op produces the same result as the kernel on the
    # first sample input.
    finfos = [finfo for finfo in FUNCTIONAL_INFOS if f"{func.__name__}_" in finfo.name]
    for finfo in finfos:
        for sample_input in finfo.sample_inputs():
            expected = finfo(sample_input)
            # Drop kernel-only kwargs the mid-level op derives internally.
            kwargs = {k: v for k, v in sample_input.kwargs.items() if k not in ("format", "image_size")}
            output = func(*sample_input.args, **kwargs)
            torch.testing.assert_close(output, expected, msg=f"finfo={finfo}, output={output}, expected={expected}")
            break


@pytest.mark.parametrize(
("functional_info", "sample_input"),
[
Expand Down
142 changes: 141 additions & 1 deletion torchvision/prototype/features/_bounding_box.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from __future__ import annotations

from typing import Any, Tuple, Union, Optional
from typing import Any, List, Tuple, Union, Optional, Sequence

import torch
from torchvision._utils import StrEnum
from torchvision.transforms import InterpolationMode

from ._feature import _Feature

Expand Down Expand Up @@ -69,3 +70,142 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
return BoundingBox.new_like(
self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
)

def horizontal_flip(self) -> BoundingBox:
from torchvision.prototype.transforms import functional as _F

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw if this happens on a hot path, this import (even if it's not the first run) may hurt perf (from my experience of a few years back)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is not an ideal solution. Previously I tried to add the submodule as an attribute, and somehow a dataloader with multiple processes hung because of that...
If you have any better ideas, please share
If you have any better ideas, please share

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a fair point. This workaround is temporary for as long as we work on the API. We should seek a better solution prior to finalising it. Potentially a refactoring and reorganization of the modules might be a solution here, but that can be decided later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an alternative approach for this at #6476


output = _F.horizontal_flip_bounding_box(self, format=self.format, image_size=self.image_size)
return BoundingBox.new_like(self, output)

def vertical_flip(self) -> BoundingBox:
    """Flip the box coordinates via the vertical_flip_bounding_box kernel."""
    # Deferred import avoids a circular dependency with the transforms module.
    from torchvision.prototype.transforms import functional as _F

    flipped = _F.vertical_flip_bounding_box(self, format=self.format, image_size=self.image_size)
    return BoundingBox.new_like(self, flipped)

def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: bool = False,
) -> BoundingBox:
    """Rescale the box coordinates to match a resized image of ``size``.

    ``interpolation`` and ``antialias`` are accepted for interface parity
    with the image variant; the bounding-box kernel does not use them.
    """
    from torchvision.prototype.transforms import functional as _F

    output = _F.resize_bounding_box(self, size, image_size=self.image_size, max_size=max_size)
    # A single-element size means a square target image.
    if len(size) == 1:
        new_image_size = (size[0], size[0])
    else:
        new_image_size = (size[0], size[1])
    return BoundingBox.new_like(self, output, image_size=new_image_size, dtype=output.dtype)

def crop(self, top: int, left: int, height: int, width: int) -> BoundingBox:
    """Shift the box coordinates for a crop starting at (top, left); the
    resulting feature records the crop's (height, width) as its image size."""
    from torchvision.prototype.transforms import functional as _F

    shifted = _F.crop_bounding_box(self, self.format, top, left)
    return BoundingBox.new_like(self, shifted, image_size=(height, width))

def center_crop(self, output_size: List[int]) -> BoundingBox:
    """Adjust the box coordinates for a center crop of ``output_size``."""
    from torchvision.prototype.transforms import functional as _F

    cropped = _F.center_crop_bounding_box(
        self, format=self.format, output_size=output_size, image_size=self.image_size
    )
    # A single-element output_size means a square crop.
    if len(output_size) == 1:
        new_image_size = (output_size[0], output_size[0])
    else:
        new_image_size = (output_size[0], output_size[1])
    return BoundingBox.new_like(self, cropped, image_size=new_image_size)

def resized_crop(
    self,
    top: int,
    left: int,
    height: int,
    width: int,
    size: List[int],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    antialias: bool = False,
) -> BoundingBox:
    """Crop the box region at (top, left, height, width) and rescale to ``size``.

    ``interpolation`` and ``antialias`` are accepted for interface parity with
    the image variant; the bounding-box kernel does not use them.
    """
    from torchvision.prototype.transforms import functional as _F

    output = _F.resized_crop_bounding_box(self, self.format, top, left, height, width, size=size)
    # A single-element size means a square target image.
    if len(size) == 1:
        new_image_size = (size[0], size[0])
    else:
        new_image_size = (size[0], size[1])
    return BoundingBox.new_like(self, output, image_size=new_image_size, dtype=output.dtype)

def pad(
    self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant"
) -> BoundingBox:
    """Shift the box coordinates for padded image borders.

    ``fill`` is accepted for interface parity with the image variant; it has
    no effect on box coordinates.

    Raises:
        ValueError: if ``padding_mode`` is anything other than "constant".
    """
    from torchvision.prototype.transforms import functional as _F

    if padding_mode != "constant":
        raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")

    output = _F.pad_bounding_box(self, padding, format=self.format)

    # Update output image size:
    # TODO: remove the import below and make _parse_pad_padding available
    from torchvision.transforms.functional_tensor import _parse_pad_padding

    left, top, right, bottom = _parse_pad_padding(padding)
    old_height, old_width = self.image_size
    new_image_size = (old_height + top + bottom, old_width + left + right)
    return BoundingBox.new_like(self, output, image_size=new_image_size)

def rotate(
    self,
    angle: float,
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    expand: bool = False,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> BoundingBox:
    """Rotate the box coordinates by ``angle`` degrees about ``center``.

    ``interpolation`` and ``fill`` are accepted for interface parity with the
    image variant; the bounding-box kernel does not use them.

    Raises:
        RuntimeError: if ``expand=True`` — updating the output image size for
            an expanded canvas is not implemented yet.
    """
    # TODO: update output image size if expand is True
    # Fail fast: previously this check ran AFTER the kernel call, so the whole
    # rotation was computed and then discarded when expand=True.
    if expand:
        raise RuntimeError("Not yet implemented")

    from torchvision.prototype.transforms import functional as _F

    output = _F.rotate_bounding_box(
        self, format=self.format, image_size=self.image_size, angle=angle, expand=expand, center=center
    )
    return BoundingBox.new_like(self, output, dtype=output.dtype)

def affine(
    self,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> BoundingBox:
    """Apply an affine transform to the box via the affine_bounding_box kernel.

    ``interpolation`` and ``fill`` are accepted for interface parity with the
    image variant; the bounding-box kernel does not use them.
    """
    from torchvision.prototype.transforms import functional as _F

    transformed = _F.affine_bounding_box(
        self,
        self.format,
        self.image_size,
        angle,
        translate=translate,
        scale=scale,
        shear=shear,
        center=center,
    )
    return BoundingBox.new_like(self, transformed, dtype=transformed.dtype)

def perspective(
    self,
    perspective_coeffs: List[float],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: Optional[List[float]] = None,
) -> BoundingBox:
    """Apply a perspective transform defined by ``perspective_coeffs``.

    ``interpolation`` and ``fill`` are accepted for interface parity with the
    image variant; the bounding-box kernel does not use them.
    """
    from torchvision.prototype.transforms import functional as _F

    warped = _F.perspective_bounding_box(self, self.format, perspective_coeffs)
    return BoundingBox.new_like(self, warped, dtype=warped.dtype)

def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> BoundingBox:
    # Erasing a pixel region has no meaningful equivalent for box coordinates,
    # so this override rejects the call instead of silently returning self.
    raise TypeError("Erase transformation does not support bounding boxes")

def mixup(self, lam: float) -> BoundingBox:
    # Mixup blends whole samples; there is no defined blend of two boxes,
    # so this override rejects the call instead of silently returning self.
    raise TypeError("Mixup transformation does not support bounding boxes")

def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> BoundingBox:
Comment on lines +204 to +210
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe these operations should be kernels. Mixup, Cutmix etc. are augmentation strategies. I would even be inclined not to add erase and to let people access the functional directly. The rationale is that we should keep the number of kernels low. As we keep adding augmentations, we do this on the Transforms side. Thoughts?

raise TypeError("Cutmix transformation does not support bounding boxes")
116 changes: 114 additions & 2 deletions torchvision/prototype/features/_feature.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Any, cast, TypeVar, Union, Optional, Type, Callable, Tuple, Sequence, Mapping
from typing import Any, cast, TypeVar, Union, Optional, Type, Callable, List, Tuple, Sequence, Mapping

import torch
from torch._C import _TensorBase, DisableTorchFunction

from torchvision.transforms import InterpolationMode

F = TypeVar("F", bound="_Feature")

Expand Down Expand Up @@ -83,3 +83,115 @@ def __torch_function__(
return cls.new_like(args[0], output, dtype=output.dtype, device=output.device)
else:
return output

def horizontal_flip(self) -> Any:
    # Default no-op: feature types that horizontal flipping does not apply to
    # are returned unchanged; concrete subclasses override this.
    return self

def vertical_flip(self) -> Any:
    # Default no-op: feature types that vertical flipping does not apply to
    # are returned unchanged; concrete subclasses override this.
    return self

# TODO: We have to ignore override mypy error as there is torch.Tensor built-in deprecated op: Tensor.resize
# https://github.com/pytorch/pytorch/blob/e8727994eb7cdb2ab642749d6549bc497563aa06/torch/_tensor.py#L588-L593
def resize( # type: ignore[override]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why ignore[override] is needed here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

holly molly! that's not good. Does it make sense to rename the method? I can see that Tensor.resize_ exists still on the docs but can't find the resize.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-inplace Tensor.resize is deprecated -> no docs. IMO, we can keep both.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a big TODO to get feedback on this. cc @NicolasHug thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit late to the party: looks like the original Tensor.resize was deprecated 4+ years ago, so I would agree that it's fine to override it IMHO.

Minor note: it's not super clear what the TODO above is about, i.e. it's not obvious what needs to be done about it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@NicolasHug I agree that the TODO note is unclear. I'll update in a follow-up PR. The point is to fix # type: ignore[override] and make mypy happy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this be actually possible? As far as I understand, mypy will throw an error as long as the 2 resize() implementations have different signatures?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good question. Maybe it is impossible. I was thinking about mypy overload: https://mypy.readthedocs.io/en/stable/more_types.html#function-overloading if this could help...

self,
size: List[int],
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
max_size: Optional[int] = None,
antialias: bool = False,
) -> Any:
return self

def crop(self, top: int, left: int, height: int, width: int) -> Any:
    # Default no-op; concrete subclasses override with a real crop.
    return self

def center_crop(self, output_size: List[int]) -> Any:
    # Default no-op; concrete subclasses override with a real center crop.
    return self

def resized_crop(
    self,
    top: int,
    left: int,
    height: int,
    width: int,
    size: List[int],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    antialias: bool = False,
) -> Any:
    # Default no-op; concrete subclasses override with a real resized crop.
    return self

def pad(
self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we support Sequence[int] as well? Did you face JIT-scriptability issues related to how it handles seq of ints and floats? If we add support, we need to put them in all places.

Copy link
Collaborator Author

@vfdev-5 vfdev-5 Jul 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can add Sequence[int] to the type hint (I didn't add it as it starts looking very bulky). JIT is not concerned as pad is not scriptable (in general torch script does not recognize Sequence and we should map it to List).
In following PRs, we have to make same type hint for all fill usages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. You wanna put a TODO on the code or you keep track of this elsewhere?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll be dealing with this in Transforms PR (next one, not yet sent)

) -> Any:
return self

def rotate(
    self,
    angle: float,
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    expand: bool = False,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> Any:
    # Default no-op; concrete subclasses override with a real rotation.
    return self

def affine(
    self,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> Any:
    # Default no-op; concrete subclasses override with a real affine transform.
    return self

def perspective(
    self,
    perspective_coeffs: List[float],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: Optional[List[float]] = None,
) -> Any:
    # Default no-op; concrete subclasses override with a real perspective warp.
    return self

# The methods below are default no-ops on the base feature: transforms can
# dispatch to them on any feature type, and types a given transform does not
# apply to simply pass through unchanged. Concrete feature classes override
# the ones that are meaningful for them.
def adjust_brightness(self, brightness_factor: float) -> Any:
    return self

def adjust_saturation(self, saturation_factor: float) -> Any:
    return self

def adjust_contrast(self, contrast_factor: float) -> Any:
    return self

def adjust_sharpness(self, sharpness_factor: float) -> Any:
    return self

def adjust_hue(self, hue_factor: float) -> Any:
    return self

def adjust_gamma(self, gamma: float, gain: float = 1) -> Any:
    return self

def posterize(self, bits: int) -> Any:
    return self

def solarize(self, threshold: float) -> Any:
    return self

def autocontrast(self) -> Any:
    return self

def equalize(self) -> Any:
    return self

def invert(self) -> Any:
    return self

def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> Any:
    return self

def mixup(self, lam: float) -> Any:
    return self

def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> Any:
    return self
Loading