Fill color support for tensor affine transforms #2904

Merged

merged 22 commits on Dec 2, 2020
Changes from all commits
22 commits
696c15a
Fill color support for tensor affine transforms
voldemortX Oct 27, 2020
229c140
Merge branch 'master' into issue2887
voldemortX Oct 27, 2020
adae0f6
PEP fix
voldemortX Oct 27, 2020
b2721e8
Merge branch 'issue2887' of github.com:voldemortX/vision into issue2887
voldemortX Oct 27, 2020
1c4e48a
Docstring changes and float support
voldemortX Oct 28, 2020
62abb37
Docstring update for transforms and float type cast
voldemortX Oct 28, 2020
a585dbd
Cast only for Tensor
voldemortX Oct 28, 2020
d616210
Temporary patch for lack of Union type support, plus an extra unit test
voldemortX Oct 31, 2020
417f6ea
More plausible bilinear filling for tensors
voldemortX Nov 3, 2020
50d311d
Keep things simple & New docstrings
voldemortX Nov 5, 2020
6b0eb53
Merge branch 'master' into issue2887
voldemortX Nov 30, 2020
5589c14
Fix lint and other issues after merge
voldemortX Nov 30, 2020
731a5a9
make it in one line
voldemortX Nov 30, 2020
4389f80
Merge branch 'master' into issue2887
vfdev-5 Nov 30, 2020
2ea1003
Docstring and some code modifications
voldemortX Nov 30, 2020
4c59964
Merge branch 'issue2887' of github.com:voldemortX/vision into issue2887
voldemortX Nov 30, 2020
9e7cb7a
More tests and corresponding changes for transoforms and docstring ch…
voldemortX Dec 1, 2020
16e9b97
Simplify test configs
voldemortX Dec 1, 2020
96c70bc
Update test_functional_tensor.py
vfdev-5 Dec 1, 2020
9d9fd08
Update test_functional_tensor.py
vfdev-5 Dec 2, 2020
87560cb
Merge branch 'master' into issue2887
vfdev-5 Dec 2, 2020
bc7e9fe
Move assertions
voldemortX Dec 2, 2020
125 changes: 65 additions & 60 deletions test/test_functional_tensor.py
@@ -552,24 +552,25 @@ def _test_affine_translations(self, tensor, pil_img, scripted_affine):
def _test_affine_all_ops(self, tensor, pil_img, scripted_affine):
# 4) Test rotation + translation + scale + share
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [-5, 4], 1.2, [0.0, 0.0]),
(33, (-4, -8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(-25, [0, 0], 1.2, [0.0, 15.0]),
(-45, [-10, 0], 0.7, [2.0, 5.0]),
(-45, [-10, -10], 1.2, [4.0, 5.0]),
(-90, [0, 0], 1.0, [0.0, 0.0]),
(45.5, [5, 6], 1.0, [0.0, 0.0], None),
(33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]),
(45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)),
(33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]),
(85, (10, -10), 0.7, [0.0, 0.0], [1, ]),
(0, [0, 0], 1.0, [35.0, ], (2.0, )),
(-25, [0, 0], 1.2, [0.0, 15.0], None),
(-45, [-10, 0], 0.7, [2.0, 5.0], None),
(-45, [-10, -10], 1.2, [4.0, 5.0], None),
(-90, [0, 0], 1.0, [0.0, 0.0], None),
]
for r in [NEAREST, ]:
for a, t, s, sh in test_configs:
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=r)
for a, t, s, sh, f in test_configs:
f_pil = int(f[0]) if f is not None and len(f) == 1 else f
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f_pil)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=r).cpu()
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f).cpu()

if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
@@ -582,7 +583,7 @@ def _test_affine_all_ops(self, tensor, pil_img, scripted_affine):
ratio_diff_pixels,
tol,
msg="{}: {}\n{} vs \n{}".format(
(r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
(r, a, t, s, sh, f), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
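Note the `f_pil` conversion used above: when the fill under test is a one-element sequence, the PIL reference call receives a plain int while the tensor call keeps the sequence. A small sketch of that logic (the helper name is mine, not the test's):

```python
def fill_for_pil(f):
    # PIL expects a scalar when one value should apply to all bands;
    # the tensor path accepts the one-element sequence as-is.
    if f is not None and len(f) == 1:
        return int(f[0])
    return f
```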

@@ -643,35 +644,36 @@ def _test_rotate_all_options(self, tensor, pil_img, scripted_rotate, centers):
for a in range(-180, 180, 17):
for e in [True, False]:
for c in centers:

out_pil_img = F.rotate(pil_img, angle=a, interpolation=r, expand=e, center=c)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.rotate, scripted_rotate]:
out_tensor = fn(tensor, angle=a, interpolation=r, expand=e, center=c).cpu()

if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)

self.assertEqual(
out_tensor.shape,
out_pil_tensor.shape,
msg="{}: {} vs {}".format(
(img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape
)
)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 3% of different pixels
self.assertLess(
ratio_diff_pixels,
0.03,
msg="{}: {}\n{} vs \n{}".format(
(img_size, r, dt, a, e, c),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)
for f in [None, [0, 0, 0], (1, 2, 3), [255, 255, 255], [1, ], (2.0, )]:
f_pil = int(f[0]) if f is not None and len(f) == 1 else f
out_pil_img = F.rotate(pil_img, angle=a, interpolation=r, expand=e, center=c, fill=f_pil)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.rotate, scripted_rotate]:
out_tensor = fn(tensor, angle=a, interpolation=r, expand=e, center=c, fill=f).cpu()

if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)

self.assertEqual(
out_tensor.shape,
out_pil_tensor.shape,
msg="{}: {} vs {}".format(
(img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape
))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 3% of different pixels
self.assertLess(
ratio_diff_pixels,
0.03,
msg="{}: {}\n{} vs \n{}".format(
(img_size, r, dt, a, e, c, f),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)

def test_rotate(self):
# Tests on square image
@@ -721,30 +723,33 @@ def test_rotate(self):

def _test_perspective(self, tensor, pil_img, scripted_transform, test_configs):
dt = tensor.dtype
for r in [NEAREST, ]:
for spoints, epoints in test_configs:
out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for f in [None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1, ], (2.0, )]:
for r in [NEAREST, ]:
for spoints, epoints in test_configs:
f_pil = int(f[0]) if f is not None and len(f) == 1 else f
out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r,
fill=f_pil)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

for fn in [F.perspective, scripted_transform]:
out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r).cpu()
for fn in [F.perspective, scripted_transform]:
out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r, fill=f).cpu()

if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% of different pixels
self.assertLess(
ratio_diff_pixels,
0.05,
msg="{}: {}\n{} vs \n{}".format(
(r, dt, spoints, epoints),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% of different pixels
self.assertLess(
ratio_diff_pixels,
0.05,
msg="{}: {}\n{} vs \n{}".format(
(f, r, dt, spoints, epoints),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)

def test_perspective(self):

44 changes: 24 additions & 20 deletions test/test_transforms_tensor.py
@@ -349,14 +349,15 @@ def test_random_affine(self):
for translate in [(0.1, 0.2), [0.2, 0.1]]:
for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]:
for interpolation in [NEAREST, BILINEAR]:
transform = T.RandomAffine(
degrees=degrees, translate=translate,
scale=scale, shear=shear, interpolation=interpolation
)
s_transform = torch.jit.script(transform)
for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
transform = T.RandomAffine(
degrees=degrees, translate=translate,
scale=scale, shear=shear, interpolation=interpolation, fill=fill
)
s_transform = torch.jit.script(transform)

self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)

with get_tmp_dir() as tmp_dir:
s_transform.save(os.path.join(tmp_dir, "t_random_affine.pt"))
@@ -369,13 +370,14 @@ def test_random_rotate(self):
for expand in [True, False]:
for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]:
for interpolation in [NEAREST, BILINEAR]:
transform = T.RandomRotation(
degrees=degrees, interpolation=interpolation, expand=expand, center=center
)
s_transform = torch.jit.script(transform)
for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
transform = T.RandomRotation(
degrees=degrees, interpolation=interpolation, expand=expand, center=center, fill=fill
)
s_transform = torch.jit.script(transform)

self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)

with get_tmp_dir() as tmp_dir:
s_transform.save(os.path.join(tmp_dir, "t_random_rotate.pt"))
@@ -386,14 +388,16 @@ def test_random_perspective(self):

for distortion_scale in np.linspace(0.1, 1.0, num=20):
for interpolation in [NEAREST, BILINEAR]:
transform = T.RandomPerspective(
distortion_scale=distortion_scale,
interpolation=interpolation
)
s_transform = torch.jit.script(transform)
for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
transform = T.RandomPerspective(
distortion_scale=distortion_scale,
interpolation=interpolation,
fill=fill
)
s_transform = torch.jit.script(transform)

self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
self._test_transform_vs_scripted(transform, s_transform, tensor)
self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)

with get_tmp_dir() as tmp_dir:
s_transform.save(os.path.join(tmp_dir, "t_perspective.pt"))
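The class-based transforms stay scriptable with the new argument, which is what these tests exercise. A minimal sketch of the pattern (fill value and image size are placeholders):

```python
import torch
import torchvision.transforms as T

transform = T.RandomRotation(degrees=45, expand=False, fill=[0.0, 0.0, 0.0])
scripted = torch.jit.script(transform)

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
out = scripted(img)  # fill is honored in both eager and scripted mode
```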
38 changes: 21 additions & 17 deletions torchvision/transforms/functional.py
@@ -557,7 +557,7 @@ def perspective(
startpoints: List[List[int]],
endpoints: List[List[int]],
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
fill: Optional[int] = None
fill: Optional[List[float]] = None
) -> Tensor:
"""Perform perspective transform of the given image.
The image can be a PIL Image or a Tensor, in which case it is expected
@@ -573,10 +573,12 @@
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is only available for ``pillow>=5.0.0``. This option is not supported for Tensor
input. Fill value for the area outside the transform in the output image is always 0.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.

Returns:
PIL Image or Tensor: transformed Image.
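For instance, under this branch a tensor input can be filled outside the warped region as well (a hedged example; the corner points are made up):

```python
import torch
import torchvision.transforms.functional as F

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
startpoints = [[0, 0], [31, 0], [31, 31], [0, 31]]
endpoints = [[2, 1], [29, 3], [30, 30], [1, 28]]

# One fill value per band; in torchscript use a list even for a single value.
out = F.perspective(img, startpoints, endpoints, fill=[0, 0, 0])
```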
@@ -871,7 +873,7 @@ def _get_inverse_affine_matrix(
def rotate(
img: Tensor, angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST,
expand: bool = False, center: Optional[List[int]] = None,
fill: Optional[int] = None, resample: Optional[int] = None
fill: Optional[List[float]] = None, resample: Optional[int] = None
) -> Tensor:
"""Rotate the image by angle.
The image can be a PIL Image or a Tensor, in which case it is expected
@@ -890,13 +892,12 @@ def rotate(
Note that the expand flag assumes rotation around the center and no translation.
center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner.
Default is the center of the image.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
This option is not supported for Tensor input. Fill value for the area outside the transform in the output
image is always 0.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
Please use `arg`:interpolation: instead.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.2.0``.

Returns:
PIL Image or Tensor: Rotated image.
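A short illustration of the documented constraint (an assumed example, not taken from the PR): a scalar fill is broadcast to all bands in eager mode, but a scripted call needs a sequence.

```python
import torch
import torchvision.transforms.functional as F

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)

# Eager mode: a bare number is accepted.
out_eager = F.rotate(img, angle=45.0, expand=True, fill=2.0)

# Scripted mode: wrap the single value in a list.
scripted_rotate = torch.jit.script(F.rotate)
out_scripted = scripted_rotate(img, angle=45.0, expand=True, fill=[2.0])
```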
@@ -945,8 +946,8 @@ def rotate(

def affine(
img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float],
interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[int] = None,
resample: Optional[int] = None, fillcolor: Optional[int] = None
interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[List[float]] = None,
resample: Optional[int] = None, fillcolor: Optional[List[float]] = None
) -> Tensor:
"""Apply affine transformation on the image keeping image center invariant.
The image can be a PIL Image or a Tensor, in which case it is expected
@@ -964,10 +965,13 @@ def affine(
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (int): Optional fill color for the area outside the transform in the output image (Pillow>=5.0.0).
This option is not supported for Tensor input. Fill value for the area outside the transform in the output
image is always 0.
fillcolor (tuple or int, optional): deprecated argument and will be removed since v0.10.0.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively.
This option is supported for PIL image and Tensor inputs.
In torchscript mode single int/float value is not supported, please use a tuple
or list of length 1: ``[value, ]``.
If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
Please use `arg`:fill: instead.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
Please use `arg`:interpolation: instead.
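The deprecated `fillcolor` keyword maps onto `fill`; a sketch of the migration (illustrative values, and the old spelling only emits a deprecation warning until its removal):

```python
import torch
import torchvision.transforms.functional as F

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)

# Old spelling, still accepted but scheduled for removal in v0.10.0:
out_old = F.affine(img, angle=10.0, translate=[0, 0], scale=1.0,
                   shear=[0.0], fillcolor=[0, 0, 0])

# New spelling:
out_new = F.affine(img, angle=10.0, translate=[0, 0], scale=1.0,
                   shear=[0.0], fill=[0, 0, 0])
```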
11 changes: 7 additions & 4 deletions torchvision/transforms/functional_pil.py
@@ -465,10 +465,13 @@ def _parse_fill(fill, img, min_pil_version, name="fillcolor"):
fill = 0
if isinstance(fill, (int, float)) and num_bands > 1:
fill = tuple([fill] * num_bands)
if not isinstance(fill, (int, float)) and len(fill) != num_bands:
msg = ("The number of elements in 'fill' does not match the number of "
"bands of the image ({} != {})")
raise ValueError(msg.format(len(fill), num_bands))
if isinstance(fill, (list, tuple)):
if len(fill) != num_bands:
msg = ("The number of elements in 'fill' does not match the number of "
"bands of the image ({} != {})")
raise ValueError(msg.format(len(fill), num_bands))

fill = tuple(fill)

return {name: fill}
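Roughly, the reworked PIL-side helper now normalizes lists and tuples as well; a hypothetical session (the helper is private, its behavior is read off the diff above, and Pillow >= 5.0 is assumed):

```python
from PIL import Image
from torchvision.transforms.functional_pil import _parse_fill

img = Image.new("RGB", (8, 8))

_parse_fill([1, 2, 3], img, "5.0.0")   # -> {'fillcolor': (1, 2, 3)}
_parse_fill(7, img, "5.0.0")           # -> {'fillcolor': (7, 7, 7)}
# _parse_fill([1, 2], img, "5.0.0")    # would raise ValueError: 2 elements vs 3 bands
```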
