
Commit 21deb4d

voldemortX and vfdev-5 authored
Fill color support for tensor affine transforms (#2904)
* Fill color support for tensor affine transforms
* PEP fix
* Docstring changes and float support
* Docstring update for transforms and float type cast
* Cast only for Tensor
* Temporary patch for lack of Union type support, plus an extra unit test
* More plausible bilinear filling for tensors
* Keep things simple & New docstrings
* Fix lint and other issues after merge
* make it in one line
* Docstring and some code modifications
* More tests and corresponding changes for transforms and docstring changes
* Simplify test configs
* Update test_functional_tensor.py
* Update test_functional_tensor.py
* Move assertions

Co-authored-by: vfdev <[email protected]>
1 parent df4003f commit 21deb4d
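
As a quick orientation (editorial, not part of the commit): the `fill` argument of the affine-style ops in torchvision.transforms.functional is now honored for Tensor inputs instead of being supported only for PIL Images. A minimal usage sketch, assuming this revision of torchvision; the image tensor and fill values below are arbitrary:

    import torch
    import torchvision.transforms.functional as F

    img = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8)  # arbitrary 3-channel image

    # Per the docstrings updated below, `fill` now applies to Tensor inputs as well.
    rotated = F.rotate(img, angle=45, fill=[255, 255, 255])            # per-channel fill
    sheared = F.affine(img, angle=0.0, translate=[0, 0], scale=1.0,
                       shear=[15.0, 0.0], fill=[1, ])                  # 1-element sequences are accepted too (see tests)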

File tree

6 files changed: +197 -132 lines changed

test/test_functional_tensor.py

+65-60
@@ -552,24 +552,25 @@ def _test_affine_translations(self, tensor, pil_img, scripted_affine):
     def _test_affine_all_ops(self, tensor, pil_img, scripted_affine):
         # 4) Test rotation + translation + scale + share
         test_configs = [
-            (45, [5, 6], 1.0, [0.0, 0.0]),
-            (33, (5, -4), 1.0, [0.0, 0.0]),
-            (45, [-5, 4], 1.2, [0.0, 0.0]),
-            (33, (-4, -8), 2.0, [0.0, 0.0]),
-            (85, (10, -10), 0.7, [0.0, 0.0]),
-            (0, [0, 0], 1.0, [35.0, ]),
-            (-25, [0, 0], 1.2, [0.0, 15.0]),
-            (-45, [-10, 0], 0.7, [2.0, 5.0]),
-            (-45, [-10, -10], 1.2, [4.0, 5.0]),
-            (-90, [0, 0], 1.0, [0.0, 0.0]),
+            (45.5, [5, 6], 1.0, [0.0, 0.0], None),
+            (33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]),
+            (45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)),
+            (33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]),
+            (85, (10, -10), 0.7, [0.0, 0.0], [1, ]),
+            (0, [0, 0], 1.0, [35.0, ], (2.0, )),
+            (-25, [0, 0], 1.2, [0.0, 15.0], None),
+            (-45, [-10, 0], 0.7, [2.0, 5.0], None),
+            (-45, [-10, -10], 1.2, [4.0, 5.0], None),
+            (-90, [0, 0], 1.0, [0.0, 0.0], None),
         ]
         for r in [NEAREST, ]:
-            for a, t, s, sh in test_configs:
-                out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=r)
+            for a, t, s, sh, f in test_configs:
+                f_pil = int(f[0]) if f is not None and len(f) == 1 else f
+                out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f_pil)
                 out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
 
                 for fn in [F.affine, scripted_affine]:
-                    out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=r).cpu()
+                    out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f).cpu()
 
                     if out_tensor.dtype != torch.uint8:
                         out_tensor = out_tensor.to(torch.uint8)
@@ -582,7 +583,7 @@ def _test_affine_all_ops(self, tensor, pil_img, scripted_affine):
                         ratio_diff_pixels,
                         tol,
                         msg="{}: {}\n{} vs \n{}".format(
-                            (r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
+                            (r, a, t, s, sh, f), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
                         )
                     )
 
@@ -643,35 +644,36 @@ def _test_rotate_all_options(self, tensor, pil_img, scripted_rotate, centers):
         for a in range(-180, 180, 17):
             for e in [True, False]:
                 for c in centers:
-
-                    out_pil_img = F.rotate(pil_img, angle=a, interpolation=r, expand=e, center=c)
-                    out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
-                    for fn in [F.rotate, scripted_rotate]:
-                        out_tensor = fn(tensor, angle=a, interpolation=r, expand=e, center=c).cpu()
-
-                        if out_tensor.dtype != torch.uint8:
-                            out_tensor = out_tensor.to(torch.uint8)
-
-                        self.assertEqual(
-                            out_tensor.shape,
-                            out_pil_tensor.shape,
-                            msg="{}: {} vs {}".format(
-                                (img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape
-                            )
-                        )
-                        num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
-                        ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
-                        # Tolerance : less than 3% of different pixels
-                        self.assertLess(
-                            ratio_diff_pixels,
-                            0.03,
-                            msg="{}: {}\n{} vs \n{}".format(
-                                (img_size, r, dt, a, e, c),
+                    for f in [None, [0, 0, 0], (1, 2, 3), [255, 255, 255], [1, ], (2.0, )]:
+                        f_pil = int(f[0]) if f is not None and len(f) == 1 else f
+                        out_pil_img = F.rotate(pil_img, angle=a, interpolation=r, expand=e, center=c, fill=f_pil)
+                        out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
+                        for fn in [F.rotate, scripted_rotate]:
+                            out_tensor = fn(tensor, angle=a, interpolation=r, expand=e, center=c, fill=f).cpu()
+
+                            if out_tensor.dtype != torch.uint8:
+                                out_tensor = out_tensor.to(torch.uint8)
+
+                            self.assertEqual(
+                                out_tensor.shape,
+                                out_pil_tensor.shape,
+                                msg="{}: {} vs {}".format(
+                                    (img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape
+                                ))
+
+                            num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
+                            ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
+                            # Tolerance : less than 3% of different pixels
+                            self.assertLess(
                                 ratio_diff_pixels,
-                                out_tensor[0, :7, :7],
-                                out_pil_tensor[0, :7, :7]
+                                0.03,
+                                msg="{}: {}\n{} vs \n{}".format(
+                                    (img_size, r, dt, a, e, c, f),
+                                    ratio_diff_pixels,
+                                    out_tensor[0, :7, :7],
+                                    out_pil_tensor[0, :7, :7]
+                                )
                             )
-                        )
 
     def test_rotate(self):
         # Tests on square image
@@ -721,30 +723,33 @@ def test_rotate(self):
 
     def _test_perspective(self, tensor, pil_img, scripted_transform, test_configs):
         dt = tensor.dtype
-        for r in [NEAREST, ]:
-            for spoints, epoints in test_configs:
-                out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r)
-                out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
+        for f in [None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1, ], (2.0, )]:
+            for r in [NEAREST, ]:
+                for spoints, epoints in test_configs:
+                    f_pil = int(f[0]) if f is not None and len(f) == 1 else f
+                    out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r,
+                                                fill=f_pil)
+                    out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
 
-                for fn in [F.perspective, scripted_transform]:
-                    out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r).cpu()
+                    for fn in [F.perspective, scripted_transform]:
+                        out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r, fill=f).cpu()
 
-                    if out_tensor.dtype != torch.uint8:
-                        out_tensor = out_tensor.to(torch.uint8)
+                        if out_tensor.dtype != torch.uint8:
+                            out_tensor = out_tensor.to(torch.uint8)
 
-                    num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
-                    ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
-                    # Tolerance : less than 5% of different pixels
-                    self.assertLess(
-                        ratio_diff_pixels,
-                        0.05,
-                        msg="{}: {}\n{} vs \n{}".format(
-                            (r, dt, spoints, epoints),
+                        num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
+                        ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
+                        # Tolerance : less than 5% of different pixels
+                        self.assertLess(
                             ratio_diff_pixels,
-                            out_tensor[0, :7, :7],
-                            out_pil_tensor[0, :7, :7]
+                            0.05,
+                            msg="{}: {}\n{} vs \n{}".format(
+                                (f, r, dt, spoints, epoints),
+                                ratio_diff_pixels,
+                                out_tensor[0, :7, :7],
+                                out_pil_tensor[0, :7, :7]
+                            )
                         )
-                    )
 
     def test_perspective(self):
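
The tests above collapse a one-element fill sequence to a scalar before calling the PIL reference implementation, because PIL takes a scalar per-band fill while the tensor path takes the sequence as-is. The inline expression can be read as a small helper; the helper name below is ours, not the commit's:

    def _fill_for_pil(f):
        # Same expression as in the tests: collapse a 1-element sequence to a scalar for PIL,
        # leave None and multi-element sequences untouched.
        return int(f[0]) if f is not None and len(f) == 1 else f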

test/test_transforms_tensor.py

+24-20
@@ -349,14 +349,15 @@ def test_random_affine(self):
                 for translate in [(0.1, 0.2), [0.2, 0.1]]:
                     for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]:
                         for interpolation in [NEAREST, BILINEAR]:
-                            transform = T.RandomAffine(
-                                degrees=degrees, translate=translate,
-                                scale=scale, shear=shear, interpolation=interpolation
-                            )
-                            s_transform = torch.jit.script(transform)
+                            for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
+                                transform = T.RandomAffine(
+                                    degrees=degrees, translate=translate,
+                                    scale=scale, shear=shear, interpolation=interpolation, fill=fill
+                                )
+                                s_transform = torch.jit.script(transform)
 
-                            self._test_transform_vs_scripted(transform, s_transform, tensor)
-                            self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
+                                self._test_transform_vs_scripted(transform, s_transform, tensor)
+                                self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
         with get_tmp_dir() as tmp_dir:
             s_transform.save(os.path.join(tmp_dir, "t_random_affine.pt"))
@@ -369,13 +370,14 @@ def test_random_rotate(self):
             for expand in [True, False]:
                 for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]:
                     for interpolation in [NEAREST, BILINEAR]:
-                        transform = T.RandomRotation(
-                            degrees=degrees, interpolation=interpolation, expand=expand, center=center
-                        )
-                        s_transform = torch.jit.script(transform)
+                        for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
+                            transform = T.RandomRotation(
+                                degrees=degrees, interpolation=interpolation, expand=expand, center=center, fill=fill
+                            )
+                            s_transform = torch.jit.script(transform)
 
-                        self._test_transform_vs_scripted(transform, s_transform, tensor)
-                        self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
+                            self._test_transform_vs_scripted(transform, s_transform, tensor)
+                            self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
         with get_tmp_dir() as tmp_dir:
             s_transform.save(os.path.join(tmp_dir, "t_random_rotate.pt"))
@@ -386,14 +388,16 @@ def test_random_perspective(self):
 
         for distortion_scale in np.linspace(0.1, 1.0, num=20):
             for interpolation in [NEAREST, BILINEAR]:
-                transform = T.RandomPerspective(
-                    distortion_scale=distortion_scale,
-                    interpolation=interpolation
-                )
-                s_transform = torch.jit.script(transform)
+                for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]:
+                    transform = T.RandomPerspective(
+                        distortion_scale=distortion_scale,
+                        interpolation=interpolation,
+                        fill=fill
+                    )
+                    s_transform = torch.jit.script(transform)
 
-                self._test_transform_vs_scripted(transform, s_transform, tensor)
-                self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
+                    self._test_transform_vs_scripted(transform, s_transform, tensor)
+                    self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
         with get_tmp_dir() as tmp_dir:
             s_transform.save(os.path.join(tmp_dir, "t_perspective.pt"))
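
Outside the test harness, the same `fill` values can be passed to the transform classes directly and survive scripting; a hedged sketch (the constructor arguments are illustrative, not taken from the commit):

    import torch
    import torchvision.transforms as T

    transform = T.RandomAffine(degrees=(-45, 45), translate=(0.1, 0.2), fill=[0.0, 0.0, 0.0])
    scripted = torch.jit.script(transform)   # scriptable, as exercised by the tests above

    img = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8)
    out = scripted(img)                      # fill is applied to the area outside the transform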

torchvision/transforms/functional.py

+21-17
@@ -557,7 +557,7 @@ def perspective(
         startpoints: List[List[int]],
         endpoints: List[List[int]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
-        fill: Optional[int] = None
+        fill: Optional[List[float]] = None
 ) -> Tensor:
     """Perform perspective transform of the given image.
     The image can be a PIL Image or a Tensor, in which case it is expected
@@ -573,10 +573,12 @@ def perspective(
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
-        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
+        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
             image. If int or float, the value is used for all bands respectively.
-            This option is only available for ``pillow>=5.0.0``. This option is not supported for Tensor
-            input. Fill value for the area outside the transform in the output image is always 0.
+            This option is supported for PIL image and Tensor inputs.
+            In torchscript mode single int/float value is not supported, please use a tuple
+            or list of length 1: ``[value, ]``.
+            If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
 
     Returns:
         PIL Image or Tensor: transformed Image.
@@ -871,7 +873,7 @@ def _get_inverse_affine_matrix(
 def rotate(
         img: Tensor, angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST,
         expand: bool = False, center: Optional[List[int]] = None,
-        fill: Optional[int] = None, resample: Optional[int] = None
+        fill: Optional[List[float]] = None, resample: Optional[int] = None
 ) -> Tensor:
     """Rotate the image by angle.
     The image can be a PIL Image or a Tensor, in which case it is expected
@@ -890,13 +892,12 @@ def rotate(
             Note that the expand flag assumes rotation around the center and no translation.
         center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner.
             Default is the center of the image.
-        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
+        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
             image. If int or float, the value is used for all bands respectively.
-            Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
-            This option is not supported for Tensor input. Fill value for the area outside the transform in the output
-            image is always 0.
-        resample (int, optional): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:interpolation: instead.
+            This option is supported for PIL image and Tensor inputs.
+            In torchscript mode single int/float value is not supported, please use a tuple
+            or list of length 1: ``[value, ]``.
+            If input is PIL Image, the options is only available for ``Pillow>=5.2.0``.
 
     Returns:
         PIL Image or Tensor: Rotated image.
@@ -945,8 +946,8 @@
 
 def affine(
         img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float],
-        interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[int] = None,
-        resample: Optional[int] = None, fillcolor: Optional[int] = None
+        interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[List[float]] = None,
+        resample: Optional[int] = None, fillcolor: Optional[List[float]] = None
 ) -> Tensor:
     """Apply affine transformation on the image keeping image center invariant.
     The image can be a PIL Image or a Tensor, in which case it is expected
@@ -964,10 +965,13 @@ def affine(
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
-        fill (int): Optional fill color for the area outside the transform in the output image (Pillow>=5.0.0).
-            This option is not supported for Tensor input. Fill value for the area outside the transform in the output
-            image is always 0.
-        fillcolor (tuple or int, optional): deprecated argument and will be removed since v0.10.0.
+        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
+            image. If int or float, the value is used for all bands respectively.
+            This option is supported for PIL image and Tensor inputs.
+            In torchscript mode single int/float value is not supported, please use a tuple
+            or list of length 1: ``[value, ]``.
+            If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
+        fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
             Please use `arg`:fill: instead.
         resample (int, optional): deprecated argument and will be removed since v0.10.0.
             Please use `arg`:interpolation: instead.
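
The torchscript caveat spelled out in the docstrings above can be made concrete; a minimal sketch, assuming this revision of torchvision (the angle and fill value are arbitrary):

    import torch
    import torchvision.transforms.functional as F
    from torchvision.transforms.functional import InterpolationMode

    scripted_rotate = torch.jit.script(F.rotate)
    img = torch.randint(0, 256, size=(3, 32, 32), dtype=torch.uint8)

    # A bare int/float fill is not supported under torchscript, so a 1-element list is used instead.
    out = scripted_rotate(img, angle=30.0, interpolation=InterpolationMode.NEAREST,
                          expand=False, center=None, fill=[128.0, ])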

torchvision/transforms/functional_pil.py

+7-4
@@ -465,10 +465,13 @@ def _parse_fill(fill, img, min_pil_version, name="fillcolor"):
         fill = 0
     if isinstance(fill, (int, float)) and num_bands > 1:
         fill = tuple([fill] * num_bands)
-    if not isinstance(fill, (int, float)) and len(fill) != num_bands:
-        msg = ("The number of elements in 'fill' does not match the number of "
-               "bands of the image ({} != {})")
-        raise ValueError(msg.format(len(fill), num_bands))
+    if isinstance(fill, (list, tuple)):
+        if len(fill) != num_bands:
+            msg = ("The number of elements in 'fill' does not match the number of "
+                   "bands of the image ({} != {})")
+            raise ValueError(msg.format(len(fill), num_bands))
+
+        fill = tuple(fill)
 
     return {name: fill}
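
Restated outside the library (a standalone sketch of the branch above, not `functional_pil._parse_fill` itself; the surrounding `if fill is None` handling is assumed from context and the Pillow version check is omitted), the PIL-side normalization behaves roughly as follows for a 3-band image:

    def parse_fill_sketch(fill, num_bands=3, name="fillcolor"):
        if fill is None:
            fill = 0
        if isinstance(fill, (int, float)) and num_bands > 1:
            fill = tuple([fill] * num_bands)   # broadcast a scalar to every band
        if isinstance(fill, (list, tuple)):
            if len(fill) != num_bands:
                raise ValueError("The number of elements in 'fill' does not match the number of "
                                 "bands of the image ({} != {})".format(len(fill), num_bands))
            fill = tuple(fill)                 # PIL expects a tuple, not a list
        return {name: fill}

    # parse_fill_sketch([255, 255, 255]) -> {'fillcolor': (255, 255, 255)}
    # parse_fill_sketch(2.0)             -> {'fillcolor': (2.0, 2.0, 2.0)}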
