
Commit f7513a4

Refactoring
1 parent c8f7b14 commit f7513a4

File tree

9 files changed: 50 additions & 42 deletions


torchvision/prototype/transforms/_augment.py

Lines changed: 3 additions & 3 deletions

@@ -7,7 +7,7 @@
 from torchvision.prototype import features
 from torchvision.prototype.transforms import Transform, functional as F

-from ._utils import query_image
+from ._utils import query_image, get_image_dims


 class RandomErasing(Transform):
@@ -41,7 +41,7 @@ def __init__(

     def _get_params(self, sample: Any) -> Dict[str, Any]:
         image = query_image(sample)
-        img_c, img_h, img_w = F.get_image_dims(image)
+        img_c, img_h, img_w = get_image_dims(image)

         if isinstance(self.value, (int, float)):
             value = [self.value]
@@ -137,7 +137,7 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
         lam = float(self._dist.sample(()))

         image = query_image(sample)
-        _, H, W = F.get_image_dims(image)
+        _, H, W = get_image_dims(image)

         r_x = torch.randint(W, ())
         r_y = torch.randint(H, ())

torchvision/prototype/transforms/_auto_augment.py

Lines changed: 5 additions & 5 deletions

@@ -7,7 +7,7 @@
 from torchvision.prototype.transforms import Transform, InterpolationMode, AutoAugmentPolicy, functional as F
 from torchvision.prototype.utils._internal import apply_recursively

-from ._utils import query_image
+from ._utils import query_image, get_image_dims

 K = TypeVar("K")
 V = TypeVar("V")
@@ -47,7 +47,7 @@ def dispatch(
            return input

        image = query_image(sample)
-        num_channels, _, _ = F.get_image_dims(image)
+        num_channels, *_ = get_image_dims(image)

        fill = self.fill
        if isinstance(fill, (int, float)):
@@ -278,7 +278,7 @@ def forward(self, *inputs: Any) -> Any:
         sample = inputs if len(inputs) > 1 else inputs[0]

         image = query_image(sample)
-        _, height, width = F.get_image_dims(image)
+        _, height, width = get_image_dims(image)

         policy = self._policies[int(torch.randint(len(self._policies), ()))]

@@ -334,7 +334,7 @@ def forward(self, *inputs: Any) -> Any:
         sample = inputs if len(inputs) > 1 else inputs[0]

         image = query_image(sample)
-        _, height, width = F.get_image_dims(image)
+        _, height, width = get_image_dims(image)

         for _ in range(self.num_ops):
             transform_id, (magnitudes_fn, signed) = self._get_random_item(self._AUGMENTATION_SPACE)
@@ -383,7 +383,7 @@ def forward(self, *inputs: Any) -> Any:
         sample = inputs if len(inputs) > 1 else inputs[0]

         image = query_image(sample)
-        _, height, width = F.get_image_dims(image)
+        _, height, width = get_image_dims(image)

         transform_id, (magnitudes_fn, signed) = self._get_random_item(self._AUGMENTATION_SPACE)

torchvision/prototype/transforms/_geometry.py

Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 from torchvision.prototype.transforms import Transform, InterpolationMode, functional as F
 from torchvision.transforms.transforms import _setup_size, _interpolation_modes_from_int

-from ._utils import query_image
+from ._utils import query_image, get_image_dims


 class HorizontalFlip(Transform):
@@ -109,7 +109,7 @@ def __init__(

     def _get_params(self, sample: Any) -> Dict[str, Any]:
         image = query_image(sample)
-        _, height, width = F.get_image_dims(image)
+        _, height, width = get_image_dims(image)
         area = height * width

         log_ratio = torch.log(torch.tensor(self.ratio))

torchvision/prototype/transforms/_utils.py

Lines changed: 15 additions & 1 deletion

@@ -1,9 +1,10 @@
-from typing import Any, Optional, Union
+from typing import Any, Optional, Tuple, Union

 import PIL.Image
 import torch
 from torchvision.prototype import features
 from torchvision.prototype.utils._internal import query_recursively
+from torchvision.transforms import functional_tensor as _FT, functional_pil as _FP


 def query_image(sample: Any) -> Union[PIL.Image.Image, torch.Tensor, features.Image]:
@@ -17,3 +18,16 @@ def fn(input: Any) -> Optional[Union[PIL.Image.Image, torch.Tensor, features.Image]]:
         return next(query_recursively(fn, sample))
     except StopIteration:
         raise TypeError("No image was found in the sample")
+
+
+def get_image_dims(image: Union[PIL.Image.Image, torch.Tensor, features.Image]) -> Tuple[int, int, int]:
+    if isinstance(image, features.Image):
+        channels = image.num_channels
+        height, width = image.image_size
+    elif isinstance(image, torch.Tensor):
+        channels, height, width = _FT.get_image_dims(image)
+    elif isinstance(image, PIL.Image.Image):
+        channels, height, width = _FP.get_image_dims(image)
+    else:
+        raise TypeError(f"unable to get image dimensions from object of type {type(image).__name__}")
+    return channels, height, width
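
Note: the new get_image_dims gives the prototype transforms a single entry point for querying (channels, height, width) across the three supported image types. A minimal usage sketch (the sample dict and shapes below are hypothetical, assuming the module layout at this commit):

    import torch
    from torchvision.prototype.transforms._utils import query_image, get_image_dims

    # hypothetical sample, as a Transform would receive it
    sample = {"image": torch.rand(3, 224, 224), "label": 17}
    image = query_image(sample)                      # recursively locates the image
    channels, height, width = get_image_dims(image)  # -> (3, 224, 224)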

torchvision/prototype/transforms/functional/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -1,5 +1,4 @@
 from torchvision.transforms import InterpolationMode  # usort: skip
-from ._utils import get_image_dims  # usort: skip
 from ._meta_conversion import (
     convert_bounding_box_format,
     convert_image_color_space_tensor,

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 8 additions & 9 deletions

@@ -5,7 +5,6 @@
 import torch
 from torchvision.prototype import features
 from torchvision.prototype.transforms import InterpolationMode
-from torchvision.prototype.transforms.functional import get_image_dims
 from torchvision.transforms import functional_tensor as _FT, functional_pil as _FP
 from torchvision.transforms.functional import pil_modes_mapping, _get_inverse_affine_matrix

@@ -40,7 +39,7 @@ def resize_image_tensor(
     antialias: Optional[bool] = None,
 ) -> torch.Tensor:
     new_height, new_width = size
-    num_channels, old_height, old_width = image.shape[-3:]
+    num_channels, old_height, old_width = _FT.get_image_dims(image)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
@@ -142,7 +141,7 @@ def affine_image_tensor(

     center_f = [0.0, 0.0]
     if center is not None:
-        _, height, width = get_image_dims(img)
+        _, height, width = _FT.get_image_dims(img)
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
         center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, (width, height))]

@@ -168,7 +167,7 @@ def affine_image_pil(
     # it is visually better to estimate the center without 0.5 offset
     # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine
     if center is None:
-        _, height, width = get_image_dims(img)
+        _, height, width = _FP.get_image_dims(img)
         center = [width * 0.5, height * 0.5]
     matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)

@@ -185,7 +184,7 @@ def rotate_image_tensor(
 ) -> torch.Tensor:
     center_f = [0.0, 0.0]
     if center is not None:
-        _, height, width = get_image_dims(img)
+        _, height, width = _FT.get_image_dims(img)
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
         center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, (width, height))]

@@ -261,13 +260,13 @@ def _center_crop_compute_crop_anchor(

 def center_crop_image_tensor(img: torch.Tensor, output_size: List[int]) -> torch.Tensor:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
-    _, image_height, image_width = get_image_dims(img)
+    _, image_height, image_width = _FT.get_image_dims(img)

     if crop_height > image_height or crop_width > image_width:
         padding_ltrb = _center_crop_compute_padding(crop_height, crop_width, image_height, image_width)
         img = pad_image_tensor(img, padding_ltrb, fill=0)

-    _, image_height, image_width = get_image_dims(img)
+    _, image_height, image_width = _FT.get_image_dims(img)
     if crop_width == image_width and crop_height == image_height:
         return img

@@ -277,13 +276,13 @@ def center_crop_image_tensor(img: torch.Tensor, output_size: List[int]) -> torch.Tensor:

 def center_crop_image_pil(img: PIL.Image.Image, output_size: List[int]) -> PIL.Image.Image:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
-    _, image_height, image_width = get_image_dims(img)
+    _, image_height, image_width = _FP.get_image_dims(img)

     if crop_height > image_height or crop_width > image_width:
         padding_ltrb = _center_crop_compute_padding(crop_height, crop_width, image_height, image_width)
         img = pad_image_pil(img, padding_ltrb, fill=0)

-    _, image_height, image_width = get_image_dims(img)
+    _, image_height, image_width = _FP.get_image_dims(img)
     if crop_width == image_width and crop_height == image_height:
         return img
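
Note: after this change the low-level kernels follow one convention: *_image_tensor kernels query dimensions via functional_tensor (_FT) and *_image_pil kernels via functional_pil (_FP), so no type dispatch happens inside a kernel. A minimal sketch of that convention (shapes and mode are illustrative, assuming this commit is applied):

    import PIL.Image
    import torch
    from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT

    _, h_t, w_t = _FT.get_image_dims(torch.rand(3, 8, 8))            # tensor backend
    _, h_p, w_p = _FP.get_image_dims(PIL.Image.new("RGB", (8, 8)))   # PIL backend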

torchvision/prototype/transforms/functional/_utils.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

torchvision/transforms/functional_pil.py

Lines changed: 10 additions & 1 deletion

@@ -20,6 +20,15 @@ def _is_pil_image(img: Any) -> bool:
     return isinstance(img, Image.Image)


+@torch.jit.unused
+def get_image_dims(img: Any) -> List[int]:
+    if _is_pil_image(img):
+        channels = len(img.getbands())
+        width, height = img.size
+        return [channels, height, width]
+    raise TypeError(f"Unexpected type {type(img)}")
+
+
 @torch.jit.unused
 def get_image_size(img: Any) -> List[int]:
     if _is_pil_image(img):
@@ -30,7 +39,7 @@ def get_image_size(img: Any) -> List[int]:
 @torch.jit.unused
 def get_image_num_channels(img: Any) -> int:
     if _is_pil_image(img):
-        return 1 if img.mode == "L" else 3
+        return len(img.getbands())
     raise TypeError(f"Unexpected type {type(img)}")
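
Note: len(img.getbands()) reports the actual band count, which also fixes get_image_num_channels for modes the old hard-coded "1 if L else 3" got wrong (RGBA and CMYK have 4 bands, palette mode P has 1). A quick illustration (modes are arbitrary examples):

    import PIL.Image

    for mode in ("L", "P", "RGB", "RGBA", "CMYK"):
        img = PIL.Image.new(mode, (4, 2))
        print(mode, len(img.getbands()))  # L: 1, P: 1, RGB: 3, RGBA: 4, CMYK: 4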

torchvision/transforms/functional_tensor.py

Lines changed: 7 additions & 0 deletions

@@ -21,6 +21,13 @@ def _assert_threshold(img: Tensor, threshold: float) -> None:
         raise TypeError("Threshold should be less than bound of img.")


+def get_image_dims(img: Tensor) -> List[int]:
+    _assert_image_tensor(img)
+    channels = 1 if img.ndim == 2 else img.shape[-3]
+    height, width = img.shape[-2:]
+    return [channels, height, width]
+
+
 def get_image_size(img: Tensor) -> List[int]:
     # Returns (w, h) of tensor image
     _assert_image_tensor(img)
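
Note: the tensor kernel reads the channel count from shape[-3], falls back to 1 channel for 2-D images, and ignores any leading batch dimensions. A quick sketch of the resulting behavior (shapes are illustrative, assuming this commit is applied):

    import torch
    from torchvision.transforms import functional_tensor as _FT

    print(_FT.get_image_dims(torch.rand(3, 32, 48)))     # [3, 32, 48]
    print(_FT.get_image_dims(torch.rand(32, 48)))        # [1, 32, 48]  (2-D image)
    print(_FT.get_image_dims(torch.rand(5, 3, 32, 48)))  # [3, 32, 48]  (batch dim ignored)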
