From 0360975c1153a459ecafb40e87bdc4bc405ea141 Mon Sep 17 00:00:00 2001
From: PyExtreme
Date: Tue, 26 Nov 2019 21:35:18 +0530
Subject: [PATCH 1/5] add scriptable transform: center_crop

---
 torchvision/transforms/functional_tensor.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
index c741ab2e7e8..2fcc96746b4 100644
--- a/torchvision/transforms/functional_tensor.py
+++ b/torchvision/transforms/functional_tensor.py
@@ -125,6 +125,23 @@ def adjust_saturation(img, saturation_factor):
     return _blend(img, rgb_to_grayscale(img), saturation_factor)
 
 
+def center_crop(img, output_size):
+    """Crop the given Image Tensor at the center.
+
+    Args:
+        img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+        output_size (sequence or int): (height, width) of the crop box. If int,
+            it is used for both directions.
+    Returns:
+        Tensor: Cropped image.
+    """
+    image_width, image_height = img.size
+    crop_height, crop_width = output_size
+    crop_top = int(round((image_height - crop_height) / 2.))
+    crop_left = int(round((image_width - crop_width) / 2.))
+    return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
 def _blend(img1, img2, ratio):
     bound = 1 if img1.dtype.is_floating_point else 255
     return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype)

From bae204052cc0646e182aaaab28db83995831af41 Mon Sep 17 00:00:00 2001
From: PyExtreme
Date: Wed, 27 Nov 2019 05:51:33 +0530
Subject: [PATCH 2/5] add test: center_crop

---
 test/test_functional_tensor.py              | 7 +++++++
 torchvision/transforms/functional_tensor.py | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
index e318420102b..77ef7035486 100644
--- a/test/test_functional_tensor.py
+++ b/test/test_functional_tensor.py
@@ -76,6 +76,13 @@ def test_rgb_to_grayscale(self):
         max_diff = (grayscale_tensor - grayscale_pil_img).abs().max()
         self.assertLess(max_diff, 1.0001)
 
+    def test_center_crop(self):
+        img_tensor = torch.randint(0, 255, (1, 32, 32), dtype=torch.uint8)
+        cropped_tensor = F_t.center_crop(img_tensor, [10, 10])
+        cropped_pil_image = F.center_crop(transforms.ToPILImage()(img_tensor), [10, 10])
+        cropped_pil_tensor = (transforms.ToTensor()(cropped_pil_image) * 255).to(torch.uint8)
+        self.assertTrue(torch.equal(cropped_tensor, cropped_pil_tensor))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
index 2fcc96746b4..3a00b570ee5 100644
--- a/torchvision/transforms/functional_tensor.py
+++ b/torchvision/transforms/functional_tensor.py
@@ -135,7 +135,7 @@ def center_crop(img, output_size):
     Returns:
         Tensor: Cropped image.
""" - image_width, image_height = img.size + _, image_width, image_height = img.size() crop_height, crop_width = output_size crop_top = int(round((image_height - crop_height) / 2.)) crop_left = int(round((image_width - crop_width) / 2.)) From c6c3a8ca0b103d641d708421e873d5eb0a076310 Mon Sep 17 00:00:00 2001 From: PyExtreme Date: Wed, 27 Nov 2019 06:37:03 +0530 Subject: [PATCH 3/5] add scriptable transform: five_crop --- test/test_functional_tensor.py | 15 +++++++++++ torchvision/transforms/functional_tensor.py | 28 +++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 77ef7035486..286dcdd77ec 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -83,6 +83,21 @@ def test_center_crop(self): cropped_pil_tensor = (transforms.ToTensor()(cropped_pil_image) * 255).to(torch.uint8) self.assertTrue(torch.equal(cropped_tensor, cropped_pil_tensor)) + def test_five_crop(self): + img_tensor = torch.randint(0, 255, (1, 32, 32), dtype=torch.uint8) + cropped_tensor = F_t.five_crop(img_tensor, [10, 10]) + cropped_pil_image = F.five_crop(transforms.ToPILImage()(img_tensor), [10, 10]) + self.assertTrue(torch.equal(cropped_tensor[0], + (transforms.ToTensor()(cropped_pil_image[0]) * 255).to(torch.uint8))) + self.assertTrue(torch.equal(cropped_tensor[1], + (transforms.ToTensor()(cropped_pil_image[2]) * 255).to(torch.uint8))) + self.assertTrue(torch.equal(cropped_tensor[2], + (transforms.ToTensor()(cropped_pil_image[1]) * 255).to(torch.uint8))) + self.assertTrue(torch.equal(cropped_tensor[3], + (transforms.ToTensor()(cropped_pil_image[3]) * 255).to(torch.uint8))) + self.assertTrue(torch.equal(cropped_tensor[4], + (transforms.ToTensor()(cropped_pil_image[4]) * 255).to(torch.uint8))) + if __name__ == '__main__': unittest.main() diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 3a00b570ee5..8ecd763307b 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -141,6 +141,34 @@ def center_crop(img, output_size): crop_left = int(round((image_width - crop_width) / 2.)) return crop(img, crop_top, crop_left, crop_height, crop_width) +def five_crop(img, size): + """Crop the given Image Tensor into four corners and the central crop. + .. Note:: + This transform returns a tuple of Tensors and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + Returns: + tuple: tuple (tl, tr, bl, br, center) + Corresponding top left, top right, bottom left, bottom right and center crop. + """ + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
+    _, image_width, image_height = img.size()
+    crop_height, crop_width = size
+    if crop_width > image_width or crop_height > image_height:
+        msg = "Requested crop size {} is bigger than input size {}"
+        raise ValueError(msg.format(size, (image_height, image_width)))
+
+    tl = crop(img, 0, 0, crop_width, crop_height)
+    tr = crop(img, image_width - crop_width, 0, image_width, crop_height)
+    bl = crop(img, 0, image_height - crop_height, crop_width, image_height)
+    br = crop(img, image_width - crop_width, image_height - crop_height,
+              image_width, image_height)
+    center = center_crop(img, (crop_height, crop_width))
+    return (tl, tr, bl, br, center)
+
 
 def _blend(img1, img2, ratio):
     bound = 1 if img1.dtype.is_floating_point else 255

From d7b14dcc041110633efdd808505bc80334b3018a Mon Sep 17 00:00:00 2001
From: PyExtreme
Date: Wed, 27 Nov 2019 06:54:52 +0530
Subject: [PATCH 4/5] add scriptable transform: ten_crop

---
 test/test_functional_tensor.py              | 25 +++++++++++++++
 torchvision/transforms/functional_tensor.py | 35 +++++++++++++++++++--
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
index 286dcdd77ec..e464bf733a8 100644
--- a/test/test_functional_tensor.py
+++ b/test/test_functional_tensor.py
@@ -98,6 +98,31 @@ def test_five_crop(self):
         self.assertTrue(torch.equal(cropped_tensor[4],
                                     (transforms.ToTensor()(cropped_pil_image[4]) * 255).to(torch.uint8)))
 
+    def test_ten_crop(self):
+        img_tensor = torch.randint(0, 255, (1, 32, 32), dtype=torch.uint8)
+        cropped_tensor = F_t.ten_crop(img_tensor, [10, 10])
+        cropped_pil_image = F.ten_crop(transforms.ToPILImage()(img_tensor), [10, 10])
+        self.assertTrue(torch.equal(cropped_tensor[0],
+                                    (transforms.ToTensor()(cropped_pil_image[0]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[1],
+                                    (transforms.ToTensor()(cropped_pil_image[2]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[2],
+                                    (transforms.ToTensor()(cropped_pil_image[1]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[3],
+                                    (transforms.ToTensor()(cropped_pil_image[3]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[4],
+                                    (transforms.ToTensor()(cropped_pil_image[4]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[5],
+                                    (transforms.ToTensor()(cropped_pil_image[5]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[6],
+                                    (transforms.ToTensor()(cropped_pil_image[7]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[7],
+                                    (transforms.ToTensor()(cropped_pil_image[6]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[8],
+                                    (transforms.ToTensor()(cropped_pil_image[8]) * 255).to(torch.uint8)))
+        self.assertTrue(torch.equal(cropped_tensor[9],
+                                    (transforms.ToTensor()(cropped_pil_image[9]) * 255).to(torch.uint8)))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
index 8ecd763307b..4a572b61d83 100644
--- a/torchvision/transforms/functional_tensor.py
+++ b/torchvision/transforms/functional_tensor.py
@@ -132,6 +132,7 @@ def center_crop(img, output_size):
         img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
         output_size (sequence or int): (height, width) of the crop box. If int,
             it is used for both directions.
+
     Returns:
         Tensor: Cropped image.
""" @@ -141,6 +142,7 @@ def center_crop(img, output_size): crop_left = int(round((image_width - crop_width) / 2.)) return crop(img, crop_top, crop_left, crop_height, crop_width) + def five_crop(img, size): """Crop the given Image Tensor into four corners and the central crop. .. Note:: @@ -150,6 +152,7 @@ def five_crop(img, size): size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. + Returns: tuple: tuple (tl, tr, bl, br, center) Corresponding top left, top right, bottom left, bottom right and center crop. @@ -164,12 +167,40 @@ def five_crop(img, size): tl = crop(img, 0, 0, crop_width, crop_height) tr = crop(img, image_width - crop_width, 0, image_width, crop_height) bl = crop(img, 0, image_height - crop_height, crop_width, image_height) - br = crop(img, image_width - crop_width, image_height - crop_height, - image_width, image_height) + br = crop(img, image_width - crop_width, image_height - crop_height, image_width, image_height) center = center_crop(img, (crop_height, crop_width)) return (tl, tr, bl, br, center) +def ten_crop(img, size, vertical_flip=False): + """Crop the given Image Tensor into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) + Corresponding top left, top right, bottom left, bottom right and center crop + and same for the flipped image. + """ + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + return first_five + second_five + + def _blend(img1, img2, ratio): bound = 1 if img1.dtype.is_floating_point else 255 return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype) From 11f1649390f112bed9de2cdc11e2adebe3ec6657 Mon Sep 17 00:00:00 2001 From: PyExtreme Date: Wed, 27 Nov 2019 08:01:23 +0530 Subject: [PATCH 5/5] add scriptable transform: fix minor issues --- torchvision/transforms/functional_tensor.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 4a572b61d83..bd56ae3a131 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -136,10 +136,14 @@ def center_crop(img, output_size): Returns: Tensor: Cropped image. """ + if not F._is_tensor_image(img): + raise TypeError('tensor is not a torch image.') + _, image_width, image_height = img.size() crop_height, crop_width = output_size crop_top = int(round((image_height - crop_height) / 2.)) crop_left = int(round((image_width - crop_width) / 2.)) + return crop(img, crop_top, crop_left, crop_height, crop_width) @@ -148,6 +152,7 @@ def five_crop(img, size): .. Note:: This transform returns a tuple of Tensors and there may be a mismatch in the number of inputs and targets your ``Dataset`` returns. 
+
     Args:
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
@@ -157,7 +162,11 @@ def five_crop(img, size):
         tuple: tuple (tl, tr, bl, br, center)
                 Corresponding top left, top right, bottom left, bottom right and center crop.
     """
+    if not F._is_tensor_image(img):
+        raise TypeError('tensor is not a torch image.')
+
     assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+
     _, image_width, image_height = img.size()
     crop_height, crop_width = size
     if crop_width > image_width or crop_height > image_height:
@@ -169,6 +178,7 @@ def five_crop(img, size):
     bl = crop(img, 0, image_height - crop_height, crop_width, image_height)
     br = crop(img, image_width - crop_width, image_height - crop_height, image_width, image_height)
     center = center_crop(img, (crop_height, crop_width))
+
     return (tl, tr, bl, br, center)
 
 
@@ -178,6 +188,7 @@ def ten_crop(img, size, vertical_flip=False):
     .. Note::
         This transform returns a tuple of images and there may be a
         mismatch in the number of inputs and targets your ``Dataset`` returns.
+
     Args:
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
@@ -187,8 +198,11 @@ def ten_crop(img, size, vertical_flip=False):
     Returns:
         tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
                 Corresponding top left, top right, bottom left, bottom right and center crop
-                and same for the flipped image.
+                and same for the flipped image's tensor.
     """
+    if not F._is_tensor_image(img):
+        raise TypeError('tensor is not a torch image.')
+
     assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
     first_five = five_crop(img, size)
 
@@ -198,6 +212,7 @@ def ten_crop(img, size, vertical_flip=False):
         img = hflip(img)
 
     second_five = five_crop(img, size)
+
     return first_five + second_five
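
For quick reference, here is a minimal usage sketch of the three transforms this series adds. It is not part of the patches themselves: it assumes a torchvision checkout with these commits applied, and it calls the internal functional_tensor module directly, the same way the tests above do; functional_tensor is not a public API. Square crop sizes are used, mirroring the tests:

import torch
import torchvision.transforms.functional_tensor as F_t

# A C x H x W uint8 image tensor, the layout the new transforms expect.
img = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)

# Single (32, 32) crop taken from the center of the image.
center = F_t.center_crop(img, [32, 32])
print(center.shape)   # torch.Size([3, 32, 32])

# Four corner crops plus the center crop, returned as a 5-tuple of tensors.
tl, tr, bl, br, c = F_t.five_crop(img, [32, 32])

# The same five crops plus five more taken from the flipped image
# (horizontal flip by default, vertical flip with vertical_flip=True).
crops = F_t.ten_crop(img, [32, 32])
print(len(crops))     # 10

Note that, as the swapped indices in test_five_crop and test_ten_crop above encode, the corner ordering of the tensor version differs from the PIL version (entries 1 and 2 are exchanged), so the two outputs cannot be compared position-for-position without reordering.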