-
Notifications
You must be signed in to change notification settings - Fork 7.1k
Remove non-functional Transforms from presets #4952
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,7 @@ | |
import torch | ||
from torch import Tensor, nn | ||
|
||
from ... import transforms as T | ||
from ...transforms import functional as F | ||
from ...transforms import functional as F, InterpolationMode | ||
|
||
|
||
__all__ = ["CocoEval", "ImageNetEval", "Kinect400Eval", "VocEval"] | ||
|
@@ -26,42 +25,47 @@ def __init__( | |
resize_size: int = 256, | ||
mean: Tuple[float, ...] = (0.485, 0.456, 0.406), | ||
std: Tuple[float, ...] = (0.229, 0.224, 0.225), | ||
interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR, | ||
interpolation: InterpolationMode = InterpolationMode.BILINEAR, | ||
) -> None: | ||
super().__init__() | ||
self._resize = T.Resize(resize_size, interpolation=interpolation) | ||
self._crop = T.CenterCrop(crop_size) | ||
self._normalize = T.Normalize(mean=mean, std=std) | ||
self._crop_size = [crop_size] | ||
self._size = [resize_size] | ||
self._mean = list(mean) | ||
self._std = list(std) | ||
self._interpolation = interpolation | ||
|
||
def forward(self, img: Tensor) -> Tensor: | ||
img = self._crop(self._resize(img)) | ||
img = F.resize(img, self._size, interpolation=self._interpolation) | ||
img = F.center_crop(img, self._crop_size) | ||
if not isinstance(img, Tensor): | ||
img = F.pil_to_tensor(img) | ||
img = F.convert_image_dtype(img, torch.float) | ||
return self._normalize(img) | ||
img = F.normalize(img, mean=self._mean, std=self._std) | ||
return img | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Replacing the transform modules with their functional equivalents. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's equivalent. Confirmed by checking the accuracy of the models before and after. |
||
|
||
|
||
class Kinect400Eval(nn.Module): | ||
def __init__( | ||
self, | ||
resize_size: Tuple[int, int], | ||
crop_size: Tuple[int, int], | ||
resize_size: Tuple[int, int], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unrelated to this PR, but why do we use `Tuple[int, int]` for the sizes here rather than a single `int`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a good point. This is how the original recipes have been implemented, so I need to do this here too. As you can see, resize operates differently if you specify both dimensions vs. only one. Prior to merging, we should consider adding unions etc. here and cleaning up further. |
||
mean: Tuple[float, ...] = (0.43216, 0.394666, 0.37645), | ||
std: Tuple[float, ...] = (0.22803, 0.22145, 0.216989), | ||
interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR, | ||
interpolation: InterpolationMode = InterpolationMode.BILINEAR, | ||
) -> None: | ||
super().__init__() | ||
self._convert = T.ConvertImageDtype(torch.float) | ||
self._resize = T.Resize(resize_size, interpolation=interpolation) | ||
self._normalize = T.Normalize(mean=mean, std=std) | ||
self._crop = T.CenterCrop(crop_size) | ||
self._crop_size = list(crop_size) | ||
self._size = list(resize_size) | ||
self._mean = list(mean) | ||
self._std = list(std) | ||
self._interpolation = interpolation | ||
|
||
def forward(self, vid: Tensor) -> Tensor: | ||
vid = vid.permute(0, 3, 1, 2) # (T, H, W, C) => (T, C, H, W) | ||
vid = self._convert(vid) | ||
vid = self._resize(vid) | ||
vid = self._normalize(vid) | ||
vid = self._crop(vid) | ||
vid = F.resize(vid, self._size, interpolation=self._interpolation) | ||
vid = F.center_crop(vid, self._crop_size) | ||
vid = F.convert_image_dtype(vid, torch.float) | ||
vid = F.normalize(vid, mean=self._mean, std=self._std) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I reordered the operations. This is the more "usual/canonical" order of ops. I'm running tests to confirm the accuracy remains the same. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The accuracy before and after the change remains the same. The above change is safe. |
||
return vid.permute(1, 0, 2, 3) # (T, C, H, W) => (C, T, H, W) | ||
|
||
|
||
|
@@ -71,8 +75,8 @@ def __init__( | |
resize_size: int, | ||
mean: Tuple[float, ...] = (0.485, 0.456, 0.406), | ||
std: Tuple[float, ...] = (0.229, 0.224, 0.225), | ||
interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR, | ||
interpolation_target: T.InterpolationMode = T.InterpolationMode.NEAREST, | ||
interpolation: InterpolationMode = InterpolationMode.BILINEAR, | ||
interpolation_target: InterpolationMode = InterpolationMode.NEAREST, | ||
) -> None: | ||
super().__init__() | ||
self._size = [resize_size] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changing order to match the other presets.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just for appearance, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, just for styling. :)