Moving mixup and cutmix to references.

datumbox · datumbox · commit eb932b956dde · 2021-09-15T15:17:31.000+01:00
diff --git a/references/classification/train.py b/references/classification/train.py
@@ -10,6 +10,7 @@
 from torchvision.transforms.functional import InterpolationMode
 
 import presets
+import transforms
 import utils
 
 try:
@@ -170,9 +171,9 @@ def main(args):
     num_classes = len(dataset.classes)
     mixup_transforms = []
     if args.mixup_alpha > 0.0:
-        mixup_transforms.append(torchvision.transforms.RandomMixup(num_classes, p=1.0, alpha=args.mixup_alpha))
+        mixup_transforms.append(transforms.RandomMixup(num_classes, p=1.0, alpha=args.mixup_alpha))
     if args.cutmix_alpha > 0.0:
-        mixup_transforms.append(torchvision.transforms.RandomCutmix(num_classes, p=1.0, alpha=args.cutmix_alpha))
+        mixup_transforms.append(transforms.RandomCutmix(num_classes, p=1.0, alpha=args.cutmix_alpha))
     if mixup_transforms:
         mixupcutmix = torchvision.transforms.RandomChoice(mixup_transforms)
         collate_fn = lambda batch: mixupcutmix(*default_collate(batch))  # noqa: E731
diff --git a/references/classification/transforms.py b/references/classification/transforms.py
@@ -0,0 +1,175 @@
+import math
+import torch
+
+from typing import Tuple
+from torch import Tensor
+from torchvision.transforms import functional as F
+
+
+class RandomMixup(torch.nn.Module):
+    """Randomly apply Mixup to the provided batch and targets.
+    The class implements the data augmentations as described in the paper
+    `"mixup: Beyond Empirical Risk Minimization" <https://arxiv.org/abs/1710.09412>`_.
+
+    Args:
+        num_classes (int): number of classes used for one-hot encoding.
+        p (float): probability of the batch being transformed. Default value is 0.5.
+        alpha (float): hyperparameter of the Beta distribution used for mixup.
+            Default value is 1.0.
+        inplace (bool): boolean to make this transform inplace. Default set to False.
+    """
+
+    def __init__(self, num_classes: int,
+                 p: float = 0.5, alpha: float = 1.0,
+                 inplace: bool = False) -> None:
+        super().__init__()
+        assert num_classes > 0, "Please provide a valid positive value for the num_classes."
+        assert alpha > 0, "Alpha param can't be zero."
+
+        self.num_classes = num_classes
+        self.p = p
+        self.alpha = alpha
+        self.inplace = inplace
+
+    def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
+        """
+        Args:
+            batch (Tensor): Float tensor of size (B, C, H, W)
+            target (Tensor): Integer tensor of size (B, )
+
+        Returns:
+            Tuple[Tensor, Tensor]: Randomly transformed batch and float32 targets of size (B, num_classes).
+        """
+        if batch.ndim != 4:
+            raise ValueError("Batch ndim should be 4. Got {}".format(batch.ndim))
+        elif target.ndim != 1:
+            raise ValueError("Target ndim should be 1. Got {}".format(target.ndim))
+        elif not batch.is_floating_point():
+            raise TypeError('Batch dtype should be a float tensor. Got {}.'.format(batch.dtype))
+        elif target.dtype != torch.int64:
+            raise TypeError("Target dtype should be torch.int64. Got {}".format(target.dtype))
+
+        if not self.inplace:
+            batch = batch.clone()
+            target = target.clone()
+
+        # The checks above guarantee a 1-D int64 target here, so it is always one-hot encoded.
+        target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=torch.float32)
+
+        if torch.rand(1).item() >= self.p:
+            return batch, target
+
+        # Roll by one along dim 0 (faster than shuffling); a dim-less roll would scramble the one-hot rows.
+        batch_rolled = batch.roll(1, 0)
+        target_rolled = target.roll(1, 0)
+
+        # Implemented as on mixup paper, page 3.
+        lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0])
+        batch_rolled.mul_(1.0 - lambda_param)
+        batch.mul_(lambda_param).add_(batch_rolled)
+
+        target_rolled.mul_(1.0 - lambda_param)
+        target.mul_(lambda_param).add_(target_rolled)
+
+        return batch, target
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__ + '('
+        s += 'num_classes={num_classes}'
+        s += ', p={p}'
+        s += ', alpha={alpha}'
+        s += ', inplace={inplace}'
+        s += ')'
+        return s.format(**self.__dict__)
+
+
+class RandomCutmix(torch.nn.Module):
+    """Randomly apply Cutmix to the provided batch and targets.
+    The class implements the data augmentations as described in the paper
+    `"CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features"
+    <https://arxiv.org/abs/1905.04899>`_.
+
+    Args:
+        num_classes (int): number of classes used for one-hot encoding.
+        p (float): probability of the batch being transformed. Default value is 0.5.
+        alpha (float): hyperparameter of the Beta distribution used for cutmix.
+            Default value is 1.0.
+        inplace (bool): boolean to make this transform inplace. Default set to False.
+    """
+
+    def __init__(self, num_classes: int,
+                 p: float = 0.5, alpha: float = 1.0,
+                 inplace: bool = False) -> None:
+        super().__init__()
+        assert num_classes > 0, "Please provide a valid positive value for the num_classes."
+        assert alpha > 0, "Alpha param can't be zero."
+
+        self.num_classes = num_classes
+        self.p = p
+        self.alpha = alpha
+        self.inplace = inplace
+
+    def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
+        """
+        Args:
+            batch (Tensor): Float tensor of size (B, C, H, W)
+            target (Tensor): Integer tensor of size (B, )
+
+        Returns:
+            Tuple[Tensor, Tensor]: Randomly transformed batch and float32 targets of size (B, num_classes).
+        """
+        if batch.ndim != 4:
+            raise ValueError("Batch ndim should be 4. Got {}".format(batch.ndim))
+        elif target.ndim != 1:
+            raise ValueError("Target ndim should be 1. Got {}".format(target.ndim))
+        elif not batch.is_floating_point():
+            raise TypeError('Batch dtype should be a float tensor. Got {}.'.format(batch.dtype))
+        elif target.dtype != torch.int64:
+            raise TypeError("Target dtype should be torch.int64. Got {}".format(target.dtype))
+
+        if not self.inplace:
+            batch = batch.clone()
+            target = target.clone()
+
+        # The checks above guarantee a 1-D int64 target here, so it is always one-hot encoded.
+        target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=torch.float32)
+
+        if torch.rand(1).item() >= self.p:
+            return batch, target
+
+        # Roll by one along dim 0 (faster than shuffling); a dim-less roll would scramble the one-hot rows.
+        batch_rolled = batch.roll(1, 0)
+        target_rolled = target.roll(1, 0)
+
+        # Implemented as on cutmix paper, page 12 (with minor corrections on typos).
+        lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0])
+        W, H = F.get_image_size(batch)
+
+        r_x = torch.randint(W, (1,))
+        r_y = torch.randint(H, (1,))
+
+        r = 0.5 * math.sqrt(1.0 - lambda_param)  # half-side ratio of a box covering (1 - lambda) of the area
+        r_w_half = int(r * W)
+        r_h_half = int(r * H)
+
+        x1 = int(torch.clamp(r_x - r_w_half, min=0))
+        y1 = int(torch.clamp(r_y - r_h_half, min=0))
+        x2 = int(torch.clamp(r_x + r_w_half, max=W))
+        y2 = int(torch.clamp(r_y + r_h_half, max=H))
+
+        batch[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2]
+        lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H))  # box may have been clipped at the borders
+
+        target_rolled.mul_(1.0 - lambda_param)
+        target.mul_(lambda_param).add_(target_rolled)
+
+        return batch, target
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__ + '('
+        s += 'num_classes={num_classes}'
+        s += ', p={p}'
+        s += ', alpha={alpha}'
+        s += ', inplace={inplace}'
+        s += ')'
+        return s.format(**self.__dict__)
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
@@ -1,10 +1,6 @@
 import os
 import torch
-from torch._utils_internal import get_file_path_2
-from torch.utils.data import TensorDataset, DataLoader
-from torch.utils.data.dataloader import default_collate
 from torchvision import transforms as T
-from torchvision.io import read_image
 from torchvision.transforms import functional as F
 from torchvision.transforms import InterpolationMode
 
@@ -719,78 +715,3 @@ def test_gaussian_blur(device, meth_kwargs):
         T.GaussianBlur, meth_kwargs=meth_kwargs,
         test_exact_match=False, device=device, agg_method="max", tol=tol
     )
-
-
-@pytest.mark.parametrize('device', cpu_and_gpu())
-@pytest.mark.parametrize('tranform', [T.RandomMixup, T.RandomCutmix])
-@pytest.mark.parametrize('p', [0.0, 1.0])
-@pytest.mark.parametrize('inplace', [True, False])
-def test_random_mixupcutmix(device, tranform, p, inplace):
-    batch_size = 32
-    num_classes = 10
-    batch = torch.rand(batch_size, 3, 44, 56, device=device)
-    targets = torch.randint(num_classes, (batch_size, ), device=device, dtype=torch.int64)
-
-    fn = tranform(num_classes, p=p, inplace=inplace)
-    scripted_fn = torch.jit.script(fn)
-
-    seed = torch.seed()
-    output = fn(batch.clone(), targets.clone())
-
-    torch.manual_seed(seed)
-    output_scripted = scripted_fn(batch.clone(), targets.clone())
-    assert_equal(output[0], output_scripted[0])
-    assert_equal(output[1], output_scripted[1])
-
-    fn.__repr__()
-
-
-@pytest.mark.parametrize('tranform', [T.RandomMixup, T.RandomCutmix])
-def test_random_mixupcutmix_with_invalid_data(tranform):
-    with pytest.raises(AssertionError, match="Please provide a valid positive value for the num_classes."):
-        tranform(0)
-    with pytest.raises(AssertionError, match="Alpha param can't be zero."):
-        tranform(10, alpha=0.0)
-
-    t = tranform(10)
-    with pytest.raises(ValueError, match="Batch ndim should be 4."):
-        t(torch.rand(3, 60, 60), torch.randint(10, (1, )))
-    with pytest.raises(ValueError, match="Target ndim should be 1."):
-        t(torch.rand(32, 3, 60, 60), torch.randint(10, (32, 1)))
-    with pytest.raises(TypeError, match="Batch dtype should be a float tensor."):
-        t(torch.randint(256, (32, 3, 60, 60), dtype=torch.uint8), torch.randint(10, (32, )))
-    with pytest.raises(TypeError, match="Target dtype should be torch.int64."):
-        t(torch.rand(32, 3, 60, 60), torch.randint(10, (32, ), dtype=torch.int32))
-
-
-@pytest.mark.parametrize('device', cpu_and_gpu())
-@pytest.mark.parametrize('transform, expected', [
-    (T.RandomMixup, [60.77401351928711, 0.5151033997535706]),
-    (T.RandomCutmix, [70.13909912109375, 0.525851309299469])
-])
-def test_random_mixupcutmix_with_real_data(device, transform, expected):
-    torch.manual_seed(12)
-
-    # Build dummy dataset
-    images = []
-    for test_file in [("encode_jpeg", "grace_hopper_517x606.jpg"), ("fakedata", "logos", "rgb_pytorch.png")]:
-        fullpath = (os.path.dirname(os.path.abspath(__file__)), 'assets') + test_file
-        img = read_image(get_file_path_2(*fullpath))
-        images.append(F.resize(img, [224, 224]))
-    dataset = TensorDataset(torch.stack(images).to(device=device, dtype=torch.float32),
-                            torch.tensor([0, 1], device=device))
-
-    # Use mixup in the collate
-    trans = transform(2)
-    dataloader = DataLoader(dataset, batch_size=2, collate_fn=lambda batch: trans(*default_collate(batch)))
-
-    # Test against known statistics about the produced images
-    stats = []
-    for _ in range(25):
-        for b, t in dataloader:
-            stats.append(torch.stack([b.std(), t.std()]))
-
-    torch.testing.assert_close(
-        torch.stack(stats).mean(dim=0),
-        torch.tensor(expected, device=device)
-    )
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py