Commit 3f19902

Separate Mixup from Cutmix.
1 parent 0e128f3 commit 3f19902

4 files changed: +143 −73 lines changed

references/classification/train.py

Lines changed: 8 additions & 3 deletions

@@ -168,9 +168,14 @@ def main(args):
     dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, args)
 
     collate_fn = None
-    if args.mixup_alpha > 0.0 or args.cutmix_alpha > 0.0:
-        mixupcutmix = torchvision.transforms.RandomMixupCutmix(len(dataset.classes), mixup_alpha=args.mixup_alpha,
-                                                               cutmix_alpha=args.cutmix_alpha)
+    num_classes = len(dataset.classes)
+    mixup_transforms = []
+    if args.mixup_alpha > 0.0:
+        mixup_transforms.append(torchvision.transforms.RandomMixup(num_classes, p=1.0, alpha=args.mixup_alpha))
+    if args.cutmix_alpha > 0.0:
+        mixup_transforms.append(torchvision.transforms.RandomCutmix(num_classes, p=1.0, alpha=args.cutmix_alpha))
+    if mixup_transforms:
+        mixupcutmix = torchvision.transforms.RandomChoice(mixup_transforms, p=[0.5, 0.5])
         collate_fn = lambda batch: mixupcutmix(*default_collate(batch))  # noqa: E731
     data_loader = torch.utils.data.DataLoader(
         dataset, batch_size=args.batch_size,
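
For reference, the new wiring can be exercised end to end outside the training script. A minimal sketch, assuming the RandomMixup/RandomCutmix classes as added in this commit; the dummy dataset and the alpha values are illustrative only:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataloader import default_collate
import torchvision

num_classes = 10
# Hypothetical stand-in for the dataset returned by load_data().
dataset = TensorDataset(torch.rand(8, 3, 224, 224), torch.randint(num_classes, (8,)))

# Same logic as the diff: one transform per enabled alpha, then a 50/50 pick per batch.
mixup_transforms = [
    torchvision.transforms.RandomMixup(num_classes, p=1.0, alpha=0.2),
    torchvision.transforms.RandomCutmix(num_classes, p=1.0, alpha=1.0),
]
mixupcutmix = torchvision.transforms.RandomChoice(mixup_transforms, p=[0.5, 0.5])

data_loader = DataLoader(dataset, batch_size=4,
                         collate_fn=lambda batch: mixupcutmix(*default_collate(batch)))
batch, target = next(iter(data_loader))
print(batch.shape, target.shape)  # torch.Size([4, 3, 224, 224]) torch.Size([4, 10])
```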

test/test_transforms.py

Lines changed: 2 additions & 1 deletion

@@ -1311,7 +1311,8 @@ def test_random_choice():
             transforms.Resize(15),
             transforms.Resize(20),
             transforms.CenterCrop(10)
-        ]
+        ],
+        [1 / 3, 1 / 3, 1 / 3]
     )
     img = transforms.ToPILImage()(torch.rand(3, 25, 25))
     num_samples = 250
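
Passing the explicit `[1 / 3, 1 / 3, 1 / 3]` weights keeps the picks uniform, so the frequency assertions over `num_samples` draws are unchanged. A sketch of the pattern the test relies on, with stand-in labels instead of real transforms:

```python
import random
from collections import Counter

choices = ["resize15", "resize20", "centercrop10"]  # stand-ins for the transforms
weights = [1 / 3, 1 / 3, 1 / 3]

# random.choices(..., weights=...) is what the reworked RandomChoice calls internally.
counts = Counter(random.choices(choices, weights=weights)[0] for _ in range(250))
print(counts)  # each label should land near 250 / 3, i.e. about 83
```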

test/test_transforms_tensor.py

Lines changed: 20 additions & 20 deletions

@@ -2,6 +2,7 @@
 import torch
 from torch._utils_internal import get_file_path_2
 from torch.utils.data import TensorDataset, DataLoader
+from torch.utils.data.dataloader import default_collate
 from torchvision import transforms as T
 from torchvision.io import read_image
 from torchvision.transforms import functional as F
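
The new import matters because `default_collate` turns a list of `(image, target)` samples into stacked batch tensors, which is the shape the two-argument transforms expect. Roughly:

```python
import torch
from torch.utils.data.dataloader import default_collate

samples = [(torch.rand(3, 4, 4), torch.tensor(1)),
           (torch.rand(3, 4, 4), torch.tensor(0))]
images, targets = default_collate(samples)
print(images.shape, targets.shape)  # torch.Size([2, 3, 4, 4]) torch.Size([2])
```
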
@@ -721,21 +722,16 @@ def test_gaussian_blur(device, meth_kwargs):
 
 
 @pytest.mark.parametrize('device', cpu_and_gpu())
-@pytest.mark.parametrize('alphas', [
-    {"mixup_alpha": 1.0, "cutmix_alpha": 1.0, 'cutmix_p': 1.0},
-    {"mixup_alpha": 1.0, "cutmix_alpha": 1.0, 'cutmix_p': 0.0},
-    {"mixup_alpha": 1.0, "cutmix_alpha": 1.0, 'p': 0.0},
-    {"mixup_alpha": 0.0, "cutmix_alpha": 1.0},
-    {"mixup_alpha": 1.0, "cutmix_alpha": 0.0},
-])
+@pytest.mark.parametrize('tranform', [T.RandomMixup, T.RandomCutmix])
+@pytest.mark.parametrize('p', [0.0, 1.0])
 @pytest.mark.parametrize('inplace', [True, False])
-def test_random_mixupcutmix(device, alphas, inplace):
+def test_random_mixupcutmix(device, tranform, p, inplace):
     batch_size = 32
     num_classes = 10
     batch = torch.rand(batch_size, 3, 44, 56, device=device)
     targets = torch.randint(num_classes, (batch_size, ), device=device, dtype=torch.int64)
 
-    fn = T.RandomMixupCutmix(num_classes, inplace=inplace, **alphas)
+    fn = tranform(num_classes, p=p, inplace=inplace)
     scripted_fn = torch.jit.script(fn)
 
     seed = torch.seed()

@@ -749,13 +745,14 @@ def test_random_mixupcutmix(device, alphas, inplace):
     fn.__repr__()
 
 
-def test_random_mixupcutmix_with_invalid_data():
+@pytest.mark.parametrize('tranform', [T.RandomMixup, T.RandomCutmix])
+def test_random_mixupcutmix_with_invalid_data(tranform):
     with pytest.raises(AssertionError, match="Please provide a valid positive value for the num_classes."):
-        T.RandomMixupCutmix(0)
-    with pytest.raises(AssertionError, match="Both alpha params can't be zero."):
-        T.RandomMixupCutmix(10, mixup_alpha=0.0, cutmix_alpha=0.0)
+        tranform(0)
+    with pytest.raises(AssertionError, match="Alpha param can't be zero."):
+        tranform(10, alpha=0.0)
 
-    t = T.RandomMixupCutmix(10)
+    t = tranform(10)
     with pytest.raises(ValueError, match="Batch ndim should be 4."):
         t(torch.rand(3, 60, 60), torch.randint(10, (1, )))
     with pytest.raises(ValueError, match="Target ndim should be 1."):

@@ -765,7 +762,11 @@ def test_random_mixupcutmix_with_invalid_data():
 
 
 @pytest.mark.parametrize('device', cpu_and_gpu())
-def test_random_mixupcutmix_with_real_data(device):
+@pytest.mark.parametrize('transform, expected', [
+    (T.RandomMixup, [60.77401351928711, 0.5151033997535706]),
+    (T.RandomCutmix, [70.13909912109375, 0.525851309299469])
+])
+def test_random_mixupcutmix_with_real_data(device, transform, expected):
     torch.manual_seed(12)
 
     # Build dummy dataset

@@ -778,17 +779,16 @@ def test_random_mixupcutmix_with_real_data(device):
                             torch.tensor([0, 1], device=device))
 
     # Use mixup in the collate
-    mixup = T.RandomMixupCutmix(2, cutmix_alpha=1.0, mixup_alpha=1.0)
-    dataloader = DataLoader(dataset, batch_size=2,
-                            collate_fn=lambda batch: mixup(*(torch.stack(x) for x in zip(*batch))))
+    trans = transform(2)
+    dataloader = DataLoader(dataset, batch_size=2, collate_fn=lambda batch: trans(*default_collate(batch)))
 
     # Test against known statistics about the produced images
     stats = []
     for _ in range(25):
         for b, t in dataloader:
-            stats.append(torch.stack([b.mean(), b.std(), t.std()]))
+            stats.append(torch.stack([b.std(), t.std()]))
 
     torch.testing.assert_close(
         torch.stack(stats).mean(dim=0),
-        torch.tensor([46.9443473815918, 64.79092407226562, 0.459820032119751])
+        torch.tensor(expected)
     )
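
The `expected` values are regression statistics captured once from a seeded run. A self-contained sketch of how such numbers are produced; the dataset below is a hypothetical stand-in, so its printed values will differ from the test's:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataloader import default_collate
from torchvision import transforms as T

torch.manual_seed(12)
dataset = TensorDataset(torch.rand(2, 3, 44, 56) * 255, torch.tensor([0, 1]))
trans = T.RandomMixup(2)  # or T.RandomCutmix(2)
dataloader = DataLoader(dataset, batch_size=2,
                        collate_fn=lambda batch: trans(*default_collate(batch)))

stats = []
for _ in range(25):
    for b, t in dataloader:
        stats.append(torch.stack([b.std(), t.std()]))
print(torch.stack(stats).mean(dim=0))  # record once, then assert against it
```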

torchvision/transforms/transforms.py

Lines changed: 113 additions & 49 deletions

@@ -22,7 +22,8 @@
     "RandomHorizontalFlip", "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop",
     "LinearTransformation", "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale",
     "RandomPerspective", "RandomErasing", "GaussianBlur", "InterpolationMode", "RandomInvert", "RandomPosterize",
-    "RandomSolarize", "RandomAdjustSharpness", "RandomAutocontrast", "RandomEqualize", 'RandomMixupCutmix']
+    "RandomSolarize", "RandomAdjustSharpness", "RandomAutocontrast", "RandomEqualize", 'RandomMixup',
+    "RandomCutmix"]
 
 
 class Compose:

@@ -515,9 +516,20 @@ def __call__(self, img):
 class RandomChoice(RandomTransforms):
     """Apply single transformation randomly picked from a list. This transform does not support torchscript.
     """
-    def __call__(self, img):
-        t = random.choice(self.transforms)
-        return t(img)
+    def __init__(self, transforms, p=None):
+        super().__init__(transforms)
+        if p is not None and not isinstance(p, Sequence):
+            raise TypeError("Argument transforms should be a sequence")
+        self.p = p
+
+    def __call__(self, *args):
+        t = random.choices(self.transforms, weights=self.p)[0]
+        return t(*args)
+
+    def __repr__(self):
+        format_string = super().__repr__()
+        format_string += '(p={0})'.format(self.p)
+        return format_string
 
 
 class RandomCrop(torch.nn.Module):
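
With this change RandomChoice gains optional sampling weights and forwards any number of positional arguments to the picked transform, which is what lets it dispatch `(batch, target)` pairs. A quick sketch using the classes added further down in this commit:

```python
import torch
from torchvision import transforms as T

picker = T.RandomChoice(
    [T.RandomMixup(10, p=1.0), T.RandomCutmix(10, p=1.0)],
    p=[0.5, 0.5],  # weights for which transform is drawn, not an "apply" probability
)
batch = torch.rand(4, 3, 32, 32)
target = torch.randint(10, (4,))
batch, target = picker(batch, target)  # both arguments reach the chosen transform
```

Note that each drawn transform still applies its own `p` on top of the draw.
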
@@ -1956,38 +1968,103 @@ def __repr__(self):
 
 
 # TODO: move this to references before merging and delete the tests
-class RandomMixupCutmix(torch.nn.Module):
-    """Randomly apply Mixup or Cutmix to the provided batch and targets.
-    The class implements the data augmentations as described in the papers
-    `"mixup: Beyond Empirical Risk Minimization" <https://arxiv.org/abs/1710.09412>`_ and
+class RandomMixup(torch.nn.Module):
+    """Randomly apply Mixup to the provided batch and targets.
+    The class implements the data augmentations as described in the paper
+    `"mixup: Beyond Empirical Risk Minimization" <https://arxiv.org/abs/1710.09412>`_.
+
+    Args:
+        num_classes (int): number of classes used for one-hot encoding.
+        p (float): probability of the batch being transformed. Default value is 0.5.
+        alpha (float): hyperparameter of the Beta distribution used for mixup.
+            Default value is 1.0.
+        inplace (bool): boolean to make this transform inplace. Default set to False.
+    """
+
+    def __init__(self, num_classes: int,
+                 p: float = 0.5, alpha: float = 1.0,
+                 inplace: bool = False) -> None:
+        super().__init__()
+        assert num_classes > 0, "Please provide a valid positive value for the num_classes."
+        assert alpha > 0, "Alpha param can't be zero."
+
+        self.num_classes = num_classes
+        self.p = p
+        self.alpha = alpha
+        self.inplace = inplace
+
+    def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
+        """
+        Args:
+            batch (Tensor): Float tensor of size (B, C, H, W)
+            target (Tensor): Integer tensor of size (B, )
+
+        Returns:
+            Tensor: Randomly transformed batch.
+        """
+        if batch.ndim != 4:
+            raise ValueError("Batch ndim should be 4. Got {}".format(batch.ndim))
+        elif target.ndim != 1:
+            raise ValueError("Target ndim should be 1. Got {}".format(target.ndim))
+        elif target.dtype != torch.int64:
+            raise ValueError("Target dtype should be torch.int64. Got {}".format(target.dtype))
+
+        if not self.inplace:
+            batch = batch.clone()
+            # target = target.clone()
+
+        target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=torch.float32)
+        if torch.rand(1).item() >= self.p:
+            return batch, target
+
+        # It's faster to roll the batch by one instead of shuffling it to create image pairs
+        batch_rolled = batch.roll(1, 0)
+        target_rolled = target.roll(1)
+
+        # Implemented as on mixup paper, page 3.
+        lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0])
+        batch_rolled.mul_(1.0 - lambda_param)
+        batch.mul_(lambda_param).add_(batch_rolled)
+
+        target_rolled.mul_(1.0 - lambda_param)
+        target.mul_(lambda_param).add_(target_rolled)
+
+        return batch, target
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__ + '('
+        s += 'num_classes={num_classes}'
+        s += ', p={p}'
+        s += ', alpha={alpha}'
+        s += ', inplace={inplace}'
+        s += ')'
+        return s.format(**self.__dict__)
+
+
+class RandomCutmix(torch.nn.Module):
+    """Randomly apply Cutmix to the provided batch and targets.
+    The class implements the data augmentations as described in the paper
     `"CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features"
     <https://arxiv.org/abs/1905.04899>`_.
 
     Args:
         num_classes (int): number of classes used for one-hot encoding.
-        p (float): probability of the batch being transformed. Default value is 1.0.
-        mixup_alpha (float): hyperparameter of the Beta distribution used for mixup.
-            Set to 0.0 to turn off. Default value is 1.0.
-        cutmix_p (float): probability of using cutmix instead of mixup when both are on.
-            Default value is 0.5.
-        cutmix_alpha (float): hyperparameter of the Beta distribution used for cutmix.
-            Set to 0.0 to turn off. Default value is 0.0.
+        p (float): probability of the batch being transformed. Default value is 0.5.
+        alpha (float): hyperparameter of the Beta distribution used for cutmix.
+            Default value is 1.0.
         inplace (bool): boolean to make this transform inplace. Default set to False.
     """
 
     def __init__(self, num_classes: int,
-                 p: float = 1.0, mixup_alpha: float = 1.0,
-                 cutmix_p: float = 0.5, cutmix_alpha: float = 0.0,
+                 p: float = 0.5, alpha: float = 1.0,
                  inplace: bool = False) -> None:
         super().__init__()
         assert num_classes > 0, "Please provide a valid positive value for the num_classes."
-        assert mixup_alpha > 0 or cutmix_alpha > 0, "Both alpha params can't be zero."
+        assert alpha > 0, "Alpha param can't be zero."
 
         self.num_classes = num_classes
         self.p = p
-        self.mixup_alpha = mixup_alpha
-        self.cutmix_p = cutmix_p
-        self.cutmix_alpha = cutmix_alpha
+        self.alpha = alpha
         self.inplace = inplace
 
     def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
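
The mixing in RandomMixup above is the convex combination from the mixup paper: draw lambda ~ Beta(alpha, alpha), then blend each image (and its one-hot target) with its rolled neighbour. The same arithmetic in plain tensor ops, rolling along the batch dimension to form the pairs:

```python
import torch

alpha = 1.0
# The commit samples Beta(alpha, alpha) via torch._sample_dirichlet; the public
# torch.distributions API draws from the same distribution.
lam = torch.distributions.Beta(alpha, alpha).sample().item()

batch = torch.rand(4, 3, 8, 8)
target = torch.nn.functional.one_hot(torch.arange(4), num_classes=4).float()

mixed_batch = lam * batch + (1.0 - lam) * batch.roll(1, 0)
mixed_target = lam * target + (1.0 - lam) * target.roll(1, 0)
print(mixed_target.sum(dim=1))  # each row still sums to 1.0
```
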
@@ -2018,35 +2095,24 @@ def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
         batch_rolled = batch.roll(1, 0)
         target_rolled = target.roll(1)
 
-        if self.mixup_alpha <= 0.0:
-            use_mixup = False
-        else:
-            use_mixup = self.cutmix_alpha <= 0.0 or torch.rand(1).item() >= self.cutmix_p
-
-        if use_mixup:
-            # Implemented as on mixup paper, page 3.
-            lambda_param = float(torch._sample_dirichlet(torch.tensor([self.mixup_alpha, self.mixup_alpha]))[0])
-            batch_rolled.mul_(1.0 - lambda_param)
-            batch.mul_(lambda_param).add_(batch_rolled)
-        else:
-            # Implemented as on cutmix paper, page 12 (with minor corrections on typos).
-            lambda_param = float(torch._sample_dirichlet(torch.tensor([self.cutmix_alpha, self.cutmix_alpha]))[0])
-            W, H = F.get_image_size(batch)
+        # Implemented as on cutmix paper, page 12 (with minor corrections on typos).
+        lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0])
+        W, H = F.get_image_size(batch)
 
-            r_x = torch.randint(W, (1,))
-            r_y = torch.randint(H, (1,))
+        r_x = torch.randint(W, (1,))
+        r_y = torch.randint(H, (1,))
 
-            r = 0.5 * math.sqrt(1.0 - lambda_param)
-            r_w_half = int(r * W)
-            r_h_half = int(r * H)
+        r = 0.5 * math.sqrt(1.0 - lambda_param)
+        r_w_half = int(r * W)
+        r_h_half = int(r * H)
 
-            x1 = int(torch.clamp(r_x - r_w_half, min=0))
-            y1 = int(torch.clamp(r_y - r_h_half, min=0))
-            x2 = int(torch.clamp(r_x + r_w_half, max=W))
-            y2 = int(torch.clamp(r_y + r_h_half, max=H))
+        x1 = int(torch.clamp(r_x - r_w_half, min=0))
+        y1 = int(torch.clamp(r_y - r_h_half, min=0))
+        x2 = int(torch.clamp(r_x + r_w_half, max=W))
+        y2 = int(torch.clamp(r_y + r_h_half, max=H))
 
-            batch[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2]
-            lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H))
+        batch[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2]
+        lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H))
 
         target_rolled.mul_(1.0 - lambda_param)
         target.mul_(lambda_param).add_(target_rolled)
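
The cutmix path pastes a random box from the rolled batch and then recomputes lambda as the fraction of pixels kept, so the label mix matches the pixel mix exactly. A worked check of the box arithmetic under assumed values:

```python
import math

W, H, lam = 56, 44, 0.3                  # assumed image size and initial Beta sample
r = 0.5 * math.sqrt(1.0 - lam)           # half-size ratio of the cut box
r_w_half, r_h_half = int(r * W), int(r * H)
r_x, r_y = 20, 10                        # assumed box centre (randint(W), randint(H) in the code)

x1, y1 = max(r_x - r_w_half, 0), max(r_y - r_h_half, 0)
x2, y2 = min(r_x + r_w_half, W), min(r_y + r_h_half, H)

# Fraction of the original image that survives; clipping at the borders is why
# lambda must be recomputed from the actual box area.
lam_adjusted = 1.0 - (x2 - x1) * (y2 - y1) / (W * H)
print((x1, y1, x2, y2), lam_adjusted)
```
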
@@ -2057,9 +2123,7 @@ def __repr__(self) -> str:
         s = self.__class__.__name__ + '('
         s += 'num_classes={num_classes}'
         s += ', p={p}'
-        s += ', mixup_alpha={mixup_alpha}'
-        s += ', cutmix_p={cutmix_p}'
-        s += ', cutmix_alpha={cutmix_alpha}'
+        s += ', alpha={alpha}'
         s += ', inplace={inplace}'
         s += ')'
         return s.format(**self.__dict__)
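
As the updated tests exercise, both classes stay TorchScript-compatible: they are plain nn.Modules with typed forward signatures. A quick check, assuming the classes as they exist at this commit:

```python
import torch
from torchvision import transforms as T

fn = T.RandomCutmix(num_classes=10, p=1.0, alpha=1.0)
scripted_fn = torch.jit.script(fn)  # mirrors torch.jit.script(fn) in the tests

batch = torch.rand(8, 3, 44, 56)
target = torch.randint(10, (8,), dtype=torch.int64)
out_batch, out_target = scripted_fn(batch, target)
print(out_batch.shape, out_target.shape)  # (8, 3, 44, 56) and (8, 10)
```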
