From 5ef52c68a087f617ff2384b3a81263a35f386457 Mon Sep 17 00:00:00 2001 From: Felix Wu Date: Wed, 8 Mar 2017 00:43:13 -0500 Subject: [PATCH 1/5] add camvid --- torchvision/datasets/__init__.py | 1 + torchvision/datasets/camvid.py | 118 +++++++++++++++++++++++ torchvision/joint_transforms.py | 155 +++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 torchvision/datasets/camvid.py create mode 100644 torchvision/joint_transforms.py diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 330dc8be4e5..89351f74fac 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -4,6 +4,7 @@ from .cifar import CIFAR10, CIFAR100 from .stl10 import STL10 from .mnist import MNIST +from .camvid import CamVid __all__ = ('LSUN', 'LSUNClass', 'ImageFolder', diff --git a/torchvision/datasets/camvid.py b/torchvision/datasets/camvid.py new file mode 100644 index 00000000000..706b6bdf1a4 --- /dev/null +++ b/torchvision/datasets/camvid.py @@ -0,0 +1,118 @@ +from __future__ import print_function + +import os +import torch +import torch.utils.data as data +import numpy as np +from PIL import Image +from torchvision.datasets.folder import is_image_file, default_loader + + +classes = ['Sky', 'Building', 'Column-Pole', 'Road', + 'Sidewalk', 'Tree', 'Sign-Symbol', 'Fence', 'Car', 'Pedestrain', + 'Bicyclist', 'Void'] + +# weights when using median frequency balancing used in SegNet paper +# https://arxiv.org/pdf/1511.00561.pdf +# The numbers were generated by https://github.com/yandex/segnet-torch/blob/master/datasets/camvid-gen.lua +class_weight = [0.58872014284134, 0.51052379608154, 2.6966278553009, 0.45021694898605, 1.1785038709641, + 0.77028578519821, 2.4782588481903, 2.5273461341858, 1.0122526884079, 3.2375309467316, + 4.1312313079834, 0] +# mean and std +mean = [0.41189489566336, 0.4251328133025, 0.4326707089857] +std = [0.27413549931506, 0.28506257482912, 0.28284674400252] + +class_color = [ + (128, 128, 128), + (128, 0, 0), + (192, 192, 128), + (128, 64, 128), + (0, 0, 192), + (128, 128, 0), + (192, 128, 128), + (64, 64, 128), + (64, 0, 128), + (64, 64, 0), + (0, 128, 192), + (0, 0, 0), +] + + +def make_dataset(dir): + images = [] + for root, _, fnames in sorted(os.walk(dir)): + for fname in fnames: + if is_image_file(fname): + path = os.path.join(root, fname) + item = path + images.append(item) + return images + + +def LabelToLongTensor(pic): + label = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) + label = label.view(pic.size[1], pic.size[0], 1) + label = label.transpose(0, 1).transpose(0, 2).squeeze().contiguous().long() + return label + + +def LabelToPILImage(label): + label = label.unsqueeze(0) + colored_label = torch.zeros(3, label.size(1), label.size(2)).byte() + for i, color in enumerate(class_color): + mask = label.eq(i) + for j in range(3): + colored_label[j].masked_fill_(mask, color[j]) + npimg = colored_label.numpy() + npimg = np.transpose(npimg, (1, 2, 0)) + mode = None + if npimg.shape[2] == 1: + npimg = npimg[:, :, 0] + mode = "L" + + return Image.fromarray(npimg, mode=mode) + + +class CamVid(data.Dataset): + + def __init__(self, root, split='train', joint_transform=None, + transform=None, download=False, + loader=default_loader): + self.root = root + assert split in ('train', 'val', 'test') + self.split = split + self.transform = transform + self.joint_transform = joint_transform + self.loader = loader + self.class_weight = class_weight + self.classes = classes + self.class_weight = 
class_weight + self.mean = mean + self.std = std + + if download: + self.download() + + self.imgs = make_dataset(os.path.join(self.root, self.split)) + + def __getitem__(self, index): + path = self.imgs[index] + img = self.loader(path) + target = Image.open(path.replace(self.split, self.split + 'annot')) + + if self.joint_transform is not None: + img, target = self.joint_transform([img, target]) + + if self.transform is not None: + img = self.transform(img) + + target = LabelToLongTensor(target) + return img, target + + def __len__(self): + return len(self.imgs) + + def download(self): + # TODO: please download the dataset from + # https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid + raise NotImplementedError diff --git a/torchvision/joint_transforms.py b/torchvision/joint_transforms.py new file mode 100644 index 00000000000..1c08c15b9e8 --- /dev/null +++ b/torchvision/joint_transforms.py @@ -0,0 +1,155 @@ +from __future__ import division +import torch +import math +import random +from PIL import Image, ImageOps +import numpy as np +import numbers +import types + +class JointScale(object): + """Rescales the input PIL.Image to the given 'size'. + 'size' will be the size of the smaller edge. + For example, if height > width, then image will be + rescaled to (size * height / width, size) + size: size of the smaller edge + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, interpolation=Image.BILINEAR): + self.size = size + self.interpolation = interpolation + + def __call__(self, imgs): + w, h = imgs[0].size + if (w <= h and w == self.size) or (h <= w and h == self.size): + return img + if w < h: + ow = self.size + oh = int(self.size * h / w) + return [img.resize((ow, oh), self.interpolation) for img in imgs] + else: + oh = self.size + ow = int(self.size * w / h) + return [img.resize((ow, oh), self.interpolation) for img in imgs] + + +class JointCenterCrop(object): + """Crops the given PIL.Image at the center to have a region of + the given size. size can be a tuple (target_height, target_width) + or an integer, in which case the target will be of a square shape (size, size) + """ + + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + def __call__(self, imgs): + w, h = imgs[0].size + th, tw = self.size + x1 = int(round((w - tw) / 2.)) + y1 = int(round((h - th) / 2.)) + return [img.crop((x1, y1, x1 + tw, y1 + th)) for img in imgs] + + +class JointPad(object): + """Pads the given PIL.Image on all sides with the given "pad" value""" + + def __init__(self, padding, fill=0): + assert isinstance(padding, numbers.Number) + assert isinstance(fill, numbers.Number) or isinstance(fill, str) or isinstance(fill, tuple) + self.padding = padding + self.fill = fill + + def __call__(self, imgs): + return [ImageOps.expand(img, border=self.padding, fill=self.fill) for img in imgs] + + +class JointLambda(object): + """Applies a lambda as a transform.""" + + def __init__(self, lambd): + assert isinstance(lambd, types.LambdaType) + self.lambd = lambd + + def __call__(self, imgs): + return [self.lambd(img) for img in imgs] + + + +class JointRandomCrop(object): + """Crops the given list of PIL.Image at a random location to have a region of + the given size. 
size can be a tuple (target_height, target_width) + or an integer, in which case the target will be of a square shape (size, size) + """ + + def __init__(self, size, padding=0): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __call__(self, imgs): + if self.padding > 0: + imgs = [ImageOps.expand(img, border=self.padding, fill=0) for img in imgs] + + w, h = imgs[0].size + th, tw = self.size + if w == tw and h == th: + return imgs + + x1 = random.randint(0, w - tw) + y1 = random.randint(0, h - th) + return [img.crop((x1, y1, x1 + tw, y1 + th)) for img in imgs] + + +class JointRandomHorizontalFlip(object): + """Randomly horizontally flips the given list of PIL.Image with a probability of 0.5 + """ + + def __call__(self, imgs): + if random.random() < 0.5: + return [img.transpose(Image.FLIP_LEFT_RIGHT) for img in imgs] + return imgs + + +class JointRandomSizedCrop(object): + """Random crop the given list of PIL.Image to a random size of (0.08 to 1.0) of the original size + and and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio + This is popularly used to train the Inception networks + size: size of the smaller edge + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, interpolation=Image.BILINEAR): + self.size = size + self.interpolation = interpolation + + def __call__(self, imgs): + for attempt in range(10): + area = imgs[0].size[0] * imgs[0].size[1] + target_area = random.uniform(0.08, 1.0) * area + aspect_ratio = random.uniform(3. / 4, 4. / 3) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if random.random() < 0.5: + w, h = h, w + + if w <= imgs[0].size[0] and h <= imgs[0].size[1]: + x1 = random.randint(0, imgs[0].size[0] - w) + y1 = random.randint(0, imgs[0].size[1] - h) + + imgs = [img.crop((x1, y1, x1 + w, y1 + h)) for img in imgs] + assert(imgs[0].size == (w, h)) + + return [img.resize((self.size, self.size), self.interpolation) for img in imgs] + + # Fallback + scale = JointScale(self.size, interpolation=self.interpolation) + crop = JointCenterCrop(self.size) + return crop(scale(img)) From 2ab4ee6f9ad08f85118a4ca5c16d5eddcb93e84e Mon Sep 17 00:00:00 2001 From: Felix Wu Date: Wed, 8 Mar 2017 01:00:33 -0500 Subject: [PATCH 2/5] fix bug --- torchvision/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchvision/__init__.py b/torchvision/__init__.py index 5f8137910b4..5624054e7cd 100644 --- a/torchvision/__init__.py +++ b/torchvision/__init__.py @@ -1,4 +1,5 @@ from torchvision import models from torchvision import datasets from torchvision import transforms +from torchvision import joint_transforms from torchvision import utils From cf491d301f62ae9c77ff7250fb7def5cd55ec963 Mon Sep 17 00:00:00 2001 From: Felix Wu Date: Wed, 8 Mar 2017 01:15:57 -0500 Subject: [PATCH 3/5] add fix bugs in joint_transforms --- torchvision/joint_transforms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/joint_transforms.py b/torchvision/joint_transforms.py index 1c08c15b9e8..a5ffe3d1358 100644 --- a/torchvision/joint_transforms.py +++ b/torchvision/joint_transforms.py @@ -7,6 +7,7 @@ import numbers import types + class JointScale(object): """Rescales the input PIL.Image to the given 'size'. 'size' will be the size of the smaller edge. 
@@ -23,7 +24,7 @@ def __init__(self, size, interpolation=Image.BILINEAR): def __call__(self, imgs): w, h = imgs[0].size if (w <= h and w == self.size) or (h <= w and h == self.size): - return img + return imgs if w < h: ow = self.size oh = int(self.size * h / w) @@ -78,7 +79,6 @@ def __call__(self, imgs): return [self.lambd(img) for img in imgs] - class JointRandomCrop(object): """Crops the given list of PIL.Image at a random location to have a region of the given size. size can be a tuple (target_height, target_width) @@ -152,4 +152,4 @@ def __call__(self, imgs): # Fallback scale = JointScale(self.size, interpolation=self.interpolation) crop = JointCenterCrop(self.size) - return crop(scale(img)) + return crop(scale(imgs)) From 93c586dd6c8c97589064b2a4d7c96770cadeadbf Mon Sep 17 00:00:00 2001 From: Felix Wu Date: Sat, 18 Mar 2017 18:29:47 -0400 Subject: [PATCH 4/5] make function private --- torchvision/datasets/camvid.py | 61 +++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/torchvision/datasets/camvid.py b/torchvision/datasets/camvid.py index 706b6bdf1a4..70f0ddd5622 100644 --- a/torchvision/datasets/camvid.py +++ b/torchvision/datasets/camvid.py @@ -5,7 +5,7 @@ import torch.utils.data as data import numpy as np from PIL import Image -from torchvision.datasets.folder import is_image_file, default_loader +from .folder import is_image_file, default_loader classes = ['Sky', 'Building', 'Column-Pole', 'Road', @@ -38,7 +38,7 @@ ] -def make_dataset(dir): +def _make_dataset(dir): images = [] for root, _, fnames in sorted(os.walk(dir)): for fname in fnames: @@ -49,39 +49,46 @@ def make_dataset(dir): return images -def LabelToLongTensor(pic): - label = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) - label = label.view(pic.size[1], pic.size[0], 1) - label = label.transpose(0, 1).transpose(0, 2).squeeze().contiguous().long() - return label - - -def LabelToPILImage(label): - label = label.unsqueeze(0) - colored_label = torch.zeros(3, label.size(1), label.size(2)).byte() - for i, color in enumerate(class_color): - mask = label.eq(i) - for j in range(3): - colored_label[j].masked_fill_(mask, color[j]) - npimg = colored_label.numpy() - npimg = np.transpose(npimg, (1, 2, 0)) - mode = None - if npimg.shape[2] == 1: - npimg = npimg[:, :, 0] - mode = "L" - - return Image.fromarray(npimg, mode=mode) +class LabelToLongTensor(object): + def __call__(self, pic): + if isinstance(pic, np.ndarray): + # handle numpy array + label = torch.from_numpy(pic).long() + else: + label = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) + label = label.view(pic.size[1], pic.size[0], 1) + label = label.transpose(0, 1).transpose(0, 2).squeeze().contiguous().long() + return label + + +class LabelTensorToPILImage(object): + def __call__(self, label): + label = label.unsqueeze(0) + colored_label = torch.zeros(3, label.size(1), label.size(2)).byte() + for i, color in enumerate(class_color): + mask = label.eq(i) + for j in range(3): + colored_label[j].masked_fill_(mask, color[j]) + npimg = colored_label.numpy() + npimg = np.transpose(npimg, (1, 2, 0)) + mode = None + if npimg.shape[2] == 1: + npimg = npimg[:, :, 0] + mode = "L" + + return Image.fromarray(npimg, mode=mode) class CamVid(data.Dataset): def __init__(self, root, split='train', joint_transform=None, - transform=None, download=False, + transform=None, target_transform=LabelToLongTensor(), download=False, loader=default_loader): self.root = root assert split in ('train', 'val', 'test') self.split 
= split self.transform = transform + self.target_transform = target_transform self.joint_transform = joint_transform self.loader = loader self.class_weight = class_weight @@ -93,7 +100,7 @@ def __init__(self, root, split='train', joint_transform=None, if download: self.download() - self.imgs = make_dataset(os.path.join(self.root, self.split)) + self.imgs = _make_dataset(os.path.join(self.root, self.split)) def __getitem__(self, index): path = self.imgs[index] @@ -106,7 +113,7 @@ def __getitem__(self, index): if self.transform is not None: img = self.transform(img) - target = LabelToLongTensor(target) + target = self.target_transform(target) return img, target def __len__(self): From 3fde2d01e41c07953c323f1773855827b478f337 Mon Sep 17 00:00:00 2001 From: Felix Wu Date: Sat, 18 Mar 2017 18:38:06 -0400 Subject: [PATCH 5/5] fix bugs caused by merge --- torchvision/datasets/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index f3bfd8f00e1..8f1936335b0 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -6,8 +6,6 @@ from .mnist import MNIST from .svhn import SVHN from .phototour import PhotoTour -from .svhn import SVHN -from .phototour import PhotoTour from .camvid import CamVid __all__ = ('LSUN', 'LSUNClass',
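
For context, the sketch below shows how the pieces added in this series are meant to fit together: the CamVid dataset, the joint transforms (applied to the image and its label map in lockstep so random crops and flips stay aligned), and the module-level mean/std and class weights. This is an illustrative usage sketch, not part of the patches: the ./CamVid root path, the 352-pixel crop, and the batch size are assumptions, and it presumes the data from the SegNet-Tutorial repository has been downloaded manually into train/, trainannot/, val/, valannot/, test/ and testannot/ folders (the layout __getitem__ expects), since download() is not implemented.

    import torch
    from torch.utils.data import DataLoader
    from torchvision import transforms, joint_transforms
    from torchvision.datasets import CamVid
    from torchvision.datasets.camvid import mean, std, class_weight, LabelTensorToPILImage

    # Each joint transform takes a list [image, label] of PIL images and returns the
    # transformed list, so geometric augmentations stay consistent between the image
    # and its annotation. transforms.Compose just chains callables, so it can be used
    # to compose joint transforms as well.
    joint = transforms.Compose([
        joint_transforms.JointRandomCrop(352),        # assumed crop size; CamVid frames are 360x480
        joint_transforms.JointRandomHorizontalFlip(),
    ])

    train_set = CamVid(
        './CamVid',                                   # assumed local root containing train/ and trainannot/
        split='train',
        joint_transform=joint,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean, std),          # dataset statistics defined in camvid.py
        ]),
        # target_transform defaults to LabelToLongTensor(), so labels come back as LongTensors
    )

    loader = DataLoader(train_set, batch_size=4, shuffle=True, num_workers=2)
    weight = torch.FloatTensor(class_weight)          # median-frequency weights for a weighted loss

    for images, labels in loader:
        # images: (B, 3, H, W) float tensor; labels: (B, H, W) long tensor of class indices,
        # where index 11 ('Void') has weight 0 in class_weight.
        preview = LabelTensorToPILImage()(labels[0])  # map class indices back to the CamVid palette
        break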