Food 101 dataset (#5119)

jdsgomes · pmeier · web-flow · commit 65676b4ba1a9 · 2021-12-22T17:25:17.000Z
* Adding multiweight support for shufflenetv2 prototype models * Revert "Adding multiweight support for shufflenetv2 prototype models" This reverts commit 31fadbe. * Adding multiweight support for shufflenetv2 prototype models * Revert "Adding multiweight support for shufflenetv2 prototype models" This reverts commit 4e3d900. * Add Food101 Dataset Addresses #5108. cc @pmeier @NicolasHug * Remove unnecessary Path constructor calls * Remove unnecessary Path constructor calls and fix types * Fix tests * Address PR comments from @pmeier * Fix bug in tests and in food101 dataset * Fix bug in tests and in food101 dataset * Update torchvision/datasets/food101.py Co-authored-by: Philip Meier <github.pmeier@posteo.de>
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
@@ -45,6 +45,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     Flickr30k
     FlyingChairs
     FlyingThings3D
+    Food101
     HD1K
     HMDB51
     ImageNet
diff --git a/test/test_datasets.py b/test/test_datasets.py
@@ -2168,5 +2168,42 @@ def inject_fake_data(self, tmpdir, config):
         return num_sequences * (num_examples_per_sequence - 1)
 
 
+class Food101TestCase(datasets_utils.ImageDatasetTestCase):
+    """Test case for ``datasets.Food101`` backed by a small fake ``food-101`` directory tree."""
+
+    DATASET_CLASS = datasets.Food101
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        """Create fake images and the ``meta/<split>.json`` file for the configured split.
+
+        Args:
+            tmpdir (str): Directory in which the ``food-101`` folder is created.
+            config: Test configuration; only ``config["split"]`` is read here.
+
+        Returns:
+            int: Number of entries written to the metadata file, summed over all classes.
+        """
+        root_folder = pathlib.Path(tmpdir) / "food-101"
+        image_folder = root_folder / "images"
+        meta_folder = root_folder / "meta"
+
+        image_folder.mkdir(parents=True)
+        meta_folder.mkdir()
+
+        num_images_per_class = 5
+
+        metadata = {}
+        n_samples_per_class = 3 if config["split"] == "train" else 2
+        sampled_classes = ("apple_pie", "crab_cakes", "gyoza")
+        for cls in sampled_classes:
+            im_fnames = datasets_utils.create_image_folder(
+                image_folder,
+                cls,
+                file_name_fn=lambda idx: f"{idx}.jpg",
+                num_examples=num_images_per_class,
+            )
+            # Metadata entries have the form "<class>/<image stem>" (no file extension).
+            # ``random.choices`` draws with replacement, so an image may be listed more than once.
+            metadata[cls] = [
+                fname.relative_to(image_folder).with_suffix("").as_posix()
+                for fname in random.choices(im_fnames, k=n_samples_per_class)
+            ]
+
+        with open(meta_folder / f"{config['split']}.json", "w") as file:
+            json.dump(metadata, file)
+
+        # Multiply the counts directly instead of building a repeated tuple just to measure it.
+        return len(sampled_classes) * n_samples_per_class
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py
@@ -7,6 +7,7 @@
 from .fakedata import FakeData
 from .flickr import Flickr8k, Flickr30k
 from .folder import ImageFolder, DatasetFolder
+from .food101 import Food101
 from .hmdb51 import HMDB51
 from .imagenet import ImageNet
 from .inaturalist import INaturalist
@@ -77,4 +78,5 @@
     "FlyingChairs",
     "FlyingThings3D",
     "HD1K",
+    "Food101",
 )
diff --git a/torchvision/datasets/food101.py b/torchvision/datasets/food101.py
@@ -0,0 +1,90 @@
+import json
+from pathlib import Path
+from typing import Any, Tuple, Callable, Optional
+
+import PIL.Image
+
+from .utils import verify_str_arg, download_and_extract_archive
+from .vision import VisionDataset
+
+
+class Food101(VisionDataset):
+    """`The Food-101 Data Set <https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/>`_.
+
+    The Food-101 is a challenging data set of 101 food categories, with 101'000 images.
+    For each class, 250 manually reviewed test images are provided as well as 750 training images.
+    On purpose, the training images were not cleaned, and thus still contain some amount of noise.
+    This comes mostly in the form of intense colors and sometimes wrong labels. All images were
+    rescaled to have a maximum side length of 512 pixels.
+
+    Args:
+        root (string): Root directory of the dataset.
+        split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``.
+        download (bool, optional): If True (default), downloads and extracts the archive using ``root`` as
+            the download root. Nothing is downloaded if the ``food-101/meta`` and ``food-101/images``
+            folders already exist under ``root``.
+        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
+            version. E.g., ``transforms.RandomCrop``.
+        target_transform (callable, optional): A function/transform that takes in the target and transforms it.
+
+    Raises:
+        RuntimeError: If the ``food-101/meta`` and ``food-101/images`` folders are not both present under
+            ``root`` after the optional download step.
+    """
+
+    _URL = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"
+    _MD5 = "85eeb15f3717b99a5da872d97d918f87"
+
+    def __init__(
+        self,
+        root: str,
+        split: str = "train",
+        download: bool = True,
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
+    ) -> None:
+        super().__init__(root, transform=transform, target_transform=target_transform)
+        self._split = verify_str_arg(split, "split", ("train", "test"))
+        self._base_folder = Path(self.root) / "food-101"
+        self._meta_folder = self._base_folder / "meta"
+        self._images_folder = self._base_folder / "images"
+
+        if download:
+            self._download()
+
+        if not self._check_exists():
+            raise RuntimeError("Dataset not found. You can use download=True to download it")
+
+        # The metadata file maps each class name to a list of "<class>/<image stem>" entries.
+        # Use the validated split name rather than the raw argument.
+        with open(self._meta_folder / f"{self._split}.json") as f:
+            metadata = json.load(f)
+
+        # Class indices follow the alphabetical order of the class names.
+        self.classes = sorted(metadata)
+        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.classes)}
+
+        self._labels = []
+        self._image_files = []
+        for class_label, im_rel_paths in metadata.items():
+            self._labels += [self.class_to_idx[class_label]] * len(im_rel_paths)
+            # Metadata entries use "/" as separator and omit the ".jpg" extension.
+            self._image_files += [
+                self._images_folder.joinpath(*f"{im_rel_path}.jpg".split("/")) for im_rel_path in im_rel_paths
+            ]
+
+    def __len__(self) -> int:
+        """Return the number of images listed for the selected split."""
+        return len(self._image_files)
+
+    def __getitem__(self, idx) -> Tuple[Any, Any]:
+        """Load the sample at position ``idx``.
+
+        Returns:
+            tuple: ``(image, label)`` where ``image`` is the RGB-converted image after ``transform`` (if given)
+            and ``label`` is the class index after ``target_transform`` (if given).
+        """
+        image_file, label = self._image_files[idx], self._labels[idx]
+        image = PIL.Image.open(image_file).convert("RGB")
+
+        # Compare against None explicitly: a callable may define its own truthiness.
+        if self.transform is not None:
+            image = self.transform(image)
+
+        if self.target_transform is not None:
+            label = self.target_transform(label)
+
+        return image, label
+
+    def extra_repr(self) -> str:
+        """Return the split name as ``split=<name>``."""
+        return f"split={self._split}"
+
+    def _check_exists(self) -> bool:
+        """Return True if both the ``meta`` and ``images`` folders exist as directories."""
+        return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._images_folder))
+
+    def _download(self) -> None:
+        """Download and extract the archive unless the dataset folders are already present."""
+        if self._check_exists():
+            return
+        download_and_extract_archive(self._URL, download_root=self.root, md5=self._MD5)