-import functools
 import io
 import pathlib
 import re
-from typing import Any, Callable, Dict, List, Optional, Tuple, Iterator
+from typing import Any, Dict, List, Tuple, Iterator

-import torch
 from torchdata.datapipes.iter import IterDataPipe, Mapper, OnDiskCacheHolder, Concater, IterableWrapper
 from torchvision.prototype.datasets.utils import (
     Dataset,
     DatasetConfig,
     DatasetInfo,
     HttpResource,
     OnlineResource,
-    DatasetType,
 )
 from torchvision.prototype.datasets.utils._internal import hint_sharding, hint_shuffling
-from torchvision.prototype.features import Label
+from torchvision.prototype.features import Label, EncodedImage

 # We need lmdb.Environment as annotation, but lmdb is an optional requirement at import
 try:
@@ -79,7 +76,6 @@ class Lsun(Dataset):
     def _make_info(self) -> DatasetInfo:
         return DatasetInfo(
             "lsun",
-            type=DatasetType.IMAGE,
             categories=(
                 "bedroom",
                 "bridge",
@@ -140,37 +136,24 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]:

     _FOLDER_PATTERN = re.compile(r"(?P<category>\w*?)_(?P<split>(train|val))_lmdb")

-    def _collate_and_decode_sample(
-        self,
-        data: Tuple[str, bytes, io.BytesIO],
-        *,
-        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
-    ) -> Dict[str, Any]:
+    def _prepare_sample(self, data: Tuple[str, bytes, io.BytesIO]) -> Dict[str, Any]:
         path, key, buffer = data

         match = self._FOLDER_PATTERN.match(pathlib.Path(path).parent.name)
-        if match:
-            category = match["category"]
-            label = Label(self.categories.index(category), category=category)
-        else:
-            label = None
+        label = Label.from_category(match["category"], categories=self.categories) if match else None

         return dict(
             path=path,
             key=key,
-            image=decoder(buffer) if decoder else buffer,
+            image=EncodedImage.from_file(buffer),
             label=label,
         )

     def _filepath_fn(self, path: str) -> str:
         return str(pathlib.Path(path) / "keys.cache")

     def _make_datapipe(
-        self,
-        resource_dps: List[IterDataPipe],
-        *,
-        config: DatasetConfig,
-        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
+        self, resource_dps: List[IterDataPipe], *, config: DatasetConfig
     ) -> IterDataPipe[Dict[str, Any]]:
         dp = Concater(*resource_dps)

@@ -183,4 +166,4 @@ def _make_datapipe(
         dp = hint_sharding(dp)
         dp = hint_shuffling(dp)
         dp = LmdbReader(dp)
-        return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
+        return Mapper(dp, self._prepare_sample)
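For reference, a minimal sketch of what the refactored `_prepare_sample` now produces for a single LMDB record, assuming `Label.from_category` and `EncodedImage.from_file` behave as used in the diff above; the path, key, buffer, and category list below are made-up placeholders, not values from this change:

import io
import pathlib
import re

from torchvision.prototype.features import EncodedImage, Label

# Hypothetical stand-ins for one record yielded by LmdbReader:
# (path inside the lmdb folder, record key, raw encoded image bytes).
categories = ["bedroom", "bridge"]                   # placeholder subset of the LSUN categories
path = "bedroom_train_lmdb/data.mdb"                 # placeholder path
key = b"0001"                                        # placeholder record key
buffer = io.BytesIO(b"<encoded image bytes>")        # placeholder image bytes

folder_pattern = re.compile(r"(?P<category>\w*?)_(?P<split>(train|val))_lmdb")
match = folder_pattern.match(pathlib.Path(path).parent.name)

sample = dict(
    path=path,
    key=key,
    # The image stays as an EncodedImage (raw bytes wrapped in a feature) instead of
    # being eagerly decoded, which is what dropping the `decoder` argument buys us.
    image=EncodedImage.from_file(buffer),
    label=Label.from_category(match["category"], categories=categories) if match else None,
)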