Skip to content

Commit 31bf712

Browse files
Haonan Sun authored and facebook-github-bot committed
Revert D33405355: [fbsync] add contribution instructions for prototype datasets (#5133)
Differential Revision: D33405355 Original commit changeset: 864ae4642a93 Original Phabricator Diff: D33405355 fbshipit-source-id: 0127385422452c5ab77d43b1605557484bb6a30b
1 parent 9e30f6c commit 31bf712

File tree

10 files changed

+19
-169
lines changed

10 files changed

+19
-169
lines changed

torchvision/prototype/datasets/_builtin/README.md

Lines changed: 0 additions & 127 deletions
This file was deleted.

torchvision/prototype/datasets/_builtin/caltech.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,8 @@ def _make_datapipe(
136136
return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
137137

138138
def _generate_categories(self, root: pathlib.Path) -> List[str]:
139-
resources = self.resources(self.default_config)
140-
141-
dp = resources[0].load(root)
139+
dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name)
142140
dp = Filter(dp, self._is_not_background_image)
143-
144141
return sorted({pathlib.Path(path).parent.name for path, _ in dp})
145142

146143

@@ -192,9 +189,6 @@ def _make_datapipe(
192189
return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
193190

194191
def _generate_categories(self, root: pathlib.Path) -> List[str]:
195-
resources = self.resources(self.default_config)
196-
197-
dp = resources[0].load(root)
192+
dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name)
198193
dir_names = {pathlib.Path(path).parent.name for path, _ in dp}
199-
200194
return [name.split(".")[1] for name in sorted(dir_names)]

torchvision/prototype/datasets/_builtin/cifar.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,9 @@ def _make_datapipe(
9292
return Mapper(dp, functools.partial(self._collate_and_decode, decoder=decoder))
9393

9494
def _generate_categories(self, root: pathlib.Path) -> List[str]:
95-
resources = self.resources(self.default_config)
96-
97-
dp = resources[0].load(root)
95+
dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name)
9896
dp = Filter(dp, path_comparator("name", self._META_FILE_NAME))
9997
dp = Mapper(dp, self._unpickle)
100-
10198
return cast(List[str], next(iter(dp))[self._CATEGORIES_KEY])
10299

103100

torchvision/prototype/datasets/_builtin/coco.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def _generate_categories(self, root: pathlib.Path) -> Tuple[Tuple[str, str]]:
238238
config = self.default_config
239239
resources = self.resources(config)
240240

241-
dp = resources[1].load(root)
241+
dp = resources[1].load(pathlib.Path(root) / self.name)
242242
dp = Filter(
243243
dp,
244244
functools.partial(self._filter_meta_files, split=config.split, year=config.year, annotations="instances"),

torchvision/prototype/datasets/_builtin/imagenet.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,7 @@ def _make_datapipe(
177177

178178
def _generate_categories(self, root: pathlib.Path) -> List[Tuple[str, ...]]:
179179
resources = self.resources(self.default_config)
180-
181-
devkit_dp = resources[1].load(root)
180+
devkit_dp = resources[1].load(root / self.name)
182181
devkit_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
183182

184183
meta = next(iter(devkit_dp))[1]

torchvision/prototype/datasets/_builtin/mnist.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import operator
55
import pathlib
66
import string
7-
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, cast, BinaryIO, Union, Sequence
7+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, cast, BinaryIO
88

99
import torch
1010
from torchdata.datapipes.iter import (
@@ -78,7 +78,7 @@ def __iter__(self) -> Iterator[torch.Tensor]:
7878

7979

8080
class _MNISTBase(Dataset):
81-
_URL_BASE: Union[str, Sequence[str]]
81+
_URL_BASE: str
8282

8383
@abc.abstractmethod
8484
def _files_and_checksums(self, config: DatasetConfig) -> Tuple[Tuple[str, str], Tuple[str, str]]:
@@ -90,15 +90,8 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]:
9090
labels_sha256,
9191
) = self._files_and_checksums(config)
9292

93-
url_bases = self._URL_BASE
94-
if isinstance(url_bases, str):
95-
url_bases = (url_bases,)
96-
97-
images_urls = [f"{url_base}/{images_file}" for url_base in url_bases]
98-
images = HttpResource(images_urls[0], sha256=images_sha256, mirrors=images_urls[1:])
99-
100-
labels_urls = [f"{url_base}/{labels_file}" for url_base in url_bases]
101-
labels = HttpResource(labels_urls[0], sha256=images_sha256, mirrors=labels_urls[1:])
93+
images = HttpResource(f"{self._URL_BASE}/{images_file}", sha256=images_sha256)
94+
labels = HttpResource(f"{self._URL_BASE}/{labels_file}", sha256=labels_sha256)
10295

10396
return [images, labels]
10497

@@ -158,10 +151,7 @@ def _make_info(self) -> DatasetInfo:
158151
),
159152
)
160153

161-
_URL_BASE: Union[str, Sequence[str]] = (
162-
"http://yann.lecun.com/exdb/mnist",
163-
"https://ossci-datasets.s3.amazonaws.com/mnist/",
164-
)
154+
_URL_BASE = "http://yann.lecun.com/exdb/mnist"
165155
_CHECKSUMS = {
166156
"train-images-idx3-ubyte.gz": "440fcabf73cc546fa21475e81ea370265605f56be210a4024d2ca8f203523609",
167157
"train-labels-idx1-ubyte.gz": "3552534a0a558bbed6aed32b30c495cca23d567ec52cac8be1a0730e8010255c",

torchvision/prototype/datasets/_builtin/sbd.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,7 @@ def _make_datapipe(
156156
return Mapper(dp, functools.partial(self._collate_and_decode_sample, config=config, decoder=decoder))
157157

158158
def _generate_categories(self, root: pathlib.Path) -> Tuple[str, ...]:
159-
resources = self.resources(self.default_config)
160-
161-
dp = resources[0].load(root)
159+
dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name)
162160
dp = Filter(dp, path_comparator("name", "category_names.m"))
163161
dp = LineReader(dp)
164162
dp = Mapper(dp, bytes.decode, input_col=1)

torchvision/prototype/datasets/generate_category_files.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212

1313
def main(*names, force=False):
14-
home = pathlib.Path(datasets.home())
14+
root = pathlib.Path(datasets.home())
1515

1616
for name in names:
1717
path = BUILTIN_DIR / f"{name}.categories"
@@ -20,14 +20,13 @@ def main(*names, force=False):
2020

2121
dataset = find(name)
2222
try:
23-
categories = dataset._generate_categories(home / name)
23+
categories = dataset._generate_categories(root)
2424
except NotImplementedError:
2525
continue
2626

27-
with open(path, "w") as file:
28-
writer = csv.writer(file, lineterminator="\n")
27+
with open(path, "w", newline="") as file:
2928
for category in categories:
30-
writer.writerow((category,) if isinstance(category, str) else category)
29+
csv.writer(file).writerow((category,) if isinstance(category, str) else category)
3130

3231

3332
def parse_args(argv=None):

torchvision/prototype/datasets/utils/_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import itertools
77
import os
88
import pathlib
9-
from typing import Any, Callable, Dict, List, Optional, Sequence, Union, Tuple, Collection
9+
from typing import Any, Callable, Dict, List, Optional, Sequence, Union, Tuple
1010

1111
import torch
1212
from torch.utils.data import IterDataPipe
@@ -33,7 +33,7 @@ def __init__(
3333
name: str,
3434
*,
3535
type: Union[str, DatasetType],
36-
dependencies: Collection[str] = (),
36+
dependencies: Sequence[str] = (),
3737
categories: Optional[Union[int, Sequence[str], str, pathlib.Path]] = None,
3838
citation: Optional[str] = None,
3939
homepage: Optional[str] = None,

torchvision/prototype/datasets/utils/_resource.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,14 +136,14 @@ def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) ->
136136

137137
class HttpResource(OnlineResource):
138138
def __init__(
139-
self, url: str, *, file_name: Optional[str] = None, mirrors: Sequence[str] = (), **kwargs: Any
139+
self, url: str, *, file_name: Optional[str] = None, mirrors: Optional[Sequence[str]] = None, **kwargs: Any
140140
) -> None:
141141
super().__init__(file_name=file_name or pathlib.Path(urlparse(url).path).name, **kwargs)
142142
self.url = url
143143
self.mirrors = mirrors
144144

145145
def _download(self, root: pathlib.Path) -> None:
146-
for url in itertools.chain((self.url,), self.mirrors):
146+
for url in itertools.chain((self.url,), self.mirrors or ()):
147147
try:
148148
download_url(url, str(root), filename=self.file_name, md5=None)
149149
# TODO: make this more precise

0 commit comments

Comments (0)