Skip to content

Commit c530b62

Browse files
authored
fix prototype resource loading (#5447)
* fix prototype resource loading
* revert unrelated change
1 parent e88a554 commit c530b62

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

torchvision/prototype/datasets/utils/_resource.py

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -88,20 +88,30 @@ def load(
88 88
root = pathlib.Path(root)
89 89
path = root / self.file_name
90 90
# Instead of the raw file, there might also be files with fewer suffixes after decompression or directories
91-
# with no suffixes at all. Thus, we look for all paths that share the same name without suffixes as the raw
92-
# file.
93-
path_candidates = {file for file in path.parent.glob(path.name.replace("".join(path.suffixes), "") + "*")}
94-
# If we don't find anything, we try to download the raw file.
95-
if not path_candidates:
96-
path_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
91+
# with no suffixes at all.
92+
stem = path.name.replace("".join(path.suffixes), "")
93+
94+
# In a first step, we check for a folder with the same stem as the raw file. If it exists, we use it since
95+
# extracted files give the best I/O performance. Note that OnlineResource._extract() makes sure that an archive
96+
# is always extracted in a folder with the corresponding file name.
97+
folder_candidate = path.parent / stem
98+
if folder_candidate.exists() and folder_candidate.is_dir():
99+
return self._loader(folder_candidate)
100+
101+
# If there is no folder, we look for all files that share the same stem as the raw file, but might have a
102+
# different suffix.
103+
file_candidates = {file for file in path.parent.glob(stem + ".*")}
104+
# If we don't find anything, we download the raw file.
105+
if not file_candidates:
106+
file_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
97 107
# If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps.
98-
if path_candidates == {path}:
108+
if file_candidates == {path}:
99 109
if self._preprocess is not None:
100 110
path = self._preprocess(path)
101-
# Otherwise we use the path with the fewest suffixes. This gives us the extracted > decompressed > raw priority
102-
# that we want.
111+
# Otherwise, we use the path with the fewest suffixes. This gives us the decompressed > raw priority that we
112+
# want for the best I/O performance.
103 113
else:
104-
path = min(path_candidates, key=lambda path: len(path.suffixes))
114+
path = min(file_candidates, key=lambda path: len(path.suffixes))
105 115
return self._loader(path)
106 116

107 117
@abc.abstractmethod

0 commit comments

Comments
 (0)