@@ -88,20 +88,30 @@ def load(
88
88
root = pathlib .Path (root )
89
89
path = root / self .file_name
90
90
# Instead of the raw file, there might also be files with fewer suffixes after decompression or directories
91
- # with no suffixes at all. Thus, we look for all paths that share the same name without suffixes as the raw
92
- # file.
93
- path_candidates = {file for file in path .parent .glob (path .name .replace ("" .join (path .suffixes ), "" ) + "*" )}
94
- # If we don't find anything, we try to download the raw file.
95
- if not path_candidates :
96
- path_candidates = {self .download (root , skip_integrity_check = skip_integrity_check )}
91
+ # with no suffixes at all.
92
+ stem = path .name .replace ("" .join (path .suffixes ), "" )
93
+
94
+ # As a first step, we check for a folder with the same stem as the raw file. If it exists, we use it since
95
+ # extracted files give the best I/O performance. Note that OnlineResource._extract() makes sure that an archive
96
+ # is always extracted into a folder with the corresponding file name.
97
+ folder_candidate = path .parent / stem
98
+ if folder_candidate .exists () and folder_candidate .is_dir ():
99
+ return self ._loader (folder_candidate )
100
+
101
+ # If there is no folder, we look for all files that share the same stem as the raw file, but might have a
102
+ # different suffix.
103
+ file_candidates = {file for file in path .parent .glob (stem + ".*" )}
104
+ # If we don't find anything, we download the raw file.
105
+ if not file_candidates :
106
+ file_candidates = {self .download (root , skip_integrity_check = skip_integrity_check )}
97
107
# If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps.
98
- if path_candidates == {path }:
108
+ if file_candidates == {path }:
99
109
if self ._preprocess is not None :
100
110
path = self ._preprocess (path )
101
- # Otherwise we use the path with the fewest suffixes. This gives us the extracted > decompressed > raw priority
102
- # that we want.
111
+ # Otherwise, we use the path with the fewest suffixes. This gives us the decompressed > raw priority that we
112
+ # want for the best I/O performance.
103
113
else :
104
- path = min (path_candidates , key = lambda path : len (path .suffixes ))
114
+ path = min (file_candidates , key = lambda path : len (path .suffixes ))
105
115
return self ._loader (path )
106
116
107
117
@abc .abstractmethod
0 commit comments