1
1
import os
2
2
import shutil
3
+ import tarfile
3
4
import tempfile
5
+ from io import BytesIO
6
+ from pathlib import Path
7
+ from typing import Dict
4
8
5
9
import pytest
6
10
7
11
import fsspec
8
12
from fsspec .core import OpenFile
9
13
from fsspec .implementations .cached import WholeFileCacheFileSystem
14
+ from fsspec .implementations .tar import TarFileSystem
10
15
from fsspec .implementations .tests .test_archive import archive_data , temptar
11
16
12
17
@@ -171,7 +176,6 @@ def test_filesystem_cached(recipe, tmpdir):
171
176
ids = ["tar" , "tar-gz" , "tar-bz2" , "tar-xz" ],
172
177
)
173
178
def test_url_to_fs_direct (recipe , tmpdir ):
174
-
175
179
with temptar (archive_data , mode = recipe ["mode" ], suffix = recipe ["suffix" ]) as tf :
176
180
url = f"tar://inner::file://{ tf } "
177
181
fs , url = fsspec .core .url_to_fs (url = url )
@@ -189,8 +193,48 @@ def test_url_to_fs_direct(recipe, tmpdir):
189
193
ids = ["tar" , "tar-gz" , "tar-bz2" , "tar-xz" ],
190
194
)
191
195
def test_url_to_fs_cached(recipe, tmpdir):
    """Read a member through a chained ``tar://…::simplecache::file://…`` URL.

    ``recipe`` provides the ``mode`` and ``suffix`` used to build the
    temporary archive via ``temptar``. ``tmpdir`` is accepted but not used
    in the body.
    """
    with temptar(archive_data, mode=recipe["mode"], suffix=recipe["suffix"]) as tf:
        # The interpolated path must be the last thing in the URL: any
        # trailing whitespace would become part of the file name looked up.
        # NOTE(review): presumably `tf` is the archive's local path — confirm
        # against `temptar`.
        url = f"tar://inner::simplecache::file://{tf}"
        fs, url = fsspec.core.url_to_fs(url=url)
        assert fs.cat("b") == b"hello"
200
+
201
+
202
@pytest.mark.parametrize(
    "compression", ["", "gz", "bz2", "xz"], ids=["tar", "tar-gz", "tar-bz2", "tar-xz"]
)
def test_ls_with_folders(compression: str, tmp_path: Path):
    """
    Create a tar file that doesn't include the intermediate folder structure,
    but make sure that the reading filesystem is still able to resolve the
    intermediate folders, like the ZipFileSystem.

    ``compression`` is the tarfile compression suffix ("" for a plain tar);
    it is used both in the archive's file name and in the ``tarfile`` write
    mode. ``tmp_path`` is the directory the archive is written into.
    """
    tar_data: Dict[str, bytes] = {
        "a.pdf": b"Hello A!",
        "b/c.pdf": b"Hello C!",
        "d/e/f.pdf": b"Hello F!",
        "d/g.pdf": b"Hello G!",
    }
    # The file name must end exactly with the compression suffix (no stray
    # whitespace).
    if compression:
        temp_archive_file = tmp_path / f"test_tar_file.tar.{compression}"
    else:
        temp_archive_file = tmp_path / "test_tar_file.tar"
    with open(temp_archive_file, "wb") as fd:
        # We need to manually write the tarfile here, because temptar
        # creates intermediate directories which is not how tars are always created
        with tarfile.open(fileobj=fd, mode=f"w:{compression}") as tf:
            for tar_file_path, data in tar_data.items():
                # Only file members are added; no directory entries are
                # written for "b", "d" or "d/e".
                info = tarfile.TarInfo(name=tar_file_path)
                info.size = len(data)
                tf.addfile(info, BytesIO(data))
    with open(temp_archive_file, "rb") as fd:
        fs = TarFileSystem(fd)
        # Directories that were never stored as members must still be listed.
        assert fs.find("/", withdirs=True) == [
            "a.pdf",
            "b/",
            "b/c.pdf",
            "d/",
            "d/e/",
            "d/e/f.pdf",
            "d/g.pdf",
        ]
0 commit comments