@@ -395,13 +395,17 @@ def filesystem(self):
395
395
import boto3
396
396
from pyarrow .fs import S3FileSystem
397
397
398
+ bucket = self .path [5 :].split ("/" )[0 ]
398
399
session = boto3 .session .Session ()
399
400
credentials = session .get_credentials ()
401
+ region = session .client ("s3" ).get_bucket_location (Bucket = bucket )[
402
+ "LocationConstraint"
403
+ ]
400
404
401
405
return S3FileSystem (
402
406
secret_key = credentials .secret_key ,
403
407
access_key = credentials .access_key ,
404
- region = "us-east-2" , # TODO
408
+ region = region ,
405
409
session_token = credentials .token ,
406
410
)
407
411
else :
@@ -510,8 +514,27 @@ def meta_and_filenames(path):
510
514
if str (path ).startswith ("s3://" ):
511
515
import s3fs
512
516
513
- s3 = s3fs .S3FileSystem ()
514
- filenames = s3 .ls (path )
517
+ filenames = s3fs .S3FileSystem ().ls (path )
518
+
519
+ import boto3
520
+ from pyarrow .fs import S3FileSystem
521
+
522
+ session = boto3 .session .Session ()
523
+ credentials = session .get_credentials ()
524
+
525
+ bucket = path [5 :].split ("/" )[0 ]
526
+ region = session .client ("s3" ).get_bucket_location (Bucket = bucket )[
527
+ "LocationConstraint"
528
+ ]
529
+
530
+ filesystem = S3FileSystem (
531
+ secret_key = credentials .secret_key ,
532
+ access_key = credentials .access_key ,
533
+ region = region ,
534
+ session_token = credentials .token ,
535
+ )
536
+ path = path [5 :]
537
+
515
538
else :
516
539
import glob
517
540
import os
@@ -521,14 +544,11 @@ def meta_and_filenames(path):
521
544
else :
522
545
filenames = [path ] # TODO: split by row group
523
546
524
- < << << << HEAD
525
- import dask .dataframe as dd
526
- meta = dd .read_parquet (path )._meta
527
- == == == =
528
- ds = pq .ParquetDataset (path )
547
+ filesystem = None
548
+
549
+ ds = pq .ParquetDataset (path , filesystem = filesystem )
529
550
t = pa .Table .from_pylist ([], schema = ds .schema )
530
551
meta = t .to_pandas (types_mapper = types_mapper )
531
- >> >> >> > d037c82 (Grab meta from arrow rather than dask .dataframe )
532
552
533
553
return meta , filenames
534
554
0 commit comments