@@ -448,12 +448,6 @@ def _filtered_task(self, i):
448
448
449
449
@staticmethod
450
450
def to_pandas (t : pa .Table ) -> pd .DataFrame :
451
- def types_mapper (pyarrow_dtype ):
452
- if pyarrow_dtype == pa .string ():
453
- return pd .StringDtype ("pyarrow" )
454
- if "decimal" in str (pyarrow_dtype ) or "date32" in str (pyarrow_dtype ):
455
- return pd .ArrowDtype (pyarrow_dtype )
456
-
457
451
df = t .to_pandas (
458
452
use_threads = False ,
459
453
ignore_metadata = False ,
@@ -504,6 +498,13 @@ def _simplify_up(self, parent):
504
498
# return Literal(sum(_lengths))
505
499
506
500
501
def types_mapper(pyarrow_dtype):
    """Pick the pandas extension dtype for a pyarrow column type.

    Meant to be passed as ``types_mapper=`` to ``pyarrow.Table.to_pandas``.

    Args:
        pyarrow_dtype: The ``pyarrow.DataType`` of the column being converted.

    Returns:
        ``pd.StringDtype("pyarrow")`` for ``pa.string()``,
        ``pd.ArrowDtype(pyarrow_dtype)`` for decimal and date32 columns,
        and ``None`` for every other type, which leaves the conversion to
        pyarrow's default handling.
    """
    if pyarrow_dtype == pa.string():
        return pd.StringDtype("pyarrow")
    # Use pyarrow's type predicates instead of searching str(dtype) for
    # "decimal"/"date32": the substring test also matched nested types whose
    # repr merely contains those words (e.g. struct<decimal_places: int32>).
    if pa.types.is_decimal(pyarrow_dtype) or pa.types.is_date32(pyarrow_dtype):
        return pd.ArrowDtype(pyarrow_dtype)
    return None
506
+
507
+
507
508
@functools .lru_cache
508
509
def meta_and_filenames (path ):
509
510
if str (path ).startswith ("s3://" ):
@@ -520,9 +521,10 @@ def meta_and_filenames(path):
520
521
else :
521
522
filenames = [path ] # TODO: split by row group
522
523
523
- import dask .dataframe as dd
524
+ ds = pq .ParquetDataset (path )
525
+ t = pa .Table .from_pylist ([], schema = ds .schema )
526
+ meta = t .to_pandas (types_mapper = types_mapper )
524
527
525
- meta = dd .read_parquet (path )._meta
526
528
return meta , filenames
527
529
528
530
0 commit comments