diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 56131d000b176..3b8077c531996 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -173,9 +173,25 @@ def write( table = self.api.Table.from_pandas(df, **from_pandas_kwargs) + filesystem = kwargs.pop("filesystem", None) + if ( + isinstance(path, str) + and storage_options is None + and filesystem is None + and Version(self.api.__version__) >= Version("5.0.0") + ): + try: + from pyarrow.fs import FileSystem + + filesystem, path = FileSystem.from_uri(path) + except Exception: + # fallback to use get_handle / fsspec for filesystems + # that pyarrow doesn't support + pass + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( path, - kwargs.pop("filesystem", None), + filesystem, storage_options=storage_options, mode="wb", is_dir=partition_cols is not None, @@ -470,6 +486,12 @@ def read_parquet( .. versionadded:: 1.3.0 + When using the 'pyarrow' engine and no storage options are provided + and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec`` + (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is preferred. + Provide the instantiated fsspec filesystem using the ``filesystem`` + keyword if you wish to use its implementation. + use_nullable_dtypes : bool, default False If True, use dtypes that use ``pd.NA`` as missing value indicator for the resulting DataFrame. (only applicable for the ``pyarrow``