From 100d9ebd16789fd28fd93818343d0fb822e2b368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 26 Jun 2022 15:25:32 -0400 Subject: [PATCH 1/3] TYP: Return annotations for io/{formats,json} --- pandas/core/frame.py | 3 +- pandas/io/formats/console.py | 9 +- pandas/io/formats/css.py | 3 +- pandas/io/formats/excel.py | 6 +- pandas/io/formats/format.py | 9 +- pandas/io/formats/html.py | 2 +- pandas/io/formats/info.py | 4 +- pandas/io/formats/style.py | 123 ++++++++++++++++++- pandas/io/json/_json.py | 201 +++++++++++++++++++++++++++++--- pandas/io/json/_table_schema.py | 5 +- 10 files changed, 330 insertions(+), 35 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4a278185b01b..66361a7d143f7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1036,7 +1036,8 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: value = buf.getvalue() repr_width = max(len(line) for line in value.split("\n")) - return repr_width < width + # error: Unsupported operand types for < ("int" and "None") + return repr_width < width # type: ignore[operator] def _info_repr(self) -> bool: """ diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index bdd2b3d6e4c6a..2a6cbe0762903 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -1,11 +1,12 @@ """ Internal module for console introspection """ +from __future__ import annotations from shutil import get_terminal_size -def get_console_size(): +def get_console_size() -> tuple[int | None, int | None]: """ Return console size as tuple = (width, height). @@ -43,14 +44,14 @@ def get_console_size(): # Note if the User sets width/Height to None (auto-detection) # and we're in a script (non-inter), this will return (None,None) # caller needs to deal. - return (display_width or terminal_width, display_height or terminal_height) + return display_width or terminal_width, display_height or terminal_height # ---------------------------------------------------------------------- # Detect our environment -def in_interactive_session(): +def in_interactive_session() -> bool: """ Check if we're running in an interactive shell. @@ -75,7 +76,7 @@ def check_main(): return check_main() -def in_ipython_frontend(): +def in_ipython_frontend() -> bool: """ Check if we're inside an IPython zmq frontend. diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index 5335887785881..a6d2645590dde 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -7,6 +7,7 @@ from typing import ( Callable, Generator, + Iterator, ) import warnings @@ -369,7 +370,7 @@ def atomize(self, declarations) -> Generator[tuple[str, str], None, None]: expand_margin = _side_expander("margin-{:s}") expand_padding = _side_expander("padding-{:s}") - def parse(self, declarations_str: str): + def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]: """ Generates (prop, value) pairs from declarations. diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index effb0652aa2d3..8478b72d97a5e 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -197,7 +197,7 @@ def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]: # TODO: handle cell width and height: needs support in pandas.io.excel - def remove_none(d: dict[str, str]) -> None: + def remove_none(d: dict[str, str | None]) -> None: """Remove key where value is None, through nested dicts""" for k, v in list(d.items()): if v is None: @@ -528,7 +528,7 @@ def __init__( self.inf_rep = inf_rep @property - def header_style(self): + def header_style(self) -> dict[str, dict[str, str | bool]]: return { "font": {"bold": True}, "borders": { @@ -850,7 +850,7 @@ def write( freeze_panes=None, engine=None, storage_options: StorageOptions = None, - ): + ) -> None: """ writer : path-like, file-like, or ExcelWriter object File path or existing ExcelWriter diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cf5e35f6ddcd1..4baf7d0bf855f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -22,10 +22,12 @@ Callable, Hashable, Iterable, + Iterator, List, Mapping, Sequence, cast, + overload, ) from unicodedata import east_asian_width @@ -1203,12 +1205,15 @@ def save_to_buffer( with get_buffer(buf, encoding=encoding) as f: f.write(string) if buf is None: - return f.getvalue() + # error: "WriteBuffer[str]" has no attribute "getvalue" + return f.getvalue() # type: ignore[attr-defined] return None @contextmanager -def get_buffer(buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None): +def get_buffer( + buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None +) -> Iterator[WriteBuffer[str]] | Iterator[StringIO]: """ Context manager to open, yield and close buffer for filenames or Path-like objects, otherwise yield buf unchanged. diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index dfd95b96c68e8..b6494682d308d 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -89,7 +89,7 @@ def render(self) -> list[str]: return self.elements @property - def should_show_dimensions(self): + def should_show_dimensions(self) -> bool: return self.fmt.should_show_dimensions @property diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index c0bdf37e5273a..07ec50a2cd6a8 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -566,7 +566,7 @@ def dtypes(self) -> Iterable[Dtype]: return [self.data.dtypes] @property - def dtype_counts(self): + def dtype_counts(self) -> Mapping[str, int]: from pandas.core.frame import DataFrame return _get_dataframe_dtype_counts(DataFrame(self.data)) @@ -1087,7 +1087,7 @@ def _fill_non_empty_info(self) -> None: if self.display_memory_usage: self.add_memory_usage_line() - def add_series_name_line(self): + def add_series_name_line(self) -> None: self._lines.append(f"Series name: {self.data.name}") @property diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 24669e84443a6..10b607da45ca8 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -12,6 +12,7 @@ Callable, Hashable, Sequence, + overload, ) import warnings @@ -591,6 +592,52 @@ def to_excel( engine=engine, ) + @overload + def to_latex( + self, + buf: FilePath | WriteBuffer[str], + *, + column_format: str | None = ..., + position: str | None = ..., + position_float: str | None = ..., + hrules: bool | None = ..., + clines: str | None = ..., + label: str | None = ..., + caption: str | tuple | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + multirow_align: str | None = ..., + multicol_align: str | None = ..., + siunitx: bool = ..., + environment: str | None = ..., + encoding: str | None = ..., + convert_css: bool = ..., + ) -> None: + ... + + @overload + def to_latex( + self, + buf: None = ..., + *, + column_format: str | None = ..., + position: str | None = ..., + position_float: str | None = ..., + hrules: bool | None = ..., + clines: str | None = ..., + label: str | None = ..., + caption: str | tuple | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + multirow_align: str | None = ..., + multicol_align: str | None = ..., + siunitx: bool = ..., + environment: str | None = ..., + encoding: str | None = ..., + convert_css: bool = ..., + ) -> str: + ... + def to_latex( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -610,7 +657,7 @@ def to_latex( environment: str | None = None, encoding: str | None = None, convert_css: bool = False, - ): + ) -> str | None: r""" Write Styler to a file, buffer or string in LaTeX format. @@ -1161,6 +1208,46 @@ def to_latex( ) return save_to_buffer(latex, buf=buf, encoding=encoding) + @overload + def to_html( + self, + buf: FilePath | WriteBuffer[str], + *, + table_uuid: str | None = ..., + table_attributes: str | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + bold_headers: bool = ..., + caption: str | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + encoding: str | None = ..., + doctype_html: bool = ..., + exclude_styles: bool = ..., + **kwargs, + ) -> None: + ... + + @overload + def to_html( + self, + buf: None = ..., + *, + table_uuid: str | None = ..., + table_attributes: str | None = ..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + bold_headers: bool = ..., + caption: str | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + encoding: str | None = ..., + doctype_html: bool = ..., + exclude_styles: bool = ..., + **kwargs, + ) -> str: + ... + @Substitution(buf=buf, encoding=encoding) def to_html( self, @@ -1178,7 +1265,7 @@ def to_html( doctype_html: bool = False, exclude_styles: bool = False, **kwargs, - ): + ) -> str | None: """ Write Styler to a file, buffer or string in HTML-CSS format. @@ -1292,10 +1379,38 @@ def to_html( html, buf=buf, encoding=(encoding if buf is not None else None) ) + @overload + def to_string( + self, + buf: FilePath | WriteBuffer[str], + *, + encoding=..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + delimiter: str = ..., + ) -> None: + ... + + @overload + def to_string( + self, + buf: None = ..., + *, + encoding=..., + sparse_index: bool | None = ..., + sparse_columns: bool | None = ..., + max_rows: int | None = ..., + max_columns: int | None = ..., + delimiter: str = ..., + ) -> str: + ... + @Substitution(buf=buf, encoding=encoding) def to_string( self, - buf=None, + buf: FilePath | WriteBuffer[str] | None = None, *, encoding=None, sparse_index: bool | None = None, @@ -1303,7 +1418,7 @@ def to_string( max_rows: int | None = None, max_columns: int | None = None, delimiter: str = " ", - ): + ) -> str | None: """ Write Styler to a file, buffer or string in text format. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fbea7a71202eb..7260346faa307 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -11,7 +11,11 @@ from typing import ( Any, Callable, + Generic, + Literal, Mapping, + TypeVar, + overload, ) import numpy as np @@ -21,9 +25,12 @@ from pandas._typing import ( CompressionOptions, DtypeArg, + FilePath, IndexLabel, JSONSerializable, + ReadBuffer, StorageOptions, + WriteBuffer, ) from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -66,13 +73,53 @@ ) from pandas.io.parsers.readers import validate_integer +FrameSeriesStrT = TypeVar("FrameSeriesStrT", bound=Literal["frame", "series"]) + loads = json.loads dumps = json.dumps # interface to/from +@overload +def to_json( + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes], + obj: NDFrame, + orient: str | None = ..., + date_format: str = ..., + double_precision: int = ..., + force_ascii: bool = ..., + date_unit: str = ..., + default_handler: Callable[[Any], JSONSerializable] | None = ..., + lines: bool = ..., + compression: CompressionOptions = ..., + index: bool = ..., + indent: int = ..., + storage_options: StorageOptions = ..., +) -> None: + ... + + +@overload def to_json( - path_or_buf, + path_or_buf: None, + obj: NDFrame, + orient: str | None = ..., + date_format: str = ..., + double_precision: int = ..., + force_ascii: bool = ..., + date_unit: str = ..., + default_handler: Callable[[Any], JSONSerializable] | None = ..., + lines: bool = ..., + compression: CompressionOptions = ..., + index: bool = ..., + indent: int = ..., + storage_options: StorageOptions = ..., +) -> str: + ... + + +def to_json( + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] | None, obj: NDFrame, orient: str | None = None, date_format: str = "epoch", @@ -85,7 +132,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, -): +) -> str | None: if not index and orient not in ["split", "table"]: raise ValueError( @@ -131,6 +178,7 @@ def to_json( handles.handle.write(s) else: return s + return None class Writer(ABC): @@ -168,7 +216,7 @@ def __init__( def _format_axes(self): raise AbstractMethodError(self) - def write(self): + def write(self) -> str: iso_dates = self.date_format == "iso" return dumps( self.obj_to_write, @@ -313,6 +361,101 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: return {"schema": self.schema, "data": self.obj} +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient=..., + typ: Literal["frame"] = ..., + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates=..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit=..., + encoding=..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> JsonReader[Literal["frame"]]: + ... + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient=..., + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates=..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit=..., + encoding=..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> JsonReader[Literal["series"]]: + ... + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient=..., + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates=..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit=..., + encoding=..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> Series: + ... + + +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + orient=..., + typ: Literal["frame"] = ..., + dtype: DtypeArg | None = ..., + convert_axes=..., + convert_dates=..., + keep_default_dates: bool = ..., + numpy: bool = ..., + precise_float: bool = ..., + date_unit=..., + encoding=..., + encoding_errors: str | None = ..., + lines: bool = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: + ... + + @doc( storage_options=_shared_docs["storage_options"], decompression_options=_shared_docs["decompression_options"] % "path_or_buf", @@ -322,9 +465,9 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: version="2.0", allowed_args=["path_or_buf"], stacklevel=3 ) def read_json( - path_or_buf=None, + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], orient=None, - typ="frame", + typ: Literal["frame", "series"] = "frame", dtype: DtypeArg | None = None, convert_axes=None, convert_dates=True, @@ -339,7 +482,7 @@ def read_json( compression: CompressionOptions = "infer", nrows: int | None = None, storage_options: StorageOptions = None, -): +) -> DataFrame | Series | JsonReader: """ Convert a JSON string to pandas object. @@ -613,7 +756,7 @@ def read_json( return json_reader.read() -class JsonReader(abc.Iterator): +class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): """ JsonReader provides an interface for reading in a JSON file. @@ -626,7 +769,7 @@ def __init__( self, filepath_or_buffer, orient, - typ, + typ: FrameSeriesStrT, dtype, convert_axes, convert_dates, @@ -739,10 +882,23 @@ def _combine_lines(self, lines) -> str: f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' ) - def read(self): + @overload + def read(self: JsonReader[Literal["frame"]]) -> DataFrame: + ... + + @overload + def read(self: JsonReader[Literal["series"]]) -> Series: + ... + + @overload + def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: + ... + + def read(self) -> DataFrame | Series: """ Read the whole JSON input into a pandas object. """ + obj: DataFrame | Series if self.lines: if self.chunksize: obj = concat(self) @@ -759,7 +915,7 @@ def read(self): self.close() return obj - def _get_object_parser(self, json): + def _get_object_parser(self, json) -> DataFrame | Series: """ Parses a json document into a pandas object. """ @@ -786,7 +942,7 @@ def _get_object_parser(self, json): return obj - def close(self): + def close(self) -> None: """ If we opened a stream earlier, in _get_data_from_filepath, we should close it. @@ -796,7 +952,22 @@ def close(self): if self.handles is not None: self.handles.close() - def __next__(self): + def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]: + return self + + @overload + def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame: + ... + + @overload + def __next__(self: JsonReader[Literal["series"]]) -> Series: + ... + + @overload + def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series: + ... + + def __next__(self) -> DataFrame | Series: if self.nrows: if self.nrows_seen >= self.nrows: self.close() @@ -816,10 +987,10 @@ def __next__(self): self.close() raise StopIteration - def __enter__(self): + def __enter__(self) -> JsonReader[FrameSeriesStrT]: return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: self.close() @@ -875,7 +1046,7 @@ def __init__( self.keep_default_dates = keep_default_dates self.obj: DataFrame | Series | None = None - def check_keys_split(self, decoded): + def check_keys_split(self, decoded) -> None: """ Checks that dict has only the appropriate keys for orient='split'. """ diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index c630f0d7613e0..44c5ce0e5ee83 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -115,8 +115,9 @@ def set_default_names(data): return data -def convert_pandas_type_to_json_field(arr): +def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]: dtype = arr.dtype + name: JSONSerializable if arr.name is None: name = "values" else: @@ -141,7 +142,7 @@ def convert_pandas_type_to_json_field(arr): return field -def convert_json_field_to_pandas_type(field): +def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: """ Converts a JSON field descriptor into its corresponding NumPy / pandas type From a5ac19a1d828274ff04aed6afdfca73617f1c90e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 26 Jun 2022 21:51:38 -0400 Subject: [PATCH 2/3] flake8 --- pandas/io/formats/format.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4baf7d0bf855f..08deafce3af98 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -27,7 +27,6 @@ Mapping, Sequence, cast, - overload, ) from unicodedata import east_asian_width From 7f13d592f6ae8fe6d7ae174be57ef141bae98c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 27 Jun 2022 16:48:19 -0400 Subject: [PATCH 3/3] explicitly check whether width is None --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66361a7d143f7..79ed8c7788628 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1008,7 +1008,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: # used by repr_html under IPython notebook or scripts ignore terminal # dims - if ignore_width or not console.in_interactive_session(): + if ignore_width or width is None or not console.in_interactive_session(): return True if get_option("display.width") is not None or console.in_ipython_frontend(): @@ -1036,8 +1036,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: value = buf.getvalue() repr_width = max(len(line) for line in value.split("\n")) - # error: Unsupported operand types for < ("int" and "None") - return repr_width < width # type: ignore[operator] + return repr_width < width def _info_repr(self) -> bool: """