Skip to content

Commit 04307e7

Browse files
authored
STY: Fix doctest and docstring formatting errors (#56408)
* STY: Fix doctest and docstring formatting errors
* ensure stderr is output too
* Fix more failures
* Don't add redirects for single page, fix example
* A few more
* Remove e flag
1 parent 1ab4d03 commit 04307e7

File tree

14 files changed

+76
-75
lines changed

14 files changed

+76
-75
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
1515
# $ ./ci/code_checks.sh notebooks # check execution of documentation notebooks
1616

17+
set -uo pipefail
18+
1719
[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
1820
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
1921

doc/make.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,9 @@ def html(self):
236236
os.remove(zip_fname)
237237

238238
if ret_code == 0:
239-
if self.single_doc_html is not None and not self.no_browser:
240-
self._open_browser(self.single_doc_html)
239+
if self.single_doc_html is not None:
240+
if not self.no_browser:
241+
self._open_browser(self.single_doc_html)
241242
else:
242243
self._add_redirects()
243244
if self.whatsnew and not self.no_browser:

pandas/core/arrays/sparse/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
270270
Examples
271271
--------
272272
>>> import scipy.sparse
273-
>>> mat = scipy.sparse.eye(3)
273+
>>> mat = scipy.sparse.eye(3, dtype=float)
274274
>>> pd.DataFrame.sparse.from_spmatrix(mat)
275275
0 1 2
276276
0 1.0 0.0 0.0

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3955,7 +3955,7 @@ def to_csv(
39553955
>>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
39563956
... 'mask': ['red', 'purple'],
39573957
... 'weapon': ['sai', 'bo staff']}})
3958-
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP
3958+
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP
39593959
39603960
Create 'out.zip' containing 'out.csv'
39613961
@@ -8972,7 +8972,7 @@ def clip(
89728972
89738973
Clips using specific lower and upper thresholds per column:
89748974
8975-
>>> df.clip([-2, -1], [4,5])
8975+
>>> df.clip([-2, -1], [4, 5])
89768976
col_0 col_1
89778977
0 4 -1
89788978
1 -2 -1

pandas/core/groupby/generic.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -470,10 +470,9 @@ def _aggregate_named(self, func, *args, **kwargs):
470470

471471
__examples_series_doc = dedent(
472472
"""
473-
>>> ser = pd.Series(
474-
... [390.0, 350.0, 30.0, 20.0],
475-
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
476-
... name="Max Speed")
473+
>>> ser = pd.Series([390.0, 350.0, 30.0, 20.0],
474+
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
475+
... name="Max Speed")
477476
>>> grouped = ser.groupby([1, 1, 2, 2])
478477
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
479478
Falcon 0.707107
@@ -1331,14 +1330,10 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
13311330
"""
13321331
Examples
13331332
--------
1334-
>>> df = pd.DataFrame(
1335-
... {
1336-
... "A": [1, 1, 2, 2],
1333+
>>> data = {"A": [1, 1, 2, 2],
13371334
... "B": [1, 2, 3, 4],
1338-
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
1339-
... }
1340-
... )
1341-
1335+
... "C": [0.362838, 0.227877, 1.267767, -0.562860]}
1336+
>>> df = pd.DataFrame(data)
13421337
>>> df
13431338
A B C
13441339
0 1 1 0.362838
@@ -1393,7 +1388,8 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
13931388
13941389
>>> df.groupby("A").agg(
13951390
... b_min=pd.NamedAgg(column="B", aggfunc="min"),
1396-
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
1391+
... c_sum=pd.NamedAgg(column="C", aggfunc="sum")
1392+
... )
13971393
b_min c_sum
13981394
A
13991395
1 1 0.590715
@@ -2154,7 +2150,7 @@ def idxmax(
21542150
21552151
>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
21562152
... 'co2_emissions': [37.2, 19.66, 1712]},
2157-
... index=['Pork', 'Wheat Products', 'Beef'])
2153+
... index=['Pork', 'Wheat Products', 'Beef'])
21582154
21592155
>>> df
21602156
consumption co2_emissions
@@ -2236,7 +2232,7 @@ def idxmin(
22362232
22372233
>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
22382234
... 'co2_emissions': [37.2, 19.66, 1712]},
2239-
... index=['Pork', 'Wheat Products', 'Beef'])
2235+
... index=['Pork', 'Wheat Products', 'Beef'])
22402236
22412237
>>> df
22422238
consumption co2_emissions
@@ -2319,9 +2315,9 @@ def value_counts(
23192315
Examples
23202316
--------
23212317
>>> df = pd.DataFrame({
2322-
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
2323-
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
2324-
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
2318+
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
2319+
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
2320+
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
23252321
... })
23262322
23272323
>>> df

pandas/core/groupby/groupby.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,8 @@ class providing the base-class of operations.
232232
""",
233233
"dataframe_examples": """
234234
>>> df = pd.DataFrame({'A': 'a a b'.split(),
235-
... 'B': [1,2,3],
236-
... 'C': [4,6,5]})
235+
... 'B': [1, 2, 3],
236+
... 'C': [4, 6, 5]})
237237
>>> g1 = df.groupby('A', group_keys=False)
238238
>>> g2 = df.groupby('A', group_keys=True)
239239
@@ -313,7 +313,7 @@ class providing the base-class of operations.
313313
314314
The resulting dtype will reflect the return value of the passed ``func``.
315315
316-
>>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2)
316+
>>> g1.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
317317
a 0.0
318318
a 2.0
319319
b 1.0
@@ -322,7 +322,7 @@ class providing the base-class of operations.
322322
In the above, the groups are not part of the index. We can have them included
323323
by using ``g2`` where ``group_keys=True``:
324324
325-
>>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2)
325+
>>> g2.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
326326
a a 0.0
327327
a 2.0
328328
b b 1.0
@@ -421,14 +421,18 @@ class providing the base-class of operations.
421421
functions that expect Series, DataFrames, GroupBy or Resampler objects.
422422
Instead of writing
423423
424-
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
424+
>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
425+
>>> g = lambda x, arg1: x * 5 / arg1
426+
>>> f = lambda x: x ** 4
427+
>>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"])
428+
>>> h(g(f(df.groupby('group')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP
425429
426430
You can write
427431
428432
>>> (df.groupby('group')
429433
... .pipe(f)
430-
... .pipe(g, arg1=a)
431-
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP
434+
... .pipe(g, arg1=1)
435+
... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP
432436
433437
which is much more readable.
434438

pandas/core/indexes/multi.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,8 @@ def levels(self) -> FrozenList:
862862
Examples
863863
--------
864864
>>> index = pd.MultiIndex.from_product([['mammal'],
865-
... ('goat', 'human', 'cat', 'dog')], names=['Category', 'Animals'])
865+
... ('goat', 'human', 'cat', 'dog')],
866+
... names=['Category', 'Animals'])
866867
>>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
867868
>>> leg_num
868869
Legs

pandas/core/resample.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,7 @@ def fillna(self, method, limit: int | None = None):
859859
Missing values present before the upsampling are not affected.
860860
861861
>>> sm = pd.Series([1, None, 3],
862-
... index=pd.date_range('20180101', periods=3, freq='h'))
862+
... index=pd.date_range('20180101', periods=3, freq='h'))
863863
>>> sm
864864
2018-01-01 00:00:00 1.0
865865
2018-01-01 01:00:00 NaN
@@ -1028,21 +1028,16 @@ def interpolate(
10281028
Examples
10291029
--------
10301030
1031-
>>> import datetime as dt
1032-
>>> timesteps = [
1033-
... dt.datetime(2023, 3, 1, 7, 0, 0),
1034-
... dt.datetime(2023, 3, 1, 7, 0, 1),
1035-
... dt.datetime(2023, 3, 1, 7, 0, 2),
1036-
... dt.datetime(2023, 3, 1, 7, 0, 3),
1037-
... dt.datetime(2023, 3, 1, 7, 0, 4)]
1031+
>>> start = "2023-03-01T07:00:00"
1032+
>>> timesteps = pd.date_range(start, periods=5, freq="s")
10381033
>>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
10391034
>>> series
10401035
2023-03-01 07:00:00 1
10411036
2023-03-01 07:00:01 -1
10421037
2023-03-01 07:00:02 2
10431038
2023-03-01 07:00:03 1
10441039
2023-03-01 07:00:04 3
1045-
dtype: int64
1040+
Freq: s, dtype: int64
10461041
10471042
Downsample the series to 0.5Hz by providing the period time of 2s.
10481043

pandas/core/shared_docs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@
797797
... 'B': ['a', 'b', 'c', 'd', 'e'],
798798
... 'C': ['f', 'g', 'h', 'i', 'j']}})
799799
800-
>>> df.replace(to_replace='^[a-g]', value = 'e', regex=True)
800+
>>> df.replace(to_replace='^[a-g]', value='e', regex=True)
801801
A B C
802802
0 0 e e
803803
1 1 e e
@@ -808,7 +808,7 @@
808808
If ``value`` is not ``None`` and `to_replace` is a dictionary, the dictionary
809809
keys will be the DataFrame columns that the replacement will be applied to.
810810
811-
>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value = 'e', regex=True)
811+
>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value='e', regex=True)
812812
A B C
813813
0 0 e f
814814
1 1 e g

pandas/core/window/rolling.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,14 +2439,14 @@ def var(
24392439
create_section_header("Examples"),
24402440
dedent(
24412441
"""\
2442-
>>> ser = pd.Series([1, 5, 2, 7, 12, 6])
2442+
>>> ser = pd.Series([1, 5, 2, 7, 15, 6])
24432443
>>> ser.rolling(3).skew().round(6)
24442444
0 NaN
24452445
1 NaN
24462446
2 1.293343
24472447
3 -0.585583
2448-
4 0.000000
2449-
5 1.545393
2448+
4 0.670284
2449+
5 1.652317
24502450
dtype: float64
24512451
"""
24522452
),
@@ -2794,12 +2794,12 @@ def cov(
27942794
27952795
>>> v1 = [3, 3, 3, 5, 8]
27962796
>>> v2 = [3, 4, 4, 4, 8]
2797-
>>> # numpy returns a 2X2 array, the correlation coefficient
2798-
>>> # is the number at entry [0][1]
2799-
>>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
2800-
0.333333
2801-
>>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
2802-
0.916949
2797+
>>> np.corrcoef(v1[:-1], v2[:-1])
2798+
array([[1. , 0.33333333],
2799+
[0.33333333, 1. ]])
2800+
>>> np.corrcoef(v1[1:], v2[1:])
2801+
array([[1. , 0.9169493],
2802+
[0.9169493, 1. ]])
28032803
>>> s1 = pd.Series(v1)
28042804
>>> s2 = pd.Series(v2)
28052805
>>> s1.rolling(4).corr(s2)
@@ -2813,15 +2813,18 @@ def cov(
28132813
The below example shows a similar rolling calculation on a
28142814
DataFrame using the pairwise option.
28152815
2816-
>>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
2817-
[46., 31.], [50., 36.]])
2818-
>>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
2819-
[[1. 0.6263001]
2820-
[0.6263001 1. ]]
2821-
>>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
2822-
[[1. 0.5553681]
2823-
[0.5553681 1. ]]
2824-
>>> df = pd.DataFrame(matrix, columns=['X','Y'])
2816+
>>> matrix = np.array([[51., 35.],
2817+
... [49., 30.],
2818+
... [47., 32.],
2819+
... [46., 31.],
2820+
... [50., 36.]])
2821+
>>> np.corrcoef(matrix[:-1, 0], matrix[:-1, 1])
2822+
array([[1. , 0.6263001],
2823+
[0.6263001, 1. ]])
2824+
>>> np.corrcoef(matrix[1:, 0], matrix[1:, 1])
2825+
array([[1. , 0.55536811],
2826+
[0.55536811, 1. ]])
2827+
>>> df = pd.DataFrame(matrix, columns=['X', 'Y'])
28252828
>>> df
28262829
X Y
28272830
0 51.0 35.0

pandas/io/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,7 @@ def read_sql(
680680
681681
pandas now supports reading via ADBC drivers
682682
683-
>>> from adbc_driver_postgresql import dbapi
683+
>>> from adbc_driver_postgresql import dbapi # doctest:+SKIP
684684
>>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP
685685
... pd.read_sql('SELECT int_column FROM test_data', conn)
686686
int_column

pandas/plotting/_core.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,10 @@ def hist_frame(
241241
.. plot::
242242
:context: close-figs
243243
244-
>>> df = pd.DataFrame({
245-
... 'length': [1.5, 0.5, 1.2, 0.9, 3],
246-
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]
247-
... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
244+
>>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
245+
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
246+
>>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
247+
>>> df = pd.DataFrame(data, index=index)
248248
>>> hist = df.hist(bins=3)
249249
"""
250250
plot_backend = _get_plot_backend(backend)
@@ -607,10 +607,10 @@ def boxplot_frame_groupby(
607607
>>> import itertools
608608
>>> tuples = [t for t in itertools.product(range(1000), range(4))]
609609
>>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
610-
>>> data = np.random.randn(len(index),4)
610+
>>> data = np.random.randn(len(index), 4)
611611
>>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
612612
>>> grouped = df.groupby(level='lvl1')
613-
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP
613+
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP
614614
615615
The ``subplots=False`` option shows the boxplots in a single figure.
616616
@@ -1400,9 +1400,7 @@ def hist(
14001400
.. plot::
14011401
:context: close-figs
14021402
1403-
>>> df = pd.DataFrame(
1404-
... np.random.randint(1, 7, 6000),
1405-
... columns = ['one'])
1403+
>>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
14061404
>>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
14071405
>>> ax = df.plot.hist(bins=12, alpha=0.5)
14081406

pandas/plotting/_misc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def bootstrap_plot(
439439
:context: close-figs
440440
441441
>>> s = pd.Series(np.random.uniform(size=100))
442-
>>> pd.plotting.bootstrap_plot(s)
442+
>>> pd.plotting.bootstrap_plot(s) # doctest: +SKIP
443443
<Figure size 640x480 with 6 Axes>
444444
"""
445445
plot_backend = _get_plot_backend("matplotlib")

scripts/validate_docstrings.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,12 @@ def validate_pep8(self):
228228
file.name,
229229
]
230230
response = subprocess.run(cmd, capture_output=True, check=False, text=True)
231-
stdout = response.stdout
232-
stdout = stdout.replace(file.name, "")
233-
messages = stdout.strip("\n").splitlines()
234-
if messages:
235-
error_messages.extend(messages)
231+
for output in ("stdout", "stderr"):
232+
out = getattr(response, output)
233+
out = out.replace(file.name, "")
234+
messages = out.strip("\n").splitlines()
235+
if messages:
236+
error_messages.extend(messages)
236237
finally:
237238
file.close()
238239
os.unlink(file.name)

0 commit comments

Comments
 (0)