Skip to content

Commit d76bcea

Browse files
authored
Merge branch 'main' into test-build-test
2 parents d9f8597 + 083d030 commit d76bcea

File tree

15 files changed

+80
-23
lines changed

15 files changed

+80
-23
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ RUN apt-get install -y build-essential
88
RUN apt-get install -y libhdf5-dev
99

1010
RUN python -m pip install --upgrade pip
11-
RUN python -m pip install --use-deprecated=legacy-resolver \
11+
RUN python -m pip install \
1212
-r https://github.com/raw/pandas-dev/pandas/main/requirements-dev.txt
1313
CMD ["/bin/bash"]

doc/source/whatsnew/v2.0.0.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
3838
* :func:`read_csv`
3939
* :func:`read_excel`
4040
* :func:`read_sql`
41+
* :func:`read_sql_query`
42+
* :func:`read_sql_table`
4143

4244
Additionally, a new global configuration, ``mode.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
4345
to select the nullable dtypes implementation.
@@ -394,7 +396,7 @@ If installed, we now require:
394396
+-----------------+-----------------+----------+---------+
395397
| Package | Minimum Version | Required | Changed |
396398
+=================+=================+==========+=========+
397-
| mypy (dev) | 0.990 | | X |
399+
| mypy (dev) | 0.991 | | X |
398400
+-----------------+-----------------+----------+---------+
399401
| python-dateutil | 2.8.2 | X | X |
400402
+-----------------+-----------------+----------+---------+
@@ -836,6 +838,7 @@ Indexing
836838
- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
837839
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
838840
- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
841+
- Bug in :meth:`Series.loc` raising an error when the end of a slice indexer is out of bounds (:issue:`50161`)
839842
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
840843
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`)
841844
- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
@@ -879,7 +882,7 @@ I/O
879882
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
880883
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
881884
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
882-
-
885+
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
883886

884887
Period
885888
^^^^^^

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ dependencies:
8080
- flake8=6.0.0
8181
- flake8-bugbear=22.7.1 # used by flake8, find likely bugs
8282
- isort>=5.2.1 # check that imports are in the right order
83-
- mypy=0.990
83+
- mypy=0.991
8484
- pre-commit>=2.15.0
8585
- pycodestyle # used by flake8
8686
- pyupgrade

pandas/_libs/src/ujson/python/objToJSON.c

+11-2
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,18 @@ static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
332332
return PyBytes_AS_STRING(obj);
333333
}
334334

335-
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
335+
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
336336
size_t *_outLen) {
337-
return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
337+
char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj,
338+
(Py_ssize_t *)_outLen);
339+
if (encoded == NULL) {
340+
/* Something went wrong.
341+
Set errorMsg (to tell encoder to stop),
342+
and let Python exception propagate. */
343+
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
344+
enc->errorMsg = "Encoding failed.";
345+
}
346+
return encoded;
338347
}
339348

340349
/* JSON callback. returns a char* and mutates the pointer to *len */

pandas/core/indexes/base.py

+5
Original file line numberDiff line numberDiff line change
@@ -6158,6 +6158,11 @@ def _maybe_cast_slice_bound(self, label, side: str_t):
61586158
# We are a plain index here (sub-class override this method if they
61596159
# wish to have special treatment for floats/ints, e.g. Float64Index and
61606160
# datetimelike Indexes
6161+
# Special case numeric EA Indexes, since they are not handled by NumericIndex
6162+
6163+
if is_extension_array_dtype(self.dtype) and is_numeric_dtype(self.dtype):
6164+
return self._maybe_cast_indexer(label)
6165+
61616166
# reject them, if index does not contain label
61626167
if (is_float(label) or is_integer(label)) and label not in self:
61636168
self._raise_invalid_indexer("slice", label)

pandas/io/formats/latex.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,7 @@ def _get_strcols(self) -> list[list[str]]:
160160
def pad_empties(x):
161161
for pad in reversed(x):
162162
if pad:
163-
break
164-
return [x[0]] + [i if i else " " * len(pad) for i in x[1:]]
163+
return [x[0]] + [i if i else " " * len(pad) for i in x[1:]]
165164

166165
gen = (pad_empties(i) for i in out)
167166

pandas/io/formats/printing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -417,9 +417,9 @@ def best_len(values: list[str]) -> int:
417417
for max_items in reversed(range(1, len(value) + 1)):
418418
pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
419419
if len(pprinted_seq) < max_space:
420+
head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
421+
tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
420422
break
421-
head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
422-
tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
423423

424424
summary = ""
425425
line = space2

pandas/io/formats/style.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2304,8 +2304,8 @@ def set_sticky(
23042304
"selector": f"thead tr:nth-child({obj.nlevels+1}) th",
23052305
"props": props
23062306
+ (
2307-
f"top:{(i+1) * pixel_size}px; height:{pixel_size}px; "
2308-
"z-index:2;"
2307+
f"top:{(len(levels_)) * pixel_size}px; "
2308+
f"height:{pixel_size}px; z-index:2;"
23092309
),
23102310
}
23112311
)

pandas/io/sql.py

+20
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ def read_sql_table(
224224
parse_dates: list[str] | dict[str, str] | None = ...,
225225
columns: list[str] | None = ...,
226226
chunksize: None = ...,
227+
use_nullable_dtypes: bool = ...,
227228
) -> DataFrame:
228229
...
229230

@@ -238,6 +239,7 @@ def read_sql_table(
238239
parse_dates: list[str] | dict[str, str] | None = ...,
239240
columns: list[str] | None = ...,
240241
chunksize: int = ...,
242+
use_nullable_dtypes: bool = ...,
241243
) -> Iterator[DataFrame]:
242244
...
243245

@@ -251,6 +253,7 @@ def read_sql_table(
251253
parse_dates: list[str] | dict[str, str] | None = None,
252254
columns: list[str] | None = None,
253255
chunksize: int | None = None,
256+
use_nullable_dtypes: bool = False,
254257
) -> DataFrame | Iterator[DataFrame]:
255258
"""
256259
Read SQL database table into a DataFrame.
@@ -287,6 +290,12 @@ def read_sql_table(
287290
chunksize : int, default None
288291
If specified, returns an iterator where `chunksize` is the number of
289292
rows to include in each chunk.
293+
use_nullable_dtypes : bool, default False
294+
Whether to use nullable dtypes as default when reading data. If
295+
set to True, nullable dtypes are used for all dtypes that have a nullable
296+
implementation, even if no nulls are present.
297+
298+
.. versionadded:: 2.0
290299
291300
Returns
292301
-------
@@ -318,6 +327,7 @@ def read_sql_table(
318327
parse_dates=parse_dates,
319328
columns=columns,
320329
chunksize=chunksize,
330+
use_nullable_dtypes=use_nullable_dtypes,
321331
)
322332

323333
if table is not None:
@@ -336,6 +346,7 @@ def read_sql_query(
336346
parse_dates: list[str] | dict[str, str] | None = ...,
337347
chunksize: None = ...,
338348
dtype: DtypeArg | None = ...,
349+
use_nullable_dtypes: bool = ...,
339350
) -> DataFrame:
340351
...
341352

@@ -350,6 +361,7 @@ def read_sql_query(
350361
parse_dates: list[str] | dict[str, str] | None = ...,
351362
chunksize: int = ...,
352363
dtype: DtypeArg | None = ...,
364+
use_nullable_dtypes: bool = ...,
353365
) -> Iterator[DataFrame]:
354366
...
355367

@@ -363,6 +375,7 @@ def read_sql_query(
363375
parse_dates: list[str] | dict[str, str] | None = None,
364376
chunksize: int | None = None,
365377
dtype: DtypeArg | None = None,
378+
use_nullable_dtypes: bool = False,
366379
) -> DataFrame | Iterator[DataFrame]:
367380
"""
368381
Read SQL query into a DataFrame.
@@ -406,6 +419,12 @@ def read_sql_query(
406419
{‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.
407420
408421
.. versionadded:: 1.3.0
422+
use_nullable_dtypes : bool, default False
423+
Whether to use nullable dtypes as default when reading data. If
424+
set to True, nullable dtypes are used for all dtypes that have a nullable
425+
implementation, even if no nulls are present.
426+
427+
.. versionadded:: 2.0
409428
410429
Returns
411430
-------
@@ -430,6 +449,7 @@ def read_sql_query(
430449
parse_dates=parse_dates,
431450
chunksize=chunksize,
432451
dtype=dtype,
452+
use_nullable_dtypes=use_nullable_dtypes,
433453
)
434454

435455

pandas/tests/frame/test_iteration.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,4 @@ def test_sequence_like_with_categorical(self):
159159
str(s)
160160

161161
for c, col in df.items():
162-
str(s)
162+
str(col)

pandas/tests/io/json/test_ujson.py

+9
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ def test_encode_unicode_4bytes_utf8highest(self):
291291
assert enc == json.dumps(four_bytes_input)
292292
assert dec == json.loads(enc)
293293

294+
def test_encode_unicode_error(self):
295+
string = "'\udac0'"
296+
msg = (
297+
r"'utf-8' codec can't encode character '\\udac0' "
298+
r"in position 1: surrogates not allowed"
299+
)
300+
with pytest.raises(UnicodeEncodeError, match=msg):
301+
ujson.dumps([string])
302+
294303
def test_encode_array_in_array(self):
295304
arr_in_arr_input = [[[[]]]]
296305
output = ujson.encode(arr_in_arr_input)

pandas/tests/io/test_sql.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -2276,21 +2276,22 @@ def test_get_engine_auto_error_message(self):
22762276
pass
22772277
# TODO(GH#36893) fill this in when we add more engines
22782278

2279-
def test_read_sql_nullable_dtypes(self, string_storage):
2279+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
2280+
def test_read_sql_nullable_dtypes(self, string_storage, func):
22802281
# GH#50048
22812282
table = "test"
22822283
df = self.nullable_data()
22832284
df.to_sql(table, self.conn, index=False, if_exists="replace")
22842285

22852286
with pd.option_context("mode.string_storage", string_storage):
2286-
result = pd.read_sql(
2287+
result = getattr(pd, func)(
22872288
f"Select * from {table}", self.conn, use_nullable_dtypes=True
22882289
)
22892290
expected = self.nullable_expected(string_storage)
22902291
tm.assert_frame_equal(result, expected)
22912292

22922293
with pd.option_context("mode.string_storage", string_storage):
2293-
iterator = pd.read_sql(
2294+
iterator = getattr(pd, func)(
22942295
f"Select * from {table}",
22952296
self.conn,
22962297
use_nullable_dtypes=True,
@@ -2300,20 +2301,21 @@ def test_read_sql_nullable_dtypes(self, string_storage):
23002301
for result in iterator:
23012302
tm.assert_frame_equal(result, expected)
23022303

2303-
def test_read_sql_nullable_dtypes_table(self, string_storage):
2304+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
2305+
def test_read_sql_nullable_dtypes_table(self, string_storage, func):
23042306
# GH#50048
23052307
table = "test"
23062308
df = self.nullable_data()
23072309
df.to_sql(table, self.conn, index=False, if_exists="replace")
23082310

23092311
with pd.option_context("mode.string_storage", string_storage):
2310-
result = pd.read_sql(table, self.conn, use_nullable_dtypes=True)
2312+
result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True)
23112313
expected = self.nullable_expected(string_storage)
23122314
tm.assert_frame_equal(result, expected)
23132315

23142316
with pd.option_context("mode.string_storage", string_storage):
2315-
iterator = pd.read_sql(
2316-
f"Select * from {table}",
2317+
iterator = getattr(pd, func)(
2318+
table,
23172319
self.conn,
23182320
use_nullable_dtypes=True,
23192321
chunksize=3,
@@ -2463,7 +2465,8 @@ class Test(BaseModel):
24632465
def nullable_expected(self, storage) -> DataFrame:
24642466
return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})
24652467

2466-
def test_read_sql_nullable_dtypes_table(self, string_storage):
2468+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
2469+
def test_read_sql_nullable_dtypes_table(self, string_storage, func):
24672470
# GH#50048 Not supported for sqlite
24682471
pass
24692472

pandas/tests/series/indexing/test_indexing.py

+9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pandas import (
1111
NA,
1212
DataFrame,
13+
Index,
1314
IndexSlice,
1415
MultiIndex,
1516
Series,
@@ -366,6 +367,14 @@ def test_loc_setitem_nested_data_enlargement():
366367
tm.assert_series_equal(ser, expected)
367368

368369

370+
def test_loc_ea_numeric_index_oob_slice_end():
371+
# GH#50161
372+
ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))
373+
result = ser.loc[2:3]
374+
expected = Series(1, index=Index([2], dtype="Int64"))
375+
tm.assert_series_equal(result, expected)
376+
377+
369378
def test_getitem_bool_int_key():
370379
# GH#48653
371380
ser = Series({True: 1, False: 0})

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ disable = [
217217
"too-many-public-methods",
218218
"too-many-return-statements",
219219
"too-many-statements",
220+
"undefined-loop-variable",
220221
"unexpected-keyword-arg",
221222
"ungrouped-imports",
222223
"unsubscriptable-object",
@@ -276,7 +277,6 @@ disable = [
276277
"signature-differs",
277278
"super-init-not-called",
278279
"try-except-raise",
279-
"undefined-loop-variable",
280280
"unnecessary-lambda",
281281
"unspecified-encoding",
282282
"unused-argument",

requirements-dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ cpplint
5757
flake8==6.0.0
5858
flake8-bugbear==22.7.1
5959
isort>=5.2.1
60-
mypy==0.990
60+
mypy==0.991
6161
pre-commit>=2.15.0
6262
pycodestyle
6363
pyupgrade

0 commit comments

Comments
 (0)