Skip to content

Commit fb6bb7e

Browse files
committed
Changing pybind11::str to only hold PyUnicodeObject (NOT also bytes).
The corresponding behavior changes are captured by changes in the tests. A significant effort was made to keep the test diffs minimal, yet comprehensive and easy to read. Note: Unlike PR #2256 (dropped), this PR changes exactly one behavior. The two other behavior changes discussed under PR #2256 are avoided here: (1) disabling implicit decoding from bytes to unicode; (2) the list_caster behavior change. Building on this PR, those can easily be implemented if and when desired.
1 parent c7cc5fd commit fb6bb7e

File tree

4 files changed

+28
-12
lines changed

4 files changed

+28
-12
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,14 @@ struct pyobject_caster {
16311631

16321632
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16331633
bool load(handle src, bool /* convert */) {
1634+
#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
1635+
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
1636+
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
1637+
if (!str_from_bytes) throw error_already_set();
1638+
value = reinterpret_steal<type>(str_from_bytes);
1639+
return true;
1640+
}
1641+
#endif
16341642
if (!isinstance<type>(src))
16351643
return false;
16361644
value = reinterpret_borrow<type>(src);

include/pybind11/pytypes.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) {
738738
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
739739
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
740740

741-
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
742-
743741
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
744742

745743
class kwargs_proxy : public handle {
@@ -885,7 +883,7 @@ class bytes;
885883

886884
class str : public object {
887885
public:
888-
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
886+
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)
889887

890888
str(const char *c, size_t n)
891889
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {

include/pybind11/stl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
144144
using value_conv = make_caster<Value>;
145145

146146
bool load(handle src, bool convert) {
147-
if (!isinstance<sequence>(src) || isinstance<str>(src))
147+
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
148148
return false;
149149
auto s = reinterpret_borrow<sequence>(src);
150150
value.clear();

tests/test_pytypes.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -254,13 +254,21 @@ def test_pybind11_str_raw_str():
254254
valid_orig = u"DZ"
255255
valid_utf8 = valid_orig.encode("utf-8")
256256
valid_cvt = cvt(valid_utf8)
257-
assert type(valid_cvt) == bytes # Probably surprising.
258-
assert valid_cvt == b'\xc7\xb1'
257+
assert type(valid_cvt) == type(u"") # Py2 unicode, Py3 str, flake8 compatible
258+
if str is bytes:
259+
assert valid_cvt == valid_orig
260+
else:
261+
assert valid_cvt == u"b'\\xc7\\xb1'"
259262

260263
malformed_utf8 = b'\x80'
261-
malformed_cvt = cvt(malformed_utf8)
262-
assert type(malformed_cvt) == bytes # Probably surprising.
263-
assert malformed_cvt == b'\x80'
264+
if str is bytes:
265+
with pytest.raises(UnicodeDecodeError) as excinfo:
266+
cvt(malformed_utf8)
267+
assert "invalid start byte" in str(excinfo)
268+
else:
269+
malformed_cvt = cvt(malformed_utf8)
270+
assert type(valid_cvt) == type(u"")
271+
assert malformed_cvt == u"b'\\x80'"
264272

265273

266274
def test_implicit_casting():
@@ -397,19 +405,21 @@ def test_isinstance_string_types():
397405
assert not m.isinstance_pybind11_bytes(u"")
398406

399407
assert m.isinstance_pybind11_str(u"")
400-
assert m.isinstance_pybind11_str(b"") # Probably surprising.
408+
assert not m.isinstance_pybind11_str(b"")
401409

402410

403411
def test_pass_bytes_or_unicode_to_string_types():
404412
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
405413
with pytest.raises(TypeError):
406414
m.pass_to_pybind11_bytes(u"Str") # NO implicit encode
407415

408-
assert m.pass_to_pybind11_str(b"Bytes") == 5
416+
assert m.pass_to_pybind11_str(b"Bytes") == 5 # implicit decode
409417
assert m.pass_to_pybind11_str(u"Str") == 3
410418

411419
assert m.pass_to_std_string(b"Bytes") == 5
412420
assert m.pass_to_std_string(u"Str") == 3
413421

414422
malformed_utf8 = b"\x80"
415-
assert m.pass_to_pybind11_str(malformed_utf8) == 1 # NO decoding error
423+
with pytest.raises(UnicodeDecodeError) as excinfo:
424+
m.pass_to_pybind11_str(malformed_utf8)
425+
assert 'invalid start byte' in str(excinfo.value)

0 commit comments

Comments
 (0)