Skip to content

Commit 1d553e4

Browse files
committed
Changing pybind11::str to only hold PyUnicodeObject (NOT also bytes).
The corresponding behavior changes are captured by changes in the tests. A significant effort was made to keep the test diffs minimal, yet comprehensive and easy to read. Note: Unlike PR #2256 (dropped), this PR changes exactly one behavior. The two other behavior changes discussed under PR #2256 are avoided here: (1) disabling implicit decoding from bytes to unicode; (2) the list_caster behavior change. Building on this PR, those can easily be implemented if and when desired.
1 parent 299b46e commit 1d553e4

File tree

4 files changed

+28
-14
lines changed

4 files changed

+28
-14
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,14 @@ struct pyobject_caster {
16311631

16321632
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16331633
bool load(handle src, bool /* convert */) {
1634+
#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
1635+
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
1636+
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
1637+
if (!str_from_bytes) throw error_already_set();
1638+
value = reinterpret_steal<type>(str_from_bytes);
1639+
return true;
1640+
}
1641+
#endif
16341642
if (!isinstance<type>(src))
16351643
return false;
16361644
value = reinterpret_borrow<type>(src);

include/pybind11/pytypes.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) {
738738
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
739739
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
740740

741-
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
742-
743741
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
744742

745743
class kwargs_proxy : public handle {
@@ -885,7 +883,7 @@ class bytes;
885883

886884
class str : public object {
887885
public:
888-
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
886+
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)
889887

890888
str(const char *c, size_t n)
891889
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {

include/pybind11/stl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
144144
using value_conv = make_caster<Value>;
145145

146146
bool load(handle src, bool convert) {
147-
if (!isinstance<sequence>(src) || isinstance<str>(src))
147+
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
148148
return false;
149149
auto s = reinterpret_borrow<sequence>(src);
150150
value.clear();

tests/test_pytypes.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def test_constructors():
190190
"""C++ default and converting constructors are equivalent to type calls in Python"""
191191
types = [bytes, str, bool, int, float, tuple, list, dict, set]
192192
expected = {t.__name__: t() for t in types}
193-
if str is bytes: # Python 2.
193+
if pytest.PY2:
194194
# Note that bytes.__name__ == 'str' in Python 2.
195195
# pybind11::str is unicode even under Python 2.
196196
expected["bytes"] = bytes()
@@ -211,7 +211,7 @@ def test_constructors():
211211
}
212212
inputs = {k.__name__: v for k, v in data.items()}
213213
expected = {k.__name__: k(v) for k, v in data.items()}
214-
if str is bytes: # Similar to the above. See comments above.
214+
if pytest.PY2: # Similar to the above. See comments above.
215215
inputs["bytes"] = b'41'
216216
inputs["str"] = 42
217217
expected["bytes"] = b'41'
@@ -254,13 +254,20 @@ def test_pybind11_str_raw_str():
254254
valid_orig = u"DZ"
255255
valid_utf8 = valid_orig.encode("utf-8")
256256
valid_cvt = cvt(valid_utf8)
257-
assert type(valid_cvt) == bytes # Probably surprising.
258-
assert valid_cvt == b'\xc7\xb1'
257+
assert type(valid_cvt) is unicode if pytest.PY2 else str # noqa: F821
258+
if pytest.PY2:
259+
assert valid_cvt == valid_orig
260+
else:
261+
assert valid_cvt == u"b'\\xc7\\xb1'"
259262

260263
malformed_utf8 = b'\x80'
261-
malformed_cvt = cvt(malformed_utf8)
262-
assert type(malformed_cvt) == bytes # Probably surprising.
263-
assert malformed_cvt == b'\x80'
264+
if pytest.PY2:
265+
with pytest.raises(UnicodeDecodeError):
266+
cvt(malformed_utf8)
267+
else:
268+
malformed_cvt = cvt(malformed_utf8)
269+
assert type(malformed_cvt) is unicode if pytest.PY2 else str # noqa: F821
270+
assert malformed_cvt == u"b'\\x80'"
264271

265272

266273
def test_implicit_casting():
@@ -397,19 +404,20 @@ def test_isinstance_string_types():
397404
assert not m.isinstance_pybind11_bytes(u"")
398405

399406
assert m.isinstance_pybind11_str(u"")
400-
assert m.isinstance_pybind11_str(b"") # Probably surprising.
407+
assert not m.isinstance_pybind11_str(b"")
401408

402409

403410
def test_pass_bytes_or_unicode_to_string_types():
404411
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
405412
with pytest.raises(TypeError):
406413
m.pass_to_pybind11_bytes(u"Str") # NO implicit encode
407414

408-
assert m.pass_to_pybind11_str(b"Bytes") == 5
415+
assert m.pass_to_pybind11_str(b"Bytes") == 5 # implicit decode
409416
assert m.pass_to_pybind11_str(u"Str") == 3
410417

411418
assert m.pass_to_std_string(b"Bytes") == 5
412419
assert m.pass_to_std_string(u"Str") == 3
413420

414421
malformed_utf8 = b"\x80"
415-
assert m.pass_to_pybind11_str(malformed_utf8) == 1 # NO decoding error
422+
with pytest.raises(UnicodeDecodeError):
423+
m.pass_to_pybind11_str(malformed_utf8)

0 commit comments

Comments
 (0)