Skip to content

Commit 61882fb

Browse files
committed
Changing pybind11::str to only hold PyUnicodeObject (NOT also bytes).
The resulting behavior changes are captured by the updated tests. A significant effort was made to keep the test diffs minimal, yet comprehensive and easy to read. Note: Unlike PR #2256 (dropped), this PR changes exactly one behavior: pybind11::str now holds only PyUnicodeObject, no longer bytes. The two other behavior changes discussed under PR #2256 are avoided here (1. disabling implicit decoding from bytes to unicode; 2. the list_caster behavior change). Building on this PR, those can be easily implemented if and when desired.
1 parent 8e71f89 commit 61882fb

File tree

4 files changed

+28
-14
lines changed

4 files changed

+28
-14
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,14 @@ struct pyobject_caster {
16311631

16321632
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16331633
bool load(handle src, bool /* convert */) {
1634+
#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
1635+
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
1636+
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
1637+
if (!str_from_bytes) throw error_already_set();
1638+
value = reinterpret_steal<type>(str_from_bytes);
1639+
return true;
1640+
}
1641+
#endif
16341642
if (!isinstance<type>(src))
16351643
return false;
16361644
value = reinterpret_borrow<type>(src);

include/pybind11/pytypes.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,6 @@ inline bool PyIterable_Check(PyObject *obj) {
752752
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
753753
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
754754

755-
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
756-
757755
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
758756

759757
class kwargs_proxy : public handle {
@@ -899,7 +897,7 @@ class bytes;
899897

900898
class str : public object {
901899
public:
902-
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
900+
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)
903901

904902
str(const char *c, size_t n)
905903
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {

include/pybind11/stl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
144144
using value_conv = make_caster<Value>;
145145

146146
bool load(handle src, bool convert) {
147-
if (!isinstance<sequence>(src) || isinstance<str>(src))
147+
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
148148
return false;
149149
auto s = reinterpret_borrow<sequence>(src);
150150
value.clear();

tests/test_pytypes.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_constructors():
192192
"""C++ default and converting constructors are equivalent to type calls in Python"""
193193
types = [bytes, str, bool, int, float, tuple, list, dict, set]
194194
expected = {t.__name__: t() for t in types}
195-
if str is bytes: # Python 2.
195+
if pytest.PY2:
196196
# Note that bytes.__name__ == 'str' in Python 2.
197197
# pybind11::str is unicode even under Python 2.
198198
expected["bytes"] = bytes()
@@ -213,7 +213,7 @@ def test_constructors():
213213
}
214214
inputs = {k.__name__: v for k, v in data.items()}
215215
expected = {k.__name__: k(v) for k, v in data.items()}
216-
if str is bytes: # Similar to the above. See comments above.
216+
if pytest.PY2: # Similar to the above. See comments above.
217217
inputs["bytes"] = b'41'
218218
inputs["str"] = 42
219219
expected["bytes"] = b'41'
@@ -256,13 +256,20 @@ def test_pybind11_str_raw_str():
256256
valid_orig = u"DZ"
257257
valid_utf8 = valid_orig.encode("utf-8")
258258
valid_cvt = cvt(valid_utf8)
259-
assert type(valid_cvt) == bytes # Probably surprising.
260-
assert valid_cvt == b'\xc7\xb1'
259+
assert type(valid_cvt) is unicode if pytest.PY2 else str # noqa: F821
260+
if pytest.PY2:
261+
assert valid_cvt == valid_orig
262+
else:
263+
assert valid_cvt == u"b'\\xc7\\xb1'"
261264

262265
malformed_utf8 = b'\x80'
263-
malformed_cvt = cvt(malformed_utf8)
264-
assert type(malformed_cvt) == bytes # Probably surprising.
265-
assert malformed_cvt == b'\x80'
266+
if pytest.PY2:
267+
with pytest.raises(UnicodeDecodeError):
268+
cvt(malformed_utf8)
269+
else:
270+
malformed_cvt = cvt(malformed_utf8)
271+
assert type(malformed_cvt) is unicode if pytest.PY2 else str # noqa: F821
272+
assert malformed_cvt == u"b'\\x80'"
266273

267274

268275
def test_implicit_casting():
@@ -397,19 +404,20 @@ def test_isinstance_string_types():
397404
assert not m.isinstance_pybind11_bytes(u"")
398405

399406
assert m.isinstance_pybind11_str(u"")
400-
assert m.isinstance_pybind11_str(b"") # Probably surprising.
407+
assert not m.isinstance_pybind11_str(b"")
401408

402409

403410
def test_pass_bytes_or_unicode_to_string_types():
404411
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
405412
with pytest.raises(TypeError):
406413
m.pass_to_pybind11_bytes(u"Str") # NO implicit encode
407414

408-
assert m.pass_to_pybind11_str(b"Bytes") == 5
415+
assert m.pass_to_pybind11_str(b"Bytes") == 5 # implicit decode
409416
assert m.pass_to_pybind11_str(u"Str") == 3
410417

411418
assert m.pass_to_std_string(b"Bytes") == 5
412419
assert m.pass_to_std_string(u"Str") == 3
413420

414421
malformed_utf8 = b"\x80"
415-
assert m.pass_to_pybind11_str(malformed_utf8) == 1 # NO decoding error
422+
with pytest.raises(UnicodeDecodeError):
423+
m.pass_to_pybind11_str(malformed_utf8)

0 commit comments

Comments
 (0)