From 8633c4cefde8dcedb715a20cdd0aa9705ea94be7 Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Tue, 29 Sep 2015 17:57:49 -0300 Subject: [PATCH 1/4] Fix encoding errors for parametrized tests with unicode parameters in py2 Fix #1085 --- _pytest/python.py | 2 +- testing/python/metafunc.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/_pytest/python.py b/_pytest/python.py index 0ad18b3efde..4fbf35c5a8c 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1093,7 +1093,7 @@ def _idval(val, argname, idx, idfn): # convertible to ascii, return it as an str() object instead try: return str(val) - except UnicodeDecodeError: + except UnicodeError: # fallthrough pass return str(argname)+str(idx) diff --git a/testing/python/metafunc.py b/testing/python/metafunc.py index d0df62f81ed..3a05af9b705 100644 --- a/testing/python/metafunc.py +++ b/testing/python/metafunc.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import re import pytest, py @@ -118,6 +119,24 @@ class A: assert metafunc._calls[2].id == "x1-a" assert metafunc._calls[3].id == "x1-b" + @pytest.mark.skipif('sys.version_info[0] >= 3') + def test_unicode_idval_python2(self): + """unittest for the expected behavior to obtain ids for parametrized + unicode values in Python 2: if convertible to ascii, they should appear + as ascii values, otherwise fallback to hide the value behind the name + of the parametrized variable name. 
#1086 + """ + from _pytest.python import _idval + values = [ + (u'', ''), + (u'ascii', 'ascii'), + (u'ação', 'a6'), + (u'josé@blah.com', 'a6'), + (u'δοκ.ιμή@παράδειγμα.δοκιμή', 'a6'), + ] + for val, expected in values: + assert _idval(val, 'a', 6, None) == expected + @pytest.mark.issue250 def test_idmaker_autoname(self): from _pytest.python import idmaker From b64470443f7d06912b376e507498662d56508850 Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Tue, 29 Sep 2015 18:20:30 -0300 Subject: [PATCH 2/4] Fix SystemError when using unicode_escape on Python 3 Fix #1087 --- _pytest/python.py | 14 +++++++++----- testing/python/metafunc.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/_pytest/python.py b/_pytest/python.py index 4fbf35c5a8c..da41bf8e976 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1053,10 +1053,14 @@ def _escape_bytes(val): want to return escaped bytes for any byte, even if they match a utf-8 string. """ - # source: http://goo.gl/bGsnwC - import codecs - encoded_bytes, _ = codecs.escape_encode(val) - return encoded_bytes.decode('ascii') + if val: + # source: http://goo.gl/bGsnwC + import codecs + encoded_bytes, _ = codecs.escape_encode(val) + return encoded_bytes.decode('ascii') + else: + # empty bytes crashes codecs.escape_encode (#1087) + return '' else: def _escape_bytes(val): """ @@ -1064,7 +1068,7 @@ def _escape_bytes(val): is a full ascii string, otherwise escape it into its binary form. 
""" try: - return val.encode('ascii') + return val.decode('ascii') except UnicodeDecodeError: return val.encode('string-escape') diff --git a/testing/python/metafunc.py b/testing/python/metafunc.py index 3a05af9b705..111ca615ae1 100644 --- a/testing/python/metafunc.py +++ b/testing/python/metafunc.py @@ -137,6 +137,23 @@ def test_unicode_idval_python2(self): for val, expected in values: assert _idval(val, 'a', 6, None) == expected + def test_bytes_idval(self): + """unittest for the expected behavior to obtain ids for parametrized + bytes values: + - python2: non-ascii strings are considered bytes and formatted using + "binary escape", where any byte > 127 is escaped into its hex form. + - python3: bytes objects are always escaped using "binary escape". + """ + from _pytest.python import _idval + values = [ + (b'', ''), + (b'\xc3\xb4\xff\xe4', '\\xc3\\xb4\\xff\\xe4'), + (b'ascii', 'ascii'), + (u'αρά'.encode('utf-8'), '\\xce\\xb1\\xcf\\x81\\xce\\xac'), + ] + for val, expected in values: + assert _idval(val, 'a', 6, None) == expected + @pytest.mark.issue250 def test_idmaker_autoname(self): from _pytest.python import idmaker From 6ae16eba36e8923a8bf3c7d09a315fee70f187f3 Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Tue, 29 Sep 2015 22:37:02 -0300 Subject: [PATCH 3/4] add entries for #1085 and #1087 to the CHANGELOG --- CHANGELOG | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index b0141f25797..44053343bdd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,13 @@ 2.8.2.dev --------- +- fix #1085: proper handling of encoding errors when passing encoded byte + strings to pytest.parametrize in Python 2. + Thanks Themanwithoutaplan for the report and Bruno Oliveira for the PR. + +- fix #1087: handling SystemError when passing empty byte strings to + pytest.parametrize in Python 3. + Thanks Paul Kehrer for the report and Bruno Oliveira for the PR. 
2.8.1 ----- From c9480c5b8bc2ef68df64771f0e6dc75832db764e Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Wed, 30 Sep 2015 17:02:19 -0300 Subject: [PATCH 4/4] Move imports outside _escape_bytes as suggested in review --- _pytest/python.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/_pytest/python.py b/_pytest/python.py index da41bf8e976..4dc3dd7ad65 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1041,6 +1041,8 @@ def addcall(self, funcargs=None, id=_notexists, param=_notexists): if _PY3: + import codecs + def _escape_bytes(val): """ If val is pure ascii, returns it as a str(), otherwise escapes @@ -1055,7 +1057,6 @@ def _escape_bytes(val): """ if val: # source: http://goo.gl/bGsnwC - import codecs encoded_bytes, _ = codecs.escape_encode(val) return encoded_bytes.decode('ascii') else: @@ -1064,7 +1065,7 @@ def _escape_bytes(val): else: def _escape_bytes(val): """ - In py2 bytes and str are the same, so return it unchanged if it + In py2 bytes and str are the same type, so return it unchanged if it is a full ascii string, otherwise escape it into its binary form. """ try: