
Commit eb2e6f2

Use a list for ambiguous files
1 parent d606977 commit eb2e6f2

File tree

1 file changed (+11, -6)

Lib/test/test_tokenize.py

Lines changed: 11 additions & 6 deletions
@@ -1804,7 +1804,7 @@ def test_backslash_continuation(self):
         u.prev_row = 2
         u.add_whitespace((4, 4))
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
-        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
+        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n', compare_tokens_only=True)

     def test_iter_compat(self):
         u = tokenize.Untokenizer()
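Note on why this test now compares tokens only: the source passed to check_roundtrip contains a backslash continuation on a line of its own, and such a line produces no tokens, so untokenize() cannot reconstruct its exact whitespace. A minimal sketch (not part of the commit) of the effect, using only the stdlib tokenize module:

    import io
    import tokenize

    # A source with a line consisting only of whitespace and a backslash.
    source = b'a = (1,\n     \\\n)\n'
    tokens = list(tokenize.tokenize(io.BytesIO(source).readline))

    # The whitespace-only continuation line appears in no token, so the
    # rebuilt bytes lose its indentation and differ from the original...
    rebuilt = tokenize.untokenize(tokens)
    print(rebuilt == source)  # False

    # ...yet re-tokenizing the rebuilt source yields the same (type, string)
    # pairs, which is exactly what compare_tokens_only=True checks.
    retokens = list(tokenize.tokenize(io.BytesIO(rebuilt).readline))
    print([t[:2] for t in retokens] == [t[:2] for t in tokens])  # True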
@@ -1838,16 +1838,16 @@ def contains_ambiguous_backslash(source):

 class TestRoundtrip(TestCase):

-    def check_roundtrip(self, f):
+    def check_roundtrip(self, f, *, compare_tokens_only=False):
         """
         Test roundtrip for `untokenize`. `f` is an open file or a string.
         The source code in f is tokenized to both 5- and 2-tuples.
         Both sequences are converted back to source code via
         tokenize.untokenize(), and the latter tokenized again to 2-tuples.
         The test fails if the 3 pair tokenizations do not match.

-        If the source code can be untokenized unambiguously, the
-        untokenized code must match the original code exactly.
+        If `compare_tokens_only` is False, the exact output of `untokenize`
+        is compared against the original source code.

         When untokenize bugs are fixed, untokenize with 5-tuples should
         reproduce code that does not contain a backslash continuation
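For reference, the three-way roundtrip the docstring describes can be sketched as a standalone function; roundtrip_tokens_match is an illustrative name, not part of the test suite:

    import io
    import tokenize

    def roundtrip_tokens_match(code):
        """Tokenize `code` (bytes) to 5-tuples and 2-tuples, untokenize
        both, re-tokenize the results, and compare (type, string) pairs."""
        tokens5 = list(tokenize.tokenize(io.BytesIO(code).readline))
        tokens2 = [tok[:2] for tok in tokens5]
        rebuilt2 = tokenize.untokenize(tokens2)  # compatibility (2-tuple) mode
        rebuilt5 = tokenize.untokenize(tokens5)  # full (5-tuple) mode
        retok2 = [t[:2] for t in tokenize.tokenize(io.BytesIO(rebuilt2).readline)]
        retok5 = [t[:2] for t in tokenize.tokenize(io.BytesIO(rebuilt5).readline)]
        return retok2 == tokens2 and retok5 == tokens2

    print(roundtrip_tokens_match(b'x = 1\nif x:\n    x += 1\n'))  # True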
@@ -1872,7 +1872,9 @@ def check_roundtrip(self, f):
         tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
         self.assertEqual(tokens2_from5, tokens2)

-        if not contains_ambiguous_backslash(code):
+        if compare_tokens_only:
+            self.assertTrue(contains_ambiguous_backslash(code))
+        else:
             # The BOM does not produce a token so there is no way to preserve it.
             code_without_bom = code.removeprefix(b'\xef\xbb\xbf')
             readline = iter(code_without_bom.splitlines(keepends=True)).__next__
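The new branch ties the flag to the detector: token-only comparison may be requested precisely for sources that trip contains_ambiguous_backslash. The helper's body lies outside this hunk (only its name is visible in the hunk header), so the following is a hypothetical reconstruction, assuming it flags a line consisting solely of whitespace and a backslash continuation:

    import re

    def contains_ambiguous_backslash(source):
        # Hypothetical reconstruction; the real helper is defined earlier
        # in the file. A line holding only whitespace and a trailing
        # backslash emits no tokens, so its indent cannot be recovered.
        return re.search(rb'(?m)^[ \t]*\\\r?$', source) is not None

    print(contains_ambiguous_backslash(b'a = (1,\n     \\\n)\n'))  # True
    print(contains_ambiguous_backslash(b'x = 1 + \\\n    2\n'))    # False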
@@ -2019,6 +2021,8 @@ def test_random_files(self):
         import glob, random
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
+        # Known files which cannot be untokenized exactly
+        known_ambiguous_files = [os.path.join(tempdir, "test_traceback.py")]

         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
@@ -2028,7 +2032,8 @@ def test_random_files(self):
                 print('tokenize', testfile)
             with open(testfile, 'rb') as f:
                 with self.subTest(file=testfile):
-                    self.check_roundtrip(f)
+                    compare_tokens_only = testfile in known_ambiguous_files
+                    self.check_roundtrip(f, compare_tokens_only=compare_tokens_only)
                     self.check_line_extraction(f)
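Given the assertion added in check_roundtrip, a file belongs in known_ambiguous_files only if the detector flags it. An illustrative spot check (path relative to a CPython checkout, reusing the contains_ambiguous_backslash sketch above):

    with open("Lib/test/test_traceback.py", "rb") as f:
        print(contains_ambiguous_backslash(f.read()))  # expected: True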
