@@ -1804,7 +1804,7 @@ def test_backslash_continuation(self):
         u.prev_row = 2
         u.add_whitespace((4, 4))
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
-        TestRoundtrip.check_roundtrip(self, 'a\nb\nc\n\\\nc\n')
+        TestRoundtrip.check_roundtrip(self, 'a\nb\nc\n\\\nc\n', compare_tokens_only=True)

     def test_iter_compat(self):
         u = tokenize.Untokenizer()
@@ -1838,16 +1838,16 @@ def contains_ambiguous_backslash(source):

 class TestRoundtrip(TestCase):

-    def check_roundtrip(self, f):
+    def check_roundtrip(self, f, *, compare_tokens_only=False):
         """
         Test roundtrip for `untokenize`. `f` is an open file or a string.
         The source code in f is tokenized to both 5- and 2-tuples.
         Both sequences are converted back to source code via
         tokenize.untokenize(), and the latter tokenized again to 2-tuples.
         The test fails if the 3 pair tokenizations do not match.

-        If the source code can be untokenized unambiguously, the
-        untokenized code must match the original code exactly.
+        If `compare_tokens_only` is False, the exact output of `untokenize`
+        is compared against the original source code.

         When untokenize bugs are fixed, untokenize with 5-tuples should
         reproduce code that does not contain a backslash continuation
@@ -1872,7 +1872,9 @@ def check_roundtrip(self, f):
         tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
         self.assertEqual(tokens2_from5, tokens2)

-        if not contains_ambiguous_backslash(code):
+        if compare_tokens_only:
+            self.assertTrue(contains_ambiguous_backslash(code))
+        else:
             # The BOM does not produce a token so there is no way to preserve it.
             code_without_bom = code.removeprefix(b'\xef\xbb\xbf')
             readline = iter(code_without_bom.splitlines(keepends=True)).__next__
@@ -2019,6 +2021,8 @@ def test_random_files(self):
         import glob, random
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
+        # Known files which cannot be untokenized exactly
+        known_ambiguous_files = [os.path.join(tempdir, "test_traceback.py")]

         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
@@ -2028,7 +2032,8 @@ def test_random_files(self):
                 print('tokenize', testfile)
             with open(testfile, 'rb') as f:
                 with self.subTest(file=testfile):
-                    self.check_roundtrip(f)
+                    compare_tokens_only = testfile in known_ambiguous_files
+                    self.check_roundtrip(f, compare_tokens_only=compare_tokens_only)
                 self.check_line_extraction(f)
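For context, a minimal sketch (not part of the patch) of the ambiguity that the `compare_tokens_only` path works around: spaces before a line-continuation backslash leave no trace in the token stream, so `untokenize` cannot reconstruct the original bytes exactly, although the 2-tuple token stream still round-trips. The exact rebuilt bytes depend on the untokenizer's whitespace handling and may vary across CPython versions.

import io
import tokenize

# Two spaces before the continuation backslash; they are not part of
# any token, so they cannot be recovered from the token stream.
source = b'x = 1 +  \\\n    2\n'

tokens = list(tokenize.tokenize(io.BytesIO(source).readline))
rebuilt = tokenize.untokenize(tokens)

print(rebuilt == source)  # typically False: the extra spaces are lost

# The 2-tuple (type, string) streams still match, which is the weaker
# invariant that check_roundtrip(..., compare_tokens_only=True) checks.
tokens2 = [t[:2] for t in tokenize.tokenize(io.BytesIO(rebuilt).readline)]
assert tokens2 == [t[:2] for t in tokens]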