
Commit e2c9bb7

tomasr8 authored and pablogsal committed
Revert "Use a list for ambiguous files"
This reverts commit eb2e6f2.
1 parent: 4b32c8e · commit: e2c9bb7

File tree: 1 file changed (+6, -11 lines)


Lib/test/test_tokenize.py

Lines changed: 6 additions & 11 deletions
@@ -1804,7 +1804,7 @@ def test_backslash_continuation(self):
         u.prev_row = 2
         u.add_whitespace((4, 4))
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
-        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n', compare_tokens_only=True)
+        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
 
     def test_iter_compat(self):
         u = tokenize.Untokenizer()
@@ -1838,16 +1838,16 @@ def contains_ambiguous_backslash(source):
 
 class TestRoundtrip(TestCase):
 
-    def check_roundtrip(self, f, *, compare_tokens_only=False):
+    def check_roundtrip(self, f):
         """
         Test roundtrip for `untokenize`. `f` is an open file or a string.
         The source code in f is tokenized to both 5- and 2-tuples.
         Both sequences are converted back to source code via
         tokenize.untokenize(), and the latter tokenized again to 2-tuples.
         The test fails if the 3 pair tokenizations do not match.
 
-        If `compare_tokens_only` is False, the exact output of `untokenize`
-        is compared against the original source code.
+        If the source code can be untokenized unambiguously, the
+        untokenized code must match the original code exactly.
 
         When untokenize bugs are fixed, untokenize with 5-tuples should
         reproduce code that does not contain a backslash continuation
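
The roundtrip that this docstring describes can be sketched with the public tokenize API alone. The snippet below is a minimal illustration of the 2-tuple leg of the check; the helper name roundtrip_2tuples is invented for this sketch and does not appear in the patch.

# Minimal sketch of the 2-tuple roundtrip (illustrative; the helper
# name `roundtrip_2tuples` is not part of the patch).
import io
import tokenize

def roundtrip_2tuples(code):
    """Tokenize `code` (bytes), untokenize the (type, string) pairs,
    retokenize, and report whether the two token streams match."""
    readline = io.BytesIO(code).readline
    tokens2 = [tok[:2] for tok in tokenize.tokenize(readline)]
    # With an ENCODING token present, untokenize() returns bytes.
    regenerated = tokenize.untokenize(tokens2)
    readline2 = io.BytesIO(regenerated).readline
    return [tok[:2] for tok in tokenize.tokenize(readline2)] == tokens2

print(roundtrip_2tuples(b"a = (1 +\n     2)\n"))  # expected: True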
@@ -1872,9 +1872,7 @@ def check_roundtrip(self, f, *, compare_tokens_only=False):
         tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
         self.assertEqual(tokens2_from5, tokens2)
 
-        if compare_tokens_only:
-            self.assertTrue(contains_ambiguous_backslash(code))
-        else:
+        if not contains_ambiguous_backslash(code):
             # The BOM does not produce a token so there is no way to preserve it.
             code_without_bom = code.removeprefix(b'\xef\xbb\xbf')
             readline = iter(code_without_bom.splitlines(keepends=True)).__next__
@@ -2021,8 +2019,6 @@ def test_random_files(self):
         import glob, random
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
-        # Known files which cannot be untokenized exactly
-        known_ambiguous_files = [os.path.join(tempdir, "test_traceback.py")]
 
         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
@@ -2032,8 +2028,7 @@ def test_random_files(self):
             print('tokenize', testfile)
             with open(testfile, 'rb') as f:
                 with self.subTest(file=testfile):
-                    compare_tokens_only = testfile in known_ambiguous_files
-                    self.check_roundtrip(f, compare_tokens_only=compare_tokens_only)
+                    self.check_roundtrip(f)
                 self.check_line_extraction(f)
 
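For reference, the ambiguity that contains_ambiguous_backslash guards against can be demonstrated with the tokenize module by itself. The snippet below is an illustrative sketch, not code from the commit: a line consisting only of indentation and a backslash produces no tokens, so untokenize() has no way to recover that line's whitespace, and the exact-output comparison would fail even though the token streams still match.

# Illustrative sketch (not from the commit): a backslash on a line by
# itself emits no tokens, so exact untokenization is impossible.
import io
import tokenize

source = b"a = (1,\n     \\\n)\n"  # middle line: indentation + backslash only

tokens = list(tokenize.tokenize(io.BytesIO(source).readline))
regenerated = tokenize.untokenize(tokens)  # full 5-tuples: exact mode

print(regenerated == source)  # False: the backslash line's indentation is lost

# The token streams still agree, which is what check_roundtrip falls
# back to for ambiguous files.
retokenized = list(tokenize.tokenize(io.BytesIO(regenerated).readline))
print([t[:2] for t in retokenized] == [t[:2] for t in tokens])  # True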