Commit 3bf6dd8

fix: Fixed CodeGenTokenizationTest::test_truncation failing test (#32850)
* Fixed failing CodeGenTokenizationTest::test_truncation.
* [run_slow] Codegen
* [run_slow] codegen
1 parent 9578c25 commit 3bf6dd8

File tree

1 file changed: +2 -2 lines changed

tests/models/codegen/test_tokenization_codegen.py

Lines changed: 2 additions & 2 deletions
@@ -254,12 +254,12 @@ def test_truncation(self):
         tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

         text = "\nif len_a > len_b:\n result = a\nelse:\n result = b\n\n\n\n#"
-        expected_trucated_text = "\nif len_a > len_b: result = a\nelse: result = b"
+        expected_truncated_text = "\nif len_a > len_b:\n result = a\nelse:\n result = b"

         input_ids = tokenizer.encode(text)
         truncation_pattern = ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
         decoded_text = tokenizer.decode(input_ids, truncate_before_pattern=truncation_pattern)
-        self.assertEqual(decoded_text, expected_trucated_text)
+        self.assertEqual(decoded_text, expected_truncated_text)
         # TODO @ArthurZ outputs of the fast tokenizer are different in this case, un-related to the PR

         # tokenizer has no padding token
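
For readers who want to reproduce the behaviour locally, here is a minimal, self-contained sketch of what the test exercises: CodeGenTokenizer.decode() with truncate_before_pattern, using the same checkpoint and strings as the test above. It is illustrative only, not part of the commit, and assumes transformers is installed and the checkpoint can be downloaded from the Hub.

import re

from transformers import CodeGenTokenizer

# Same checkpoint the test uses; fetched from the Hub on first run.
tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

text = "\nif len_a > len_b:\n result = a\nelse:\n result = b\n\n\n\n#"
input_ids = tokenizer.encode(text)

# decode() cuts the output just before the first match of any of these
# patterns (a line starting with "#", an end-of-text token, a triple-quoted
# string opener, or a run of blank lines).
truncation_pattern = ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
decoded_text = tokenizer.decode(input_ids, truncate_before_pattern=truncation_pattern)

# Per the corrected expectation, the if/else bodies survive truncation and
# only the trailing blank lines and "#" marker are dropped.
print(decoded_text)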
