Skip to content

Commit 2d9ca06

Browse files
author
Crucifixion-Fxl
committed
[Bugfix] Migrate to REGEX Library to prevent catastrophic backtracking
Signed-off-by: Crucifixion-Fxl <[email protected]>
1 parent 47fda6d commit 2d9ca06

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+65
-61
lines changed

.github/scripts/cleanup_pr_body.sh

100755 → 100644
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ sed -i '/\*\*BEFORE SUBMITTING, PLEASE READ.*\*\*/,$d' "${NEW}"
2626

2727
# Remove HTML <details> section that includes <summary> text of "PR Checklist (Click to Expand)"
2828
python3 - <<EOF
29-
import re
29+
import regex as re
3030
3131
with open("${NEW}", "r") as file:
3232
content = file.read()

benchmarks/benchmark_serving_structured_output.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def process_one_metric(
672672
def evaluate(ret, args):
673673
def _eval_correctness_json(expected, actual):
674674
# extract json string from string using regex
675-
import re
675+
import regex as re
676676

677677
actual = actual.replace("\n", "").replace(" ", "").strip()
678678
try:
@@ -687,9 +687,9 @@ def _eval_correctness_choice(expected, actual):
687687
return actual in args.choice
688688

689689
def _eval_correctness_regex(expected, actual):
690-
import re
690+
import regex as re
691691

692-
return re.match(args.regex, actual) is not None
692+
return re.match(args.regex, actual) is not None
693693

694694
def _eval_correctness(expected, actual):
695695
if args.structure_type == "guided_json":

benchmarks/kernels/graph_machete_bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
import math
44
import pickle
5-
import re
65
from collections import defaultdict
76

87
import matplotlib.pyplot as plt
98
import pandas as pd
9+
import regex as re
1010
import seaborn as sns
1111
from torch.utils.benchmark import Measurement as TMeasurement
1212

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
import datetime
1616
import logging
1717
import os
18-
import re
1918
import sys
2019
from pathlib import Path
2120

21+
import regex as re
2222
import requests
2323

2424
logger = logging.getLogger(__name__)

docs/source/generate_examples.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import itertools
4-
import re
54
from dataclasses import dataclass, field
65
from pathlib import Path
76

7+
import regex as re
8+
89
ROOT_DIR = Path(__file__).parent.parent.parent.resolve()
910
ROOT_DIR_RELATIVE = '../../../..'
1011
EXAMPLE_DIR = ROOT_DIR / "examples"

examples/offline_inference/prithvi_geospatial_mae.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020
import argparse
2121
import datetime
2222
import os
23-
import re
2423
from typing import Union
2524

2625
import albumentations
2726
import numpy as np
2827
import rasterio
28+
import regex as re
2929
import torch
3030
from einops import rearrange
3131
from terratorch.datamodules import Sen1Floods11NonGeoDataModule

requirements/common.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
regex # Replace re for higher-performance regex matching
12
cachetools
23
psutil
34
sentencepiece # Required for LLaMA tokenizer.

requirements/docs.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ myst-parser==3.0.1 # `myst-parser==4.0.1` breaks inline code in titles
88
msgspec
99
snowballstemmer<3 # https://github.com/snowballstem/snowball/issues/229
1010
commonmark # Required by sphinx-argparse when using :markdownhelp:
11+
regex # Replace re for higher-performance regex matching
1112

1213
# Custom autodoc2 is necessary for faster docstring processing
1314
# see: https://github.com/sphinx-extensions2/sphinx-autodoc2/issues/33#issuecomment-2856386035

requirements/nightly_torch_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ matplotlib # required for qwen-vl test
3838
# required for Multi-Modal Models Test (Standard)
3939
num2words # required for smolvlm test
4040
pqdm
41-
timm # required for internvl test
41+
timm # required for internvl test

setup.py

100755 → 100644
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
import json
66
import logging
77
import os
8-
import re
98
import subprocess
109
import sys
1110
from pathlib import Path
1211
from shutil import which
1312

13+
import regex as re
1414
import torch
1515
from packaging.version import Version, parse
1616
from setuptools import Extension, setup
@@ -389,7 +389,6 @@ def run(self) -> None:
389389
# vllm_flash_attn python code:
390390
# Regex from
391391
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
392-
import re
393392
compiled_regex = re.compile(
394393
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
395394
file_members += list(

0 commit comments

Comments
 (0)