Skip to content

Commit 49b2fab

Browse files
committed
Stop using Python's eval() for -m and -k
Previously, the expressions given to the `-m` and `-k` options were evaluated with `eval`. This causes a few issues: - Python keywords cannot be used. - Constants like numbers, None, True, False are not handled correctly. - Various syntax like numeric operators and `X if Y else Z` is supported unintentionally. - `eval()` is somewhat dangerous for arbitrary input. - Can fail in many ways so requires `except Exception`. The format we want to support is quite simple, so change to a custom parser. This fixes the issues above, and gives us full control of the format, so can be documented comprehensively and even be extended in the future if we wish. Fixes #1141. Fixes #3573. Fixes #5881. Fixes #6822. Fixes #7112.
1 parent cbca9f1 commit 49b2fab

File tree

4 files changed

+213
-75
lines changed

4 files changed

+213
-75
lines changed

changelog/TBD.breaking.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``.
2+
3+
The format supports ``or``, ``and``, ``not``, parentheses and general identifiers to match against.
4+
Python constants, keywords or operators are no longer evaluated differently.

doc/en/example/markers.rst

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,14 @@ Or select multiple nodes:
141141
Using ``-k expr`` to select tests based on their name
142142
-------------------------------------------------------
143143

144-
.. versionadded: 2.0/2.3.4
144+
.. versionadded:: 2.0/2.3.4
145145

146146
You can use the ``-k`` command line option to specify an expression
147147
which implements a substring match on the test names instead of the
148148
exact match on markers that ``-m`` provides. This makes it easy to
149149
select tests based on their names:
150150

151-
.. versionadded: 5.4
151+
.. versionchanged:: 5.4
152152

153153
The expression matching is now case-insensitive.
154154

@@ -198,20 +198,8 @@ Or to select "http" and "quick" tests:
198198
199199
===================== 2 passed, 2 deselected in 0.12s ======================
200200
201-
.. note::
202-
203-
If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y``
204-
need to be simple non-keyword names. For example, ``"pass"`` or ``"from"``
205-
will result in SyntaxErrors because ``"-k"`` evaluates the expression using
206-
Python's `eval`_ function.
207-
208-
.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval
209-
201+
You can use ``and``, ``or``, ``not`` and parentheses.
210202

211-
However, if the ``"-k"`` argument is a simple string, no such restrictions
212-
apply. Also ``"-k 'not STRING'"`` has no restrictions. You can also
213-
specify numbers like ``"-k 1.3"`` to match tests which are parametrized
214-
with the float ``"1.3"``.
215203

216204
Registering markers
217205
-------------------------------------

src/_pytest/mark/legacy.py

Lines changed: 167 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22
this is a place where we put datastructures used by legacy apis
33
we hope to remove
44
"""
5-
import keyword
5+
import enum
6+
import re
7+
from typing import Callable
8+
from typing import Iterator
9+
from typing import Optional
10+
from typing import Sequence
611
from typing import Set
712

813
import attr
@@ -11,35 +16,174 @@
1116
from _pytest.config import UsageError
1217

1318
if TYPE_CHECKING:
19+
from typing import NoReturn
20+
1421
from _pytest.nodes import Item # noqa: F401 (used in type string)
1522

1623

24+
# The grammar for match expressions is:
25+
#
26+
# expr: and_expr ('or' and_expr)*
27+
# and_expr: not_expr ('and' not_expr)*
28+
# not_expr: 'not' not_expr | '(' expr ')' | ident
29+
# ident: (\w|\+|-|\.|\[|\])+
30+
31+
32+
class TokenType(enum.Enum):
    """Kinds of tokens that can appear in a ``-k``/``-m`` match expression.

    Each member's value is a human-readable description; it is the text
    used when a parse error reports which token was expected or found.
    """

    # Grouping.
    LPAREN = "left parenthesis"
    RPAREN = "right parenthesis"

    # Boolean operators.
    OR = "or"
    AND = "and"
    NOT = "not"

    # Anything else that is matched against names/markers.
    IDENT = "identifier"

    # Sentinel marking the end of the expression.
    EOF = "end of input"
40+
41+
42+
@attr.s(frozen=True, slots=True)
class Token:
    """An immutable lexical token of a match expression."""

    # Which kind of token this is.
    type = attr.ib(type=TokenType)
    # The token's text as it appeared in the input.
    value = attr.ib(type=str)
    # Column of the token's first character in the input.
    column = attr.ib(type=int)
47+
48+
49+
class ParseError(Exception):
    """Raised when a match expression cannot be tokenized or parsed.

    :param column: Column in the input at which the problem was detected.
    :param message: Description of the problem.
    """

    def __init__(self, column: int, message: str) -> None:
        self.column = column
        self.message = message

    def __str__(self) -> str:
        template = "at column {}: {}"
        return template.format(self.column, self.message)
56+
57+
58+
class Scanner:
    """Tokenizer with one token of lookahead for match expressions.

    ``tokens`` is the lazy token stream produced by :meth:`lex` and
    ``current`` is the next token that has not been consumed yet.
    """

    __slots__ = ("tokens", "current")

    def __init__(self, input: str) -> None:
        """Start scanning ``input`` and load the first token.

        :raises ParseError: If the input starts with an invalid character.
        """
        self.tokens = self.lex(input)
        # lex() always finishes with an EOF token, so there is always at
        # least one token to load here.
        self.current = next(self.tokens)

    def lex(self, input: str) -> Iterator[Token]:
        """Yield the tokens of ``input``, always ending with an EOF token.

        Spaces and tabs are skipped. Token columns are 1-based.

        :raises ParseError: On a character that cannot start any token.
        """
        # The group must be non-capturing, "(?:". Spelling it "(:?" would
        # instead accept an optional ":" in front of word characters, which
        # contradicts the documented grammar for identifiers:
        #     ident: (\w|\+|-|\.|\[|\])+
        ident_re = re.compile(r"(?:\w|\+|-|\.|\[|\])+")
        # Words that are operators rather than identifiers.
        operators = {
            "and": TokenType.AND,
            "or": TokenType.OR,
            "not": TokenType.NOT,
        }

        pos = 0
        end = len(input)
        while pos < end:
            char = input[pos]
            if char in (" ", "\t"):
                pos += 1
            elif char == "(":
                yield Token(TokenType.LPAREN, "(", pos + 1)
                pos += 1
            elif char == ")":
                yield Token(TokenType.RPAREN, ")", pos + 1)
                pos += 1
            else:
                # Match in place at ``pos`` rather than copying the rest of
                # the input for every token.
                match = ident_re.match(input, pos)
                if match is None:
                    raise ParseError(
                        pos + 1, 'unexpected character "{}"'.format(char),
                    )
                value = match.group(0)
                yield Token(operators.get(value, TokenType.IDENT), value, pos + 1)
                pos = match.end()
        yield Token(TokenType.EOF, "", pos + 1)

    def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
        """Consume and return the current token if it has the given type.

        The EOF token is returned but never advanced past, so it can be
        accepted repeatedly.

        :param type: The token type to accept.
        :param reject:
            If true, raise :class:`ParseError` instead of returning ``None``
            when the current token has a different type.
        :returns: The accepted token, or ``None`` if the type did not match.
        """
        if self.current.type is type:
            token = self.current
            if token.type is not TokenType.EOF:
                self.current = next(self.tokens)
            return token
        if reject:
            self.reject((type,))
        return None

    def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
        """Raise a :class:`ParseError` for the current token.

        :param expected: The token types that would have been valid here.
        """
        raise ParseError(
            self.current.column,
            "expected {}; got {}".format(
                " OR ".join(kind.value for kind in expected),
                self.current.type.value,
            ),
        )
112+
113+
114+
def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``expr: and_expr ('or' and_expr)*``.

    :param s: The scanner positioned at the start of the expression.
    :param matcher: Given an identifier, returns whether it matches.
    """
    result = and_expr(s, matcher)
    while s.accept(TokenType.OR):
        # Always parse the right-hand side, even when the result is already
        # decided, so that its tokens are consumed from the scanner.
        operand = and_expr(s, matcher)
        result = result or operand
    return result
120+
121+
122+
def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``and_expr: not_expr ('and' not_expr)*``.

    :param s: The scanner positioned at the start of the sub-expression.
    :param matcher: Given an identifier, returns whether it matches.
    """
    result = not_expr(s, matcher)
    while s.accept(TokenType.AND):
        # Always parse the right-hand side, even when the result is already
        # decided, so that its tokens are consumed from the scanner.
        operand = not_expr(s, matcher)
        result = result and operand
    return result
128+
129+
130+
def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
    """Parse and evaluate ``not_expr: 'not' not_expr | '(' expr ')' | ident``.

    :param s: The scanner positioned at the start of the sub-expression.
    :param matcher: Given an identifier, returns whether it matches.
    :raises ParseError:
        If the current token cannot start a ``not_expr`` or a closing
        parenthesis is missing.
    """
    # Negation applies to the next not_expr.
    if s.accept(TokenType.NOT):
        return not not_expr(s, matcher)

    # Parenthesized sub-expression; the closing parenthesis is mandatory.
    if s.accept(TokenType.LPAREN):
        inner = expr(s, matcher)
        s.accept(TokenType.RPAREN, reject=True)
        return inner

    # Otherwise only a plain identifier is valid here.
    ident = s.accept(TokenType.IDENT)
    if not ident:
        s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))
    return matcher(ident.value)
141+
142+
143+
def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
    """Evaluate a match expression as used by -k and -m.

    :param input: The input expression - one line.
    :param matcher: Given an identifier, should return whether it matches or not.
    :raises ParseError: If the expression is not valid.

    Returns whether the entire expression matches or not.
    """
    scanner = Scanner(input)
    result = expr(scanner, matcher)
    # Nothing may follow the top-level expression.
    scanner.accept(TokenType.EOF, reject=True)
    return result
155+
156+
157+
# The actual matchers:
158+
159+
17160
@attr.s
18-
class MarkMapping:
19-
"""Provides a local mapping for markers where item access
20-
resolves to True if the marker is present. """
161+
class MarkMatcher:
162+
"""A matcher for markers which are present."""
21163

22164
own_mark_names = attr.ib()
23165

24166
@classmethod
25-
def from_item(cls, item):
167+
def from_item(cls, item) -> "MarkMatcher":
26168
mark_names = {mark.name for mark in item.iter_markers()}
27169
return cls(mark_names)
28170

29-
def __getitem__(self, name):
171+
def __call__(self, name: str) -> bool:
30172
return name in self.own_mark_names
31173

32174

33175
@attr.s
34-
class KeywordMapping:
35-
"""Provides a local mapping for keywords.
36-
Given a list of names, map any substring of one of these names to True.
176+
class KeywordMatcher:
177+
"""A matcher for keywords.
178+
179+
Given a list of names, matches any substring of one of these names. The
180+
string inclusion check is case-insensitive.
37181
"""
38182

39183
_names = attr.ib(type=Set[str])
40184

41185
@classmethod
42-
def from_item(cls, item: "Item") -> "KeywordMapping":
186+
def from_item(cls, item: "Item") -> "KeywordMatcher":
43187
mapped_names = set()
44188

45189
# Add the names of the current item and any parent items
@@ -62,12 +206,7 @@ def from_item(cls, item: "Item") -> "KeywordMapping":
62206

63207
return cls(mapped_names)
64208

65-
def __getitem__(self, subname: str) -> bool:
66-
"""Return whether subname is included within stored names.
67-
68-
The string inclusion check is case-insensitive.
69-
70-
"""
209+
def __call__(self, subname: str) -> bool:
71210
subname = subname.lower()
72211
names = (name.lower() for name in self._names)
73212

@@ -77,18 +216,17 @@ def __getitem__(self, subname: str) -> bool:
77216
return False
78217

79218

80-
python_keywords_allowed_list = ["or", "and", "not"]
81-
82-
83-
def matchmark(colitem, markexpr):
219+
def matchmark(colitem, markexpr: str) -> bool:
84220
"""Tries to match on any marker names, attached to the given colitem."""
85221
try:
86-
return eval(markexpr, {}, MarkMapping.from_item(colitem))
87-
except Exception:
88-
raise UsageError("Wrong expression passed to '-m': {}".format(markexpr))
222+
return evaluate(markexpr, MarkMatcher.from_item(colitem))
223+
except ParseError as e:
224+
raise UsageError(
225+
"Wrong expression passed to '-m': {}: {}".format(markexpr, e)
226+
) from None
89227

90228

91-
def matchkeyword(colitem, keywordexpr):
229+
def matchkeyword(colitem, keywordexpr: str) -> bool:
92230
"""Tries to match given keyword expression to given collector item.
93231
94232
Will match on the name of colitem, including the names of its parents.
@@ -97,20 +235,9 @@ def matchkeyword(colitem, keywordexpr):
97235
Additionally, matches on names in the 'extra_keyword_matches' set of
98236
any item, as well as names directly assigned to test functions.
99237
"""
100-
mapping = KeywordMapping.from_item(colitem)
101-
if " " not in keywordexpr:
102-
# special case to allow for simple "-k pass" and "-k 1.3"
103-
return mapping[keywordexpr]
104-
elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]:
105-
return not mapping[keywordexpr[4:]]
106-
for kwd in keywordexpr.split():
107-
if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list:
108-
raise UsageError(
109-
"Python keyword '{}' not accepted in expressions passed to '-k'".format(
110-
kwd
111-
)
112-
)
113238
try:
114-
return eval(keywordexpr, {}, mapping)
115-
except Exception:
116-
raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr))
239+
return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
240+
except ParseError as e:
241+
raise UsageError(
242+
"Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
243+
) from None

0 commit comments

Comments
 (0)