
Commit cceadff

test: add phystokens.py to the mypy train
1 parent 9b4c05d


2 files changed: +46 −40 lines changed


coverage/phystokens.py

Lines changed: 42 additions & 36 deletions
@@ -7,14 +7,19 @@
 import io
 import keyword
 import re
+import sys
 import token
 import tokenize
 
+from typing import Iterable, List, Optional, Set, Tuple
+
 from coverage import env
-from coverage.misc import contract
 
 
-def phys_tokens(toks):
+TokenInfos = Iterable[tokenize.TokenInfo]
+
+
+def _phys_tokens(toks: TokenInfos) -> TokenInfos:
     """Return all physical tokens, even line continuations.
 
     tokenize.generate_tokens() doesn't return a token for the backslash that
@@ -24,9 +29,9 @@ def phys_tokens(toks):
     Returns the same values as generate_tokens()
 
     """
-    last_line = None
+    last_line: Optional[str] = None
     last_lineno = -1
-    last_ttext = None
+    last_ttext: str = ""
     for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
         if last_lineno != elineno:
             if last_line and last_line.endswith("\\\n"):
@@ -57,35 +62,35 @@ def phys_tokens(toks):
                     # Figure out what column the backslash is in.
                     ccol = len(last_line.split("\n")[-2]) - 1
                     # Yield the token, with a fake token type.
-                    yield (
+                    yield tokenize.TokenInfo(
                         99999, "\\\n",
                         (slineno, ccol), (slineno, ccol+2),
                         last_line
                     )
             last_line = ltext
         if ttype not in (tokenize.NEWLINE, tokenize.NL):
             last_ttext = ttext
-        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
+        yield tokenize.TokenInfo(ttype, ttext, (slineno, scol), (elineno, ecol), ltext)
         last_lineno = elineno
 
 
 class MatchCaseFinder(ast.NodeVisitor):
     """Helper for finding match/case lines."""
-    def __init__(self, source):
+    def __init__(self, source: str) -> None:
         # This will be the set of line numbers that start match or case statements.
-        self.match_case_lines = set()
+        self.match_case_lines: Set[int] = set()
         self.visit(ast.parse(source))
 
-    def visit_Match(self, node):
-        """Invoked by ast.NodeVisitor.visit"""
-        self.match_case_lines.add(node.lineno)
-        for case in node.cases:
-            self.match_case_lines.add(case.pattern.lineno)
-        self.generic_visit(node)
+    if sys.version_info >= (3, 10):
+        def visit_Match(self, node: ast.Match) -> None:
+            """Invoked by ast.NodeVisitor.visit"""
+            self.match_case_lines.add(node.lineno)
+            for case in node.cases:
+                self.match_case_lines.add(case.pattern.lineno)
+            self.generic_visit(node)
 
 
-@contract(source='unicode')
-def source_token_lines(source):
+def source_token_lines(source: str) -> Iterable[List[Tuple[str, str]]]:
     """Generate a series of lines, one for each line in `source`.
 
     Each line is a list of pairs, each pair is a token::
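A note on the yield change in the hunk above: tokenize.TokenInfo is a NamedTuple, so yielding it instead of a bare 5-tuple keeps every caller that unpacks five values working, while giving mypy one precise element type for the iterator. A minimal sketch of the idea (the token values here are made up for illustration):

    import tokenize

    # TokenInfo is a NamedTuple, so it behaves exactly like the plain
    # (type, string, start, end, line) tuples it replaces.
    tok = tokenize.TokenInfo(
        type=tokenize.NAME,     # token type code
        string="example",       # token text (made-up value)
        start=(1, 0),           # (line, column) where the token starts
        end=(1, 7),             # (line, column) where it ends
        line="example = 1\n",   # the physical source line
    )

    ttype, ttext, (sline, scol), (eline, ecol), ltext = tok  # unpacking still works
    assert ttext == "example"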
@@ -102,7 +107,7 @@ def source_token_lines(source):
     """
 
     ws_tokens = {token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL}
-    line = []
+    line: List[Tuple[str, str]] = []
     col = 0
 
     source = source.expandtabs(8).replace('\r\n', '\n')
@@ -111,7 +116,7 @@ def source_token_lines(source):
     if env.PYBEHAVIOR.soft_keywords:
         match_case_lines = MatchCaseFinder(source).match_case_lines
 
-    for ttype, ttext, (sline, scol), (_, ecol), _ in phys_tokens(tokgen):
+    for ttype, ttext, (sline, scol), (_, ecol), _ in _phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
             if part == '\n':
@@ -132,17 +137,20 @@ def source_token_lines(source):
                     if keyword.iskeyword(ttext):
                         # Hard keywords are always keywords.
                         tok_class = "key"
-                    elif env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
-                        # Soft keywords appear at the start of the line, on lines that start
-                        # match or case statements.
-                        if len(line) == 0:
-                            is_start_of_line = True
-                        elif (len(line) == 1) and line[0][0] == "ws":
-                            is_start_of_line = True
-                        else:
-                            is_start_of_line = False
-                        if is_start_of_line and sline in match_case_lines:
-                            tok_class = "key"
+                    elif sys.version_info >= (3, 10):   # PYVERSIONS
+                        # Need the version_info check to keep mypy from borking
+                        # on issoftkeyword here.
+                        if env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
+                            # Soft keywords appear at the start of the line,
+                            # on lines that start match or case statements.
+                            if len(line) == 0:
+                                is_start_of_line = True
+                            elif (len(line) == 1) and line[0][0] == "ws":
+                                is_start_of_line = True
+                            else:
+                                is_start_of_line = False
+                            if is_start_of_line and sline in match_case_lines:
+                                tok_class = "key"
                 line.append((tok_class, part))
                 mark_end = True
             scol = 0
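Why the guard in the hunk above satisfies mypy: mypy special-cases comparisons against sys.version_info and skips the guarded branch when checking for an older --python-version, so names it cannot resolve on that version (here, keyword.issoftkeyword) stop producing errors. A minimal sketch of the pattern, with a hypothetical helper name:

    import sys

    if sys.version_info >= (3, 10):    # PYVERSIONS
        # mypy only analyzes this branch when the configured Python
        # version is 3.10+, so newer stdlib names are safe to use here.
        from keyword import issoftkeyword

        def is_soft(word: str) -> bool:
            """Report whether `word` is a soft keyword like `match`."""
            return issoftkeyword(word)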
@@ -164,12 +172,11 @@ class CachedTokenizer:
     actually tokenize twice.
 
     """
-    def __init__(self):
-        self.last_text = None
-        self.last_tokens = None
+    def __init__(self) -> None:
+        self.last_text: Optional[str] = None
+        self.last_tokens: List[tokenize.TokenInfo] = []
 
-    @contract(text='unicode')
-    def generate_tokens(self, text):
+    def generate_tokens(self, text: str) -> TokenInfos:
         """A stand-in for `tokenize.generate_tokens`."""
         if text != self.last_text:
             self.last_text = text
@@ -185,8 +192,7 @@ def generate_tokens(text):
 generate_tokens = CachedTokenizer().generate_tokens
 
 
-@contract(source='bytes')
-def source_encoding(source):
+def source_encoding(source: bytes) -> str:
     """Determine the encoding for `source`, according to PEP 263.
 
     `source` is a byte string: the text of the program.
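One typing-driven detail in the CachedTokenizer hunk above: last_tokens now starts as an empty list instead of None, so its declared type is List[tokenize.TokenInfo] with no Optional[...] wrapper and no None checks at the use sites. A sketch of the same single-entry cache pattern; the body of generate_tokens is paraphrased here, not quoted from the commit:

    import io
    import tokenize
    from typing import Iterable, List, Optional

    class CachedTokenizer:
        """Cache the tokens of the most recently tokenized text."""

        def __init__(self) -> None:
            self.last_text: Optional[str] = None
            # An empty list rather than None: the attribute is always a
            # list, so its type never needs to be Optional.
            self.last_tokens: List[tokenize.TokenInfo] = []

        def generate_tokens(self, text: str) -> Iterable[tokenize.TokenInfo]:
            """A stand-in for `tokenize.generate_tokens`."""
            if text != self.last_text:
                self.last_text = text
                readline = io.StringIO(text).readline
                self.last_tokens = list(tokenize.generate_tokens(readline))
            return self.last_tokens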

tox.ini

Lines changed: 4 additions & 4 deletions
@@ -76,7 +76,7 @@ deps =
 setenv =
     {[testenv]setenv}
     LINTABLE=coverage tests doc ci igor.py setup.py __main__.py
-    TYPEABLE=coverage/files.py coverage/numbits.py
+    TYPEABLE=coverage/files.py coverage/numbits.py coverage/phystokens.py
 
 commands =
     python -m tabnanny {env:LINTABLE}
@@ -85,13 +85,13 @@ commands =
     python -m cogapp -cP --check --verbosity=1 doc/*.rst
     python -m cogapp -cP --check --verbosity=1 .github/workflows/*.yml
     #doc8 -q --ignore-path 'doc/_*' doc CHANGES.rst README.rst
+    mypy {env:TYPEABLE}
+    python -m pylint --notes= {env:LINTABLE}
+    check-manifest --ignore 'doc/sample_html/*,.treerc'
     # If 'build -q' becomes a thing (https://github.com/pypa/build/issues/188),
     # this can be simplifed:
     python igor.py quietly "python -m build"
     twine check dist/*
-    mypy {env:TYPEABLE}
-    python -m pylint --notes= {env:LINTABLE}
-    check-manifest --ignore 'doc/sample_html/*,.treerc'
 
 [gh-actions]
 # PYVERSIONS
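With both tox changes, mypy runs earlier in the lint pass and over one more file. The `mypy {env:TYPEABLE}` step expands to an explicit allowlist of already-annotated modules, roughly:

    mypy coverage/files.py coverage/numbits.py coverage/phystokens.py

Growing TYPEABLE file by file appears to be how the "mypy train" in the commit title adds modules without annotating the whole package at once.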
