@@ -7,14 +7,19 @@
 import io
 import keyword
 import re
+import sys
 import token
 import tokenize
 
+from typing import Iterable, List, Optional, Set, Tuple
+
 from coverage import env
-from coverage.misc import contract
 
 
-def phys_tokens(toks):
+TokenInfos = Iterable[tokenize.TokenInfo]
+
+
+def _phys_tokens(toks: TokenInfos) -> TokenInfos:
     """Return all physical tokens, even line continuations.
 
     tokenize.generate_tokens() doesn't return a token for the backslash that
@@ -24,9 +29,9 @@ def phys_tokens(toks):
     Returns the same values as generate_tokens()
 
     """
-    last_line = None
+    last_line: Optional[str] = None
     last_lineno = -1
-    last_ttext = None
+    last_ttext: str = ""
     for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
         if last_lineno != elineno:
             if last_line and last_line.endswith("\\\n"):
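Note: seeding `last_ttext` with `""` instead of `None` keeps its declared type a plain `str`, so later string-method calls need no None check under mypy. A minimal sketch of the failure mode this avoids (the use site here is illustrative, not the function's elided body; the mypy message is paraphrased):

    from typing import Optional
    last_ttext: Optional[str] = None
    last_ttext.endswith("\\")  # mypy: Item "None" of "Optional[str]" has no attribute "endswith"
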
@@ -57,35 +62,35 @@ def phys_tokens(toks):
                 # Figure out what column the backslash is in.
                 ccol = len(last_line.split("\n")[-2]) - 1
                 # Yield the token, with a fake token type.
-                yield (
+                yield tokenize.TokenInfo(
                     99999, "\\\n",
                     (slineno, ccol), (slineno, ccol+2),
                     last_line
                 )
         last_line = ltext
         if ttype not in (tokenize.NEWLINE, tokenize.NL):
             last_ttext = ttext
-        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
+        yield tokenize.TokenInfo(ttype, ttext, (slineno, scol), (elineno, ecol), ltext)
         last_lineno = elineno
 
 
 class MatchCaseFinder(ast.NodeVisitor):
     """Helper for finding match/case lines."""
-    def __init__(self, source):
+    def __init__(self, source: str) -> None:
         # This will be the set of line numbers that start match or case statements.
-        self.match_case_lines = set()
+        self.match_case_lines: Set[int] = set()
         self.visit(ast.parse(source))
 
-    def visit_Match(self, node):
-        """Invoked by ast.NodeVisitor.visit"""
-        self.match_case_lines.add(node.lineno)
-        for case in node.cases:
-            self.match_case_lines.add(case.pattern.lineno)
-        self.generic_visit(node)
+    if sys.version_info >= (3, 10):
+        def visit_Match(self, node: ast.Match) -> None:
+            """Invoked by ast.NodeVisitor.visit"""
+            self.match_case_lines.add(node.lineno)
+            for case in node.cases:
+                self.match_case_lines.add(case.pattern.lineno)
+            self.generic_visit(node)
 
 
-@contract(source='unicode')
-def source_token_lines(source):
+def source_token_lines(source: str) -> Iterable[List[Tuple[str, str]]]:
     """Generate a series of lines, one for each line in `source`.
 
     Each line is a list of pairs, each pair is a token::
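Note: `tokenize.TokenInfo` is a `NamedTuple`, so yielding it instead of a bare 5-tuple changes nothing for callers that unpack five values; it only gives mypy concrete field types. A small sketch with invented values:

    import tokenize
    tok = tokenize.TokenInfo(99999, "\\\n", (3, 10), (3, 12), "x = 1 + \\\n")
    ttype, ttext, start, end, ltext = tok  # tuple-style unpacking still works
    assert tok.string == ttext and tok.start == start
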
@@ -102,7 +107,7 @@ def source_token_lines(source):
     """
 
     ws_tokens = {token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL}
-    line = []
+    line: List[Tuple[str, str]] = []
     col = 0
 
     source = source.expandtabs(8).replace('\r\n', '\n')
@@ -111,7 +116,7 @@ def source_token_lines(source):
     if env.PYBEHAVIOR.soft_keywords:
         match_case_lines = MatchCaseFinder(source).match_case_lines
 
-    for ttype, ttext, (sline, scol), (_, ecol), _ in phys_tokens(tokgen):
+    for ttype, ttext, (sline, scol), (_, ecol), _ in _phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
             if part == '\n':
@@ -132,17 +137,20 @@ def source_token_lines(source):
                     if keyword.iskeyword(ttext):
                         # Hard keywords are always keywords.
                         tok_class = "key"
-                    elif env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
-                        # Soft keywords appear at the start of the line, on lines that start
-                        # match or case statements.
-                        if len(line) == 0:
-                            is_start_of_line = True
-                        elif (len(line) == 1) and line[0][0] == "ws":
-                            is_start_of_line = True
-                        else:
-                            is_start_of_line = False
-                        if is_start_of_line and sline in match_case_lines:
-                            tok_class = "key"
+                    elif sys.version_info >= (3, 10):   # PYVERSIONS
+                        # Need the version_info check to keep mypy from borking
+                        # on issoftkeyword here.
+                        if env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
+                            # Soft keywords appear at the start of the line,
+                            # on lines that start match or case statements.
+                            if len(line) == 0:
+                                is_start_of_line = True
+                            elif (len(line) == 1) and line[0][0] == "ws":
+                                is_start_of_line = True
+                            else:
+                                is_start_of_line = False
+                            if is_start_of_line and sline in match_case_lines:
+                                tok_class = "key"
                 line.append((tok_class, part))
                 mark_end = True
         scol = 0
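Note: the new `sys.version_info >= (3, 10)` guard leans on mypy's version-check narrowing: mypy only type-checks the guarded branch when the target version satisfies the comparison, so a name the stubs gate behind a newer version (here, per the commit comment, `keyword.issoftkeyword`) stops erroring under older targets. A standalone illustration of the same pattern:

    import sys
    import keyword

    if sys.version_info >= (3, 10):  # mypy skips this branch for older targets
        print(keyword.issoftkeyword("match"))  # True: 'match' is a soft keyword
    else:
        print("soft keywords are not classified on this version")
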
@@ -164,12 +172,11 @@ class CachedTokenizer:
     actually tokenize twice.
 
     """
-    def __init__(self):
-        self.last_text = None
-        self.last_tokens = None
+    def __init__(self) -> None:
+        self.last_text: Optional[str] = None
+        self.last_tokens: List[tokenize.TokenInfo] = []
 
-    @contract(text='unicode')
-    def generate_tokens(self, text):
+    def generate_tokens(self, text: str) -> TokenInfos:
         """A stand-in for `tokenize.generate_tokens`."""
         if text != self.last_text:
             self.last_text = text
@@ -185,8 +192,7 @@ def generate_tokens(self, text):
 generate_tokens = CachedTokenizer().generate_tokens
 
 
-@contract(source='bytes')
-def source_encoding(source):
+def source_encoding(source: bytes) -> str:
     """Determine the encoding for `source`, according to PEP 263.
 
     `source` is a byte string: the text of the program.
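Note: the annotations leave `CachedTokenizer`'s behavior unchanged: a repeated call with identical text returns the stored token list instead of re-tokenizing. A quick usage sketch of the module-level stand-in:

    src = "a = 1\nb = 2\n"
    first = list(generate_tokens(src))
    again = list(generate_tokens(src))  # same text, served from the cache
    assert first == again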