@@ -56,112 +56,11 @@ def exact_type(self):
56
56
else :
57
57
return self .type
58
58
59
- def group (* choices ): return '(' + '|' .join (choices ) + ')'
60
- def any (* choices ): return group (* choices ) + '*'
61
- def maybe (* choices ): return group (* choices ) + '?'
62
-
63
- # Note: we use unicode matching for names ("\w") but ascii matching for
64
- # number literals.
65
- Whitespace = r'[ \f\t]*'
66
- Comment = r'#[^\r\n]*'
67
- Ignore = Whitespace + any (r'\\\r?\n' + Whitespace ) + maybe (Comment )
68
- Name = r'\w+'
69
-
70
- Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
71
- Binnumber = r'0[bB](?:_?[01])+'
72
- Octnumber = r'0[oO](?:_?[0-7])+'
73
- Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
74
- Intnumber = group (Hexnumber , Binnumber , Octnumber , Decnumber )
75
- Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
76
- Pointfloat = group (r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?' ,
77
- r'\.[0-9](?:_?[0-9])*' ) + maybe (Exponent )
78
- Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
79
- Floatnumber = group (Pointfloat , Expfloat )
80
- Imagnumber = group (r'[0-9](?:_?[0-9])*[jJ]' , Floatnumber + r'[jJ]' )
81
- Number = group (Imagnumber , Floatnumber , Intnumber )
82
-
83
- # Return the empty string, plus all of the valid string prefixes.
84
- def _all_string_prefixes ():
85
- # The valid string prefixes. Only contain the lower case versions,
86
- # and don't contain any permutations (include 'fr', but not
87
- # 'rf'). The various permutations will be generated.
88
- _valid_string_prefixes = ['b' , 'r' , 'u' , 'f' , 'br' , 'fr' ]
89
- # if we add binary f-strings, add: ['fb', 'fbr']
90
- result = {'' }
91
- for prefix in _valid_string_prefixes :
92
- for t in _itertools .permutations (prefix ):
93
- # create a list with upper and lower versions of each
94
- # character
95
- for u in _itertools .product (* [(c , c .upper ()) for c in t ]):
96
- result .add ('' .join (u ))
97
- return result
98
-
99
- @functools .lru_cache
100
- def _compile (expr ):
101
- return re .compile (expr , re .UNICODE )
102
-
103
- # Note that since _all_string_prefixes includes the empty string,
104
- # StringPrefix can be the empty string (making it optional).
105
- StringPrefix = group (* _all_string_prefixes ())
106
-
107
- # Tail end of ' string.
108
- Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
109
- # Tail end of " string.
110
- Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
111
- # Tail end of ''' string.
112
- Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
113
- # Tail end of """ string.
114
- Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
115
- Triple = group (StringPrefix + "'''" , StringPrefix + '"""' )
116
- # Single-line ' or " string.
117
- String = group (StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'" ,
118
- StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"' )
119
-
120
- # Sorting in reverse order puts the long operators before their prefixes.
121
- # Otherwise if = came before ==, == would get recognized as two instances
122
- # of =.
123
- Special = group (* map (re .escape , sorted (EXACT_TOKEN_TYPES , reverse = True )))
124
- Funny = group (r'\r?\n' , Special )
125
-
126
- PlainToken = group (Number , Funny , String , Name )
127
- Token = Ignore + PlainToken
128
-
129
- # First (or only) line of ' or " string.
130
- ContStr = group (StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
131
- group ("'" , r'\\\r?\n' ),
132
- StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
133
- group ('"' , r'\\\r?\n' ))
134
- PseudoExtras = group (r'\\\r?\n|\Z' , Comment , Triple )
135
- PseudoToken = Whitespace + group (PseudoExtras , Number , Funny , ContStr , Name )
136
-
137
- # For a given string prefix plus quotes, endpats maps it to a regex
138
- # to match the remainder of that string. _prefix can be empty, for
139
- # a normal single or triple quoted string (with no prefix).
140
- endpats = {}
141
- for _prefix in _all_string_prefixes ():
142
- endpats [_prefix + "'" ] = Single
143
- endpats [_prefix + '"' ] = Double
144
- endpats [_prefix + "'''" ] = Single3
145
- endpats [_prefix + '"""' ] = Double3
146
- del _prefix
147
-
148
- # A set of all of the single and triple quoted string prefixes,
149
- # including the opening quotes.
150
- single_quoted = set ()
151
- triple_quoted = set ()
152
- for t in _all_string_prefixes ():
153
- for u in (t + '"' , t + "'" ):
154
- single_quoted .add (u )
155
- for u in (t + '"""' , t + "'''" ):
156
- triple_quoted .add (u )
157
- del t , u
158
-
159
- tabsize = 8
160
59
161
60
class TokenError (Exception ): pass
162
61
163
- class StopTokenizing (Exception ): pass
164
62
63
+ class StopTokenizing (Exception ): pass
165
64
166
65
class Untokenizer :
167
66
0 commit comments