43
43
44
44
45
45
class HttpParser :
46
- """HTTP request/response parser."""
46
+ """HTTP request/response parser.
47
+
48
+ TODO: Make me zero-copy by using memoryview.
49
+ Currently due to chunk/buffer handling we
50
+ are not able to utilize memoryview
51
+ efficiently.
52
+
53
+ For this to happen we must store `buffer`
54
+ as List[memoryview] instead of raw bytes and
55
+ update parser to work accordingly.
56
+ """
47
57
48
58
def __init__ (self , parser_type : int ) -> None :
49
59
self .type : int = parser_type
50
60
self .state : int = httpParserStates .INITIALIZED
51
61
62
+ self .host : Optional [bytes ] = None
63
+ self .port : Optional [int ] = None
64
+ self .path : Optional [bytes ] = None
65
+ self .method : Optional [bytes ] = None
66
+ self .code : Optional [bytes ] = None
67
+ self .reason : Optional [bytes ] = None
68
+ self .version : Optional [bytes ] = None
69
+
52
70
# Total size of raw bytes passed for parsing
53
71
self .total_size : int = 0
54
72
55
73
# Buffer to hold unprocessed bytes
56
74
self .buffer : bytes = b''
57
75
76
+ # Keys are lower case header names
77
+ # Values are 2-tuple containing original
78
+ # header and it's value as received.
58
79
self .headers : Dict [bytes , Tuple [bytes , bytes ]] = {}
59
80
self .body : Optional [bytes ] = None
60
81
61
- self .method : Optional [bytes ] = None
62
- self .url : Optional [urlparse .SplitResultBytes ] = None
63
- self .code : Optional [bytes ] = None
64
- self .reason : Optional [bytes ] = None
65
- self .version : Optional [bytes ] = None
66
-
67
82
self .chunk_parser : Optional [ChunkParser ] = None
68
83
69
- # This cleans up developer APIs as Python urlparse.urlsplit behaves differently
70
- # for incoming proxy request and incoming web request. Web request is the one
71
- # which is broken.
72
- self .host : Optional [bytes ] = None
73
- self .port : Optional [int ] = None
74
- self .path : Optional [bytes ] = None
84
+ # TODO: Deprecate me, we don't need this in core.
85
+ #
86
+ # Deprecated since v2.4.0
87
+ #
88
+ # This is mostly for developers so that they can directly
89
+ # utilize a url object, but is unnecessary as parser
90
+ # provides all the necessary parsed information.
91
+ #
92
+ # But developers can utilize urlsplit or whatever
93
+ # library they are using when necessary. This will certainly
94
+ # give some performance boost as url parsing won't be needed
95
+ # for every request/response object.
96
+ #
97
+ # (except query string and fragments)
98
+ self ._url : Optional [urlparse .SplitResultBytes ] = None
75
99
76
100
@classmethod
77
101
def request (cls : Type [T ], raw : bytes ) -> T :
@@ -116,157 +140,51 @@ def set_url(self, url: bytes) -> None:
116
140
# with urlsplit, which expects a fully qualified url.
117
141
if self .method == httpMethods .CONNECT :
118
142
url = b'https://' + url
119
- self .url = urlparse .urlsplit (url )
120
- self .set_line_attributes ()
121
-
122
- def set_line_attributes (self ) -> None :
123
- if self .type == httpParserTypes .REQUEST_PARSER :
124
- if self .method == httpMethods .CONNECT and self .url :
125
- self .host = self .url .hostname
126
- self .port = 443 if self .url .port is None else self .url .port
127
- elif self .url :
128
- self .host , self .port = self .url .hostname , self .url .port \
129
- if self .url .port else DEFAULT_HTTP_PORT
130
- else :
131
- raise KeyError (
132
- 'Invalid request. Method: %r, Url: %r' %
133
- (self .method , self .url ),
134
- )
135
- self .path = self .build_path ()
143
+ self ._url = urlparse .urlsplit (url )
144
+ self ._set_line_attributes ()
136
145
137
146
def is_chunked_encoded (self ) -> bool :
138
147
return b'transfer-encoding' in self .headers and \
139
148
self .headers [b'transfer-encoding' ][1 ].lower () == b'chunked'
140
149
150
+ def content_expected (self ) -> bool :
151
+ return b'content-length' in self .headers and int (self .header (b'content-length' )) > 0
152
+
141
153
def body_expected (self ) -> bool :
142
- return (
143
- b'content-length' in self .headers and
144
- int (self .header (b'content-length' )) > 0
145
- ) or \
146
- self .is_chunked_encoded ()
154
+ return self .content_expected () or self .is_chunked_encoded ()
147
155
148
156
def parse (self , raw : bytes ) -> None :
149
157
"""Parses Http request out of raw bytes.
150
158
151
- Check HttpParser state after parse has successfully returned."""
159
+ Check for `HttpParser.state` after `parse` has successfully returned.
160
+ """
152
161
self .total_size += len (raw )
153
162
raw = self .buffer + raw
154
- self .buffer = b''
155
-
156
- more = len (raw ) > 0
163
+ self .buffer , more = b'' , len (raw ) > 0
157
164
while more and self .state != httpParserStates .COMPLETE :
158
- if self .state in (
159
- httpParserStates .HEADERS_COMPLETE ,
160
- httpParserStates .RCVING_BODY ,
161
- ):
162
- if b'content-length' in self .headers :
163
- self .state = httpParserStates .RCVING_BODY
164
- if self .body is None :
165
- self .body = b''
166
- total_size = int (self .header (b'content-length' ))
167
- received_size = len (self .body )
168
- self .body += raw [:total_size - received_size ]
169
- if self .body and \
170
- len (self .body ) == int (self .header (b'content-length' )):
171
- self .state = httpParserStates .COMPLETE
172
- more , raw = len (raw ) > 0 , raw [total_size - received_size :]
173
- elif self .is_chunked_encoded ():
174
- if not self .chunk_parser :
175
- self .chunk_parser = ChunkParser ()
176
- raw = self .chunk_parser .parse (raw )
177
- if self .chunk_parser .state == chunkParserStates .COMPLETE :
178
- self .body = self .chunk_parser .body
179
- self .state = httpParserStates .COMPLETE
180
- more = False
181
- else :
182
- raise NotImplementedError (
183
- 'Parser shouldn\' t have reached here. ' +
184
- 'This can happen when content length header is missing but their is a body in the payload' ,
185
- )
186
- else :
187
- more , raw = self .process (raw )
165
+ # gte with HEADERS_COMPLETE also encapsulated RCVING_BODY state
166
+ more , raw = self ._process_body (raw ) \
167
+ if self .state >= httpParserStates .HEADERS_COMPLETE else \
168
+ self ._process_line_and_headers (raw )
188
169
self .buffer = raw
189
170
190
- def process (self , raw : bytes ) -> Tuple [bool , bytes ]:
191
- """Returns False when no CRLF could be found in received bytes."""
192
- line , raw = find_http_line (raw )
193
- if line is None :
194
- return False , raw
195
-
196
- if self .state == httpParserStates .INITIALIZED :
197
- self .process_line (line )
198
- self .state = httpParserStates .LINE_RCVD
199
- elif self .state in (httpParserStates .LINE_RCVD , httpParserStates .RCVING_HEADERS ):
200
- if self .state == httpParserStates .LINE_RCVD :
201
- # LINE_RCVD state is equivalent to RCVING_HEADERS
202
- self .state = httpParserStates .RCVING_HEADERS
203
- if line .strip () == b'' : # Blank line received.
204
- self .state = httpParserStates .HEADERS_COMPLETE
205
- else :
206
- self .process_header (line )
207
-
208
- # When server sends a response line without any header or body e.g.
209
- # HTTP/1.1 200 Connection established\r\n\r\n
210
- if self .state == httpParserStates .LINE_RCVD and \
211
- self .type == httpParserTypes .RESPONSE_PARSER and \
212
- raw == CRLF :
213
- self .state = httpParserStates .COMPLETE
214
- elif self .state == httpParserStates .HEADERS_COMPLETE and \
215
- not self .body_expected () and \
216
- raw == b'' :
217
- self .state = httpParserStates .COMPLETE
218
-
219
- return len (raw ) > 0 , raw
220
-
221
- def process_line (self , raw : bytes ) -> None :
222
- line = raw .split (WHITESPACE )
223
- if self .type == httpParserTypes .REQUEST_PARSER :
224
- self .method = line [0 ].upper ()
225
- self .set_url (line [1 ])
226
- self .version = line [2 ]
227
- else :
228
- self .version = line [0 ]
229
- self .code = line [1 ]
230
- self .reason = WHITESPACE .join (line [2 :])
231
-
232
- def process_header (self , raw : bytes ) -> None :
233
- parts = raw .split (COLON )
234
- key = parts [0 ].strip ()
235
- value = COLON .join (parts [1 :]).strip ()
236
- self .add_headers ([(key , value )])
237
-
238
- def build_path (self ) -> bytes :
239
- if not self .url :
240
- return b'/None'
241
- url = self .url .path
242
- if url == b'' :
243
- url = b'/'
244
- if not self .url .query == b'' :
245
- url += b'?' + self .url .query
246
- if not self .url .fragment == b'' :
247
- url += b'#' + self .url .fragment
248
- return url
249
-
250
171
def build (self , disable_headers : Optional [List [bytes ]] = None , for_proxy : bool = False ) -> bytes :
251
172
"""Rebuild the request object."""
252
173
assert self .method and self .version and self .path and self .type == httpParserTypes .REQUEST_PARSER
253
174
if disable_headers is None :
254
175
disable_headers = DEFAULT_DISABLE_HEADERS
255
- body : Optional [bytes ] = ChunkParser .to_chunks (self .body ) \
256
- if self .is_chunked_encoded () and self .body else \
257
- self .body
176
+ body : Optional [bytes ] = self ._get_body_or_chunks ()
258
177
path = self .path
259
178
if for_proxy :
260
- assert self .url and self .host and self .port and self .path
179
+ assert self ._url and self .host and self .port and self .path
261
180
path = (
262
- self .url .scheme +
181
+ self ._url .scheme +
263
182
COLON + SLASH + SLASH +
264
183
self .host +
265
184
COLON +
266
185
str (self .port ).encode () +
267
186
self .path
268
187
) if self .method != httpMethods .CONNECT else (self .host + COLON + str (self .port ).encode ())
269
-
270
188
return build_http_request (
271
189
self .method , path , self .version ,
272
190
headers = {} if not self .headers else {
@@ -278,16 +196,15 @@ def build(self, disable_headers: Optional[List[bytes]] = None, for_proxy: bool =
278
196
279
197
def build_response (self ) -> bytes :
280
198
"""Rebuild the response object."""
281
- assert self .code and self .version and self .body and self . type == httpParserTypes .RESPONSE_PARSER
199
+ assert self .code and self .version and self .type == httpParserTypes .RESPONSE_PARSER
282
200
return build_http_response (
283
201
status_code = int (self .code ),
284
202
protocol_version = self .version ,
285
203
reason = self .reason ,
286
204
headers = {} if not self .headers else {
287
205
self .headers [k ][0 ]: self .headers [k ][1 ] for k in self .headers
288
206
},
289
- body = self .body if not self .is_chunked_encoded (
290
- ) else ChunkParser .to_chunks (self .body ),
207
+ body = self ._get_body_or_chunks (),
291
208
)
292
209
293
210
def has_host (self ) -> bool :
@@ -305,3 +222,110 @@ def is_connection_upgrade(self) -> bool:
305
222
return self .version == HTTP_1_1 and \
306
223
self .has_header (b'Connection' ) and \
307
224
self .has_header (b'Upgrade' )
225
+
226
+ def _process_body (self , raw : bytes ) -> Tuple [bool , bytes ]:
227
+ if b'content-length' in self .headers :
228
+ self .state = httpParserStates .RCVING_BODY
229
+ if self .body is None :
230
+ self .body = b''
231
+ total_size = int (self .header (b'content-length' ))
232
+ received_size = len (self .body )
233
+ self .body += raw [:total_size - received_size ]
234
+ if self .body and \
235
+ len (self .body ) == int (self .header (b'content-length' )):
236
+ self .state = httpParserStates .COMPLETE
237
+ more , raw = len (raw ) > 0 , raw [total_size - received_size :]
238
+ elif self .is_chunked_encoded ():
239
+ if not self .chunk_parser :
240
+ self .chunk_parser = ChunkParser ()
241
+ raw = self .chunk_parser .parse (raw )
242
+ if self .chunk_parser .state == chunkParserStates .COMPLETE :
243
+ self .body = self .chunk_parser .body
244
+ self .state = httpParserStates .COMPLETE
245
+ more = False
246
+ else :
247
+ raise NotImplementedError (
248
+ 'Parser shouldn\' t have reached here. ' +
249
+ 'This can happen when content length header is missing but their is a body in the payload' ,
250
+ )
251
+ return more , raw
252
+
253
+ def _process_line_and_headers (self , raw : bytes ) -> Tuple [bool , bytes ]:
254
+ """Returns False when no CRLF could be found in received bytes."""
255
+ line , raw = find_http_line (raw )
256
+ if line is None :
257
+ return False , raw
258
+
259
+ if self .state == httpParserStates .INITIALIZED :
260
+ self ._process_line (line )
261
+ self .state = httpParserStates .LINE_RCVD
262
+ elif self .state in (httpParserStates .LINE_RCVD , httpParserStates .RCVING_HEADERS ):
263
+ if self .state == httpParserStates .LINE_RCVD :
264
+ # LINE_RCVD state is equivalent to RCVING_HEADERS
265
+ self .state = httpParserStates .RCVING_HEADERS
266
+ if line .strip () == b'' : # Blank line received.
267
+ self .state = httpParserStates .HEADERS_COMPLETE
268
+ else :
269
+ self ._process_header (line )
270
+
271
+ # When server sends a response line without any header or body e.g.
272
+ # HTTP/1.1 200 Connection established\r\n\r\n
273
+ if self .state == httpParserStates .LINE_RCVD and \
274
+ self .type == httpParserTypes .RESPONSE_PARSER and \
275
+ raw == CRLF :
276
+ self .state = httpParserStates .COMPLETE
277
+ elif self .state == httpParserStates .HEADERS_COMPLETE and \
278
+ not self .body_expected () and \
279
+ raw == b'' :
280
+ self .state = httpParserStates .COMPLETE
281
+
282
+ return len (raw ) > 0 , raw
283
+
284
+ def _process_line (self , raw : bytes ) -> None :
285
+ line = raw .split (WHITESPACE )
286
+ if self .type == httpParserTypes .REQUEST_PARSER :
287
+ self .method = line [0 ].upper ()
288
+ self .set_url (line [1 ])
289
+ self .version = line [2 ]
290
+ else :
291
+ self .version = line [0 ]
292
+ self .code = line [1 ]
293
+ self .reason = WHITESPACE .join (line [2 :])
294
+
295
+ def _process_header (self , raw : bytes ) -> None :
296
+ parts = raw .split (COLON )
297
+ key = parts [0 ].strip ()
298
+ value = COLON .join (parts [1 :]).strip ()
299
+ self .add_headers ([(key , value )])
300
+
301
+ def _get_body_or_chunks (self ) -> Optional [bytes ]:
302
+ return ChunkParser .to_chunks (self .body ) \
303
+ if self .body and self .is_chunked_encoded () else \
304
+ self .body
305
+
306
+ def _set_line_attributes (self ) -> None :
307
+ if self .type == httpParserTypes .REQUEST_PARSER :
308
+ if self .method == httpMethods .CONNECT and self ._url :
309
+ self .host = self ._url .hostname
310
+ self .port = 443 if self ._url .port is None else self ._url .port
311
+ elif self ._url :
312
+ self .host , self .port = self ._url .hostname , self ._url .port \
313
+ if self ._url .port else DEFAULT_HTTP_PORT
314
+ else :
315
+ raise KeyError (
316
+ 'Invalid request. Method: %r, Url: %r' %
317
+ (self .method , self ._url ),
318
+ )
319
+ self .path = self ._build_path ()
320
+
321
+ def _build_path (self ) -> bytes :
322
+ if not self ._url :
323
+ return b'/None'
324
+ url = self ._url .path
325
+ if url == b'' :
326
+ url = b'/'
327
+ if not self ._url .query == b'' :
328
+ url += b'?' + self ._url .query
329
+ if not self ._url .fragment == b'' :
330
+ url += b'#' + self ._url .fragment
331
+ return url
0 commit comments