Skip to content

Commit 15ccd01

Browse files
committed
* lib/net/http/response.rb: Automatically inflate gzip and
deflate-encoded response bodies. [Feature #6492] * lib/net/http/generic_request.rb: Automatically accept gzip and deflate content-encoding for requests. [Feature #6494] * lib/net/http/request.rb: Updated documentation for #6494. * lib/net/http.rb: Updated documentation for #6492 and #6494, removed Content-Encoding handling now present in Net::HTTPResponse. * test/net/http/test_httpresponse.rb: Tests for #6492 * test/net/http/test_http_request.rb: Tests for #6494 * test/open-uri/test_open-uri.rb (test_content_encoding): Updated test for automatic content-encoding handling. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36473 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 47352b9 commit 15ccd01

File tree

6 files changed

+369
-41
lines changed

6 files changed

+369
-41
lines changed

lib/net/http.rb

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,14 @@ class HTTPHeaderSyntaxError < StandardError; end
283283
# See Net::HTTP::Proxy for further details and examples such as proxies that
284284
# require a username and password.
285285
#
286+
# === Compression
287+
#
288+
# Net::HTTP automatically adds Accept-Encoding for compression of response
289+
# bodies and automatically decompresses gzip and deflate responses unless a
290+
# Range header was sent.
291+
#
292+
# Compression can be disabled through the Accept-Encoding: identity header.
293+
#
286294
# == HTTP Request Classes
287295
#
288296
# Here is the HTTP request class hierarchy.
@@ -602,7 +610,6 @@ def initialize(address, port = nil)
602610
@use_ssl = false
603611
@ssl_context = nil
604612
@enable_post_connection_check = true
605-
@compression = nil
606613
@sspi_enabled = false
607614
SSL_IVNAMES.each do |ivname|
608615
instance_variable_set ivname, nil
@@ -1052,28 +1059,10 @@ def get(path, initheader = {}, dest = nil, &block) # :yield: +body_segment+
10521059
initheader = initheader.merge({
10531060
"accept-encoding" => "gzip;q=1.0,deflate;q=0.6,identity;q=0.3"
10541061
})
1055-
@compression = true
10561062
end
10571063
end
10581064
request(Get.new(path, initheader)) {|r|
1059-
if r.key?("content-encoding") and @compression
1060-
@compression = nil # Clear it till next set.
1061-
the_body = r.read_body dest, &block
1062-
case r["content-encoding"]
1063-
when "gzip"
1064-
r.body= Zlib::GzipReader.new(StringIO.new(the_body), encoding: "ASCII-8BIT").read
1065-
r.delete("content-encoding")
1066-
when "deflate"
1067-
r.body= Zlib::Inflate.inflate(the_body);
1068-
r.delete("content-encoding")
1069-
when "identity"
1070-
; # nothing needed
1071-
else
1072-
; # Don't do anything dramatic, unless we need to later
1073-
end
1074-
else
1075-
r.read_body dest, &block
1076-
end
1065+
r.read_body dest, &block
10771066
res = r
10781067
}
10791068
res

lib/net/http/generic_request.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ def initialize(m, reqbody, resbody, path, initheader = nil)
1414
raise ArgumentError, "no HTTP request path given" unless path
1515
raise ArgumentError, "HTTP request path is empty" if path.empty?
1616
@path = path
17+
18+
if @response_has_body and Net::HTTP::HAVE_ZLIB then
19+
if !initheader ||
20+
!initheader.keys.any? { |k|
21+
%w[accept-encoding range].include? k.downcase
22+
} then
23+
initheader = initheader ? initheader.dup : {}
24+
initheader["accept-encoding"] =
25+
"gzip;q=1.0,deflate;q=0.6,identity;q=0.3"
26+
end
27+
end
28+
1729
initialize_http_header initheader
1830
self['Accept'] ||= '*/*'
1931
self['User-Agent'] ||= 'Ruby'

lib/net/http/request.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
# subclasses: Net::HTTP::Get, Net::HTTP::Post, Net::HTTP::Head.
55
#
66
class Net::HTTPRequest < Net::HTTPGenericRequest
7-
# Creates HTTP request object.
7+
# Creates an HTTP request object for +path+.
8+
#
9+
# +initheader+ are the default headers to use. Net::HTTP adds
10+
# Accept-Encoding to enable compression of the response body unless
11+
# Accept-Encoding or Range are supplied in +initheader+.
12+
813
def initialize(path, initheader = nil)
914
super self.class::METHOD,
1015
self.class::REQUEST_HAS_BODY,

lib/net/http/response.rb

Lines changed: 129 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -222,25 +222,70 @@ def body=(value)
222222

223223
private
224224

225-
def read_body_0(dest)
226-
if chunked?
227-
read_chunked dest
228-
return
229-
end
230-
clen = content_length()
231-
if clen
232-
@socket.read clen, dest, true # ignore EOF
233-
return
225+
##
226+
# Checks for a supported Content-Encoding header and yields an Inflate
227+
# wrapper for this response's socket when zlib is present. If the
228+
# Content-Encoding is unsupported or zlib is missing the plain socket is
229+
# yielded.
230+
#
231+
# If a Content-Range header is present a plain socket is yielded as the
232+
# bytes in the range may not be a complete deflate block.
233+
234+
def inflater # :nodoc:
235+
return yield @socket unless Net::HTTP::HAVE_ZLIB
236+
return yield @socket if self['content-range']
237+
238+
case self['content-encoding']
239+
when 'deflate', 'gzip', 'x-gzip' then
240+
self.delete 'content-encoding'
241+
242+
inflate_body_io = Inflater.new(@socket)
243+
244+
begin
245+
yield inflate_body_io
246+
ensure
247+
inflate_body_io.finish
248+
end
249+
when 'none', 'identity' then
250+
self.delete 'content-encoding'
251+
252+
yield @socket
253+
else
254+
yield @socket
234255
end
235-
clen = range_length()
236-
if clen
237-
@socket.read clen, dest
238-
return
256+
end
257+
258+
def read_body_0(dest)
259+
inflater do |inflate_body_io|
260+
if chunked?
261+
read_chunked dest, inflate_body_io
262+
return
263+
end
264+
265+
@socket = inflate_body_io
266+
267+
clen = content_length()
268+
if clen
269+
@socket.read clen, dest, true # ignore EOF
270+
return
271+
end
272+
clen = range_length()
273+
if clen
274+
@socket.read clen, dest
275+
return
276+
end
277+
@socket.read_all dest
239278
end
240-
@socket.read_all dest
241279
end
242280

243-
def read_chunked(dest)
281+
##
282+
# read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF,
283+
# etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip
284+
# encoded.
285+
#
286+
# See RFC 2616 section 3.6.1 for definitions
287+
288+
def read_chunked(dest, chunk_data_io) # :nodoc:
244289
len = nil
245290
total = 0
246291
while true
@@ -250,7 +295,7 @@ def read_chunked(dest)
250295
len = hexlen.hex
251296
break if len == 0
252297
begin
253-
@socket.read len, dest
298+
chunk_data_io.read len, dest
254299
ensure
255300
total += len
256301
@socket.read 2 # \r\n
@@ -266,14 +311,80 @@ def stream_check
266311
end
267312

268313
def procdest(dest, block)
269-
raise ArgumentError, 'both arg and block given for HTTP method' \
270-
if dest and block
314+
raise ArgumentError, 'both arg and block given for HTTP method' if
315+
dest and block
271316
if block
272317
Net::ReadAdapter.new(block)
273318
else
274319
dest || ''
275320
end
276321
end
277322

323+
##
324+
# Inflater is a wrapper around Net::BufferedIO that transparently inflates
325+
# zlib and gzip streams.
326+
327+
class Inflater # :nodoc:
328+
329+
##
330+
# Creates a new Inflater wrapping +socket+
331+
332+
def initialize socket
333+
@socket = socket
334+
# zlib with automatic gzip detection
335+
@inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS)
336+
end
337+
338+
##
339+
# Finishes the inflate stream.
340+
341+
def finish
342+
@inflate.finish
343+
end
344+
345+
##
346+
# Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
347+
#
348+
# This allows a large response body to be inflated without storing the
349+
# entire body in memory.
350+
351+
def inflate_adapter(dest)
352+
block = proc do |compressed_chunk|
353+
@inflate.inflate(compressed_chunk) do |chunk|
354+
dest << chunk
355+
end
356+
end
357+
358+
Net::ReadAdapter.new(block)
359+
end
360+
361+
##
362+
# Reads +clen+ bytes from the socket, inflates them, then writes them to
363+
# +dest+. +ignore_eof+ is passed down to Net::BufferedIO#read
364+
#
365+
# Unlike Net::BufferedIO#read, this method may return more than +clen+ bytes.
366+
# At this time there is no way for a user of Net::HTTPResponse to read a
367+
# specific number of bytes from the HTTP response body, so this internal
368+
# API does not return the same number of bytes as were requested.
369+
#
370+
# See https://bugs.ruby-lang.org/issues/6492 for further discussion.
371+
372+
def read clen, dest, ignore_eof = false
373+
temp_dest = inflate_adapter(dest)
374+
375+
data = @socket.read clen, temp_dest, ignore_eof
376+
end
377+
378+
##
379+
# Reads the rest of the socket, inflates it, then writes it to +dest+.
380+
381+
def read_all dest
382+
temp_dest = inflate_adapter(dest)
383+
384+
@socket.read_all temp_dest
385+
end
386+
387+
end
388+
278389
end
279390

test/net/http/test_http_request.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
require 'net/http'
2+
require 'test/unit'
3+
require 'stringio'
4+
5+
class HTTPRequestTest < Test::Unit::TestCase
6+
7+
def test_initialize_GET
8+
req = Net::HTTP::Get.new '/'
9+
10+
assert_equal 'GET', req.method
11+
refute req.request_body_permitted?
12+
assert req.response_body_permitted?
13+
14+
expected = {
15+
'accept' => %w[*/*],
16+
'user-agent' => %w[Ruby],
17+
}
18+
19+
expected['accept-encoding'] = %w[gzip;q=1.0,deflate;q=0.6,identity;q=0.3] if
20+
Net::HTTP::HAVE_ZLIB
21+
22+
assert_equal expected, req.to_hash
23+
end
24+
25+
def test_initialize_GET_range
26+
req = Net::HTTP::Get.new '/', 'Range' => 'bytes=0-9'
27+
28+
assert_equal 'GET', req.method
29+
refute req.request_body_permitted?
30+
assert req.response_body_permitted?
31+
32+
expected = {
33+
'accept' => %w[*/*],
34+
'user-agent' => %w[Ruby],
35+
'range' => %w[bytes=0-9],
36+
}
37+
38+
assert_equal expected, req.to_hash
39+
end
40+
41+
def test_initialize_HEAD
42+
req = Net::HTTP::Head.new '/'
43+
44+
assert_equal 'HEAD', req.method
45+
refute req.request_body_permitted?
46+
refute req.response_body_permitted?
47+
48+
expected = {
49+
'accept' => %w[*/*],
50+
'user-agent' => %w[Ruby],
51+
}
52+
53+
assert_equal expected, req.to_hash
54+
end
55+
56+
end
57+

0 commit comments

Comments
 (0)