Skip to content

Commit b7bf693

Browse files
authored
request-uri handling: use REQUEST_URI if available to maintain %-encoding when constructing WbUrl (#315)
geventserver: use a custom handler to set the raw 'REQUEST_URI' when running the default gevent WSGI server (uwsgi already sets REQUEST_URI). testing: add a REQUEST_URI check to the proxy tests, since a real server is being used there (webtest-based tests decode %-encoding). bump version to 2.0.4
1 parent 33cca0b commit b7bf693

File tree

5 files changed

+44
-13
lines changed

5 files changed

+44
-13
lines changed

pywb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '2.0.3'
1+
__version__ = '2.0.4'
22

33
DEFAULT_CONFIG = 'pywb/default_config.yaml'
44

pywb/apps/cli.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,13 @@ def run(self):
8181
return self
8282

8383
def run_gevent(self):
84-
from gevent.pywsgi import WSGIServer
84+
from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
8585
logging.info('Starting Gevent Server on ' + str(self.r.port))
86-
WSGIServer((self.r.bind, self.r.port), self.application).serve_forever()
86+
ge = GeventServer(self.application,
87+
port=self.r.port,
88+
hostname=self.r.bind,
89+
handler_class=RequestURIWSGIHandler,
90+
direct=True)
8791

8892

8993
#=============================================================================

pywb/apps/frontendapp.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,16 @@ def serve_content(self, environ, coll='$root', url='', timemap_output='', record
257257

258258
self.setup_paths(environ, coll, record)
259259

260-
wb_url_str = to_native_str(url)
260+
request_uri = environ.get('REQUEST_URI')
261+
script_name = environ.get('SCRIPT_NAME', '') + '/'
262+
if request_uri and request_uri.startswith(script_name):
263+
wb_url_str = request_uri[len(script_name):]
261264

262-
if environ.get('QUERY_STRING'):
263-
wb_url_str += '?' + environ.get('QUERY_STRING')
265+
else:
266+
wb_url_str = to_native_str(url)
267+
268+
if environ.get('QUERY_STRING'):
269+
wb_url_str += '?' + environ.get('QUERY_STRING')
264270

265271
metadata = self.get_metadata(coll)
266272
if record:

pywb/utils/geventserver.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
from gevent.wsgi import WSGIServer
1+
from gevent.wsgi import WSGIServer, WSGIHandler
22
from gevent import spawn
33
import logging
44

55

66
# ============================================================================
77
class GeventServer(object):
8-
def __init__(self, app, port=0, hostname='localhost', handler_class=None):
8+
def __init__(self, app, port=0, hostname='localhost', handler_class=None,
9+
direct=False):
910
self.port = port
10-
self.make_server(app, port, hostname, handler_class)
11+
self.make_server(app, port, hostname, handler_class, direct=direct)
1112

1213
def stop(self):
1314
if self.server:
@@ -22,15 +23,25 @@ def _run(self, server, port):
2223
logging.debug('server failed to start on ' + str(port))
2324
traceback.print_exc()
2425

25-
def make_server(self, app, port, hostname, handler_class):
26+
def make_server(self, app, port, hostname, handler_class, direct=False):
2627
server = WSGIServer((hostname, port), app, handler_class=handler_class)
2728
server.init_socket()
2829
self.port = server.address[1]
2930

3031
self.server = server
31-
self.ge = spawn(self._run, server, self.port)
32+
if direct:
33+
self.ge = None
34+
self._run(server, self.port)
35+
else:
36+
self.ge = spawn(self._run, server, self.port)
3237

3338
def join(self):
3439
self.ge.join()
3540

3641

42+
# ============================================================================
43+
class RequestURIWSGIHandler(WSGIHandler):
44+
def get_environ(self):
45+
environ = super(RequestURIWSGIHandler, self).get_environ()
46+
environ['REQUEST_URI'] = self.path
47+
return environ

tests/test_proxy.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests
22

33
from .base_config_test import CollsDirMixin
4-
from pywb.utils.geventserver import GeventServer
4+
from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
55
from pywb.apps.frontendapp import FrontEndApp
66
from pywb.manager.manager import main as manager
77

@@ -34,7 +34,7 @@ def setup_class(cls, coll='pywb', config_file='config_test.yaml', recording=Fals
3434
cls.app = FrontEndApp(config_file=config_file,
3535
custom_config={'proxy': opts})
3636

37-
cls.server = GeventServer(cls.app)
37+
cls.server = GeventServer(cls.app, handler_class=RequestURIWSGIHandler)
3838
cls.proxies = cls.proxy_dict(cls.server.port)
3939

4040
@classmethod
@@ -127,3 +127,13 @@ def test_proxy_replay_recorded(self, scheme):
127127
assert 'is_live = false' in res.text
128128
assert 'httpbin(1)' in res.text
129129

130+
def test_proxy_record_keep_percent(self, scheme):
131+
self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/'
132+
133+
res = requests.get('{0}://example.com/%2A%2Ffoobar'.format(scheme),
134+
proxies=self.proxies,
135+
verify=self.root_ca_file)
136+
137+
# ensure %-encoded url stays as is
138+
assert '"{0}://example.com/%2A%2Ffoobar"'.format(scheme) in res.text
139+

0 commit comments

Comments
 (0)