Skip to content

Add support for dynamic reverse proxy routing #1180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2315,9 +2315,9 @@ usage: -m [-h] [--tunnel-hostname TUNNEL_HOSTNAME] [--tunnel-port TUNNEL_PORT]
[--filtered-client-ips FILTERED_CLIENT_IPS]
[--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG]

proxy.py v2.4.2.dev11+g0beb02d.d20220420
proxy.py v2.4.3.dev14+gc6b2de6.d20220605

options:
optional arguments:
-h, --help show this help message and exit
--tunnel-hostname TUNNEL_HOSTNAME
Default: None. Remote hostname or IP address to which
Expand All @@ -2334,11 +2334,11 @@ options:
--tunnel-remote-port TUNNEL_REMOTE_PORT
Default: 8899. Remote port which will be forwarded
locally for proxy.
--threadless Default: True. Enabled by default on Python 3.8+ (mac,
linux). When disabled a new thread is spawned to
--threadless Default: False. Enabled by default on Python 3.8+
(mac, linux). When disabled a new thread is spawned to
handle each client connection.
--threaded Default: False. Disabled by default on Python < 3.8
and windows. When enabled a new thread is spawned to
--threaded Default: True. Disabled by default on Python < 3.8 and
windows. When enabled a new thread is spawned to
handle each client connection.
--num-workers NUM_WORKERS
Defaults to number of CPU cores.
Expand Down Expand Up @@ -2434,8 +2434,8 @@ options:
Default: None. Signing certificate to use for signing
dynamically generated HTTPS certificates. If used,
must also pass --ca-key-file and --ca-signing-key-file
--ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv310/lib/
python3.10/site-packages/certifi/cacert.pem. Provide
--ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv373/lib/
python3.7/site-packages/certifi/cacert.pem. Provide
path to custom CA bundle for peer certificate
verification
--ca-signing-key-file CA_SIGNING_KEY_FILE
Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@
(_py_class_role, 'T'),
(_py_class_role, 'HostPort'),
(_py_class_role, 'TcpOrTlsSocket'),
(_py_class_role, 're.Pattern'),
(_py_obj_role, 'proxy.core.work.threadless.T'),
(_py_obj_role, 'proxy.core.work.work.T'),
(_py_obj_role, 'proxy.core.base.tcp_server.T'),
Expand Down
9 changes: 9 additions & 0 deletions proxy/common/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""
import re
import ssl
import sys
import queue
import socket
import ipaddress
Expand All @@ -30,3 +32,10 @@
IpAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]
TcpOrTlsSocket = Union[ssl.SSLSocket, socket.socket]
HostPort = Tuple[str, int]

# Pick the most precise type alias for compiled regular expressions that
# the running interpreter supports.  Comparing the whole version tuple
# (rather than only ``.minor``) keeps the check correct across major
# versions and is the form static type checkers understand.
if sys.version_info < (3, 7):
    # No public ``re.Pattern`` to reference here; fall back to ``Any``.
    RePattern = Any
elif sys.version_info < (3, 9):
    # ``re.Pattern`` is used unsubscripted on these versions.
    RePattern = re.Pattern  # type: ignore
else:
    RePattern = re.Pattern[Any]  # type: ignore
38 changes: 35 additions & 3 deletions proxy/http/server/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
import argparse
import mimetypes
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union, Optional

from proxy.http.url import Url
from ..parser import HttpParser
from ..responses import NOT_FOUND_RESPONSE_PKT, okResponse
from ..websocket import WebsocketFrame
from ..connection import HttpClientConnection
from ...core.event import EventQueue
from ..descriptors import DescriptorsHandlerMixin
from ...common.types import RePattern
from ...common.utils import bytes_


Expand Down Expand Up @@ -127,6 +129,36 @@ class ReverseProxyBasePlugin(ABC):
"""ReverseProxy base plugin class."""

@abstractmethod
def routes(self) -> List[Tuple[str, List[bytes]]]:
"""Return List(path, List(upstream)) reverse proxy config."""
def routes(self) -> List[Union[str, Tuple[str, List[bytes]]]]:
"""List of routes registered by plugin.

There are 2 types of routes:

1) Dynamic routes (str): Should be a regular expression
2) Static routes (tuple): Contain 2 elements, a route regular expression
and list of upstream urls to serve when the route matches.

Static routes don't require you to implement the `handle_route` method.
Reverse proxy core will automatically pick one of the configured upstream URLs
and serve it out of the box.

Dynamic routes are helpful when you want to dynamically match and serve upstream urls.
To handle dynamic routes, you must implement the `handle_route` method, which
must return the url to serve."""
raise NotImplementedError() # pragma: no cover

def handle_route(self, request: HttpParser, pattern: RePattern) -> Url:
    """Return the upstream url to serve for a matched dynamic route.

    Implement this method if you have configured dynamic routes, i.e.
    plain ``str`` entries returned by ``routes``.

    :param request: The incoming request whose path matched ``pattern``.
    :param pattern: The compiled route regular expression that matched.
    :returns: The upstream url to serve for this request.
    :raises NotImplementedError: If a plugin registers a dynamic route
        without overriding this method.
    """
    # Fail loudly instead of silently returning ``None``, which would
    # contradict the ``Url`` return annotation and only surface later
    # in the caller.
    raise NotImplementedError(
        'handle_route must be implemented by plugins with dynamic routes',
    )  # pragma: no cover

def regexes(self) -> List[str]:
    """Collect the regular expression of every route registered by the plugin.

    Dynamic routes (``str``) contribute themselves, static routes
    (``tuple``) contribute their first element.

    :returns: Route regular expressions, in the order ``routes`` yields them.
    :raises ValueError: If a route is neither a ``str`` nor a ``tuple``.
    """
    patterns: List[str] = []
    for entry in self.routes():
        if isinstance(entry, tuple):
            # Static route: (regex, upstreams)
            patterns.append(entry[0])
            continue
        if not isinstance(entry, str):
            raise ValueError('Invalid route type')
        # Dynamic route: the entry itself is the regex
        patterns.append(entry)
    return patterns
53 changes: 25 additions & 28 deletions proxy/http/server/reverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,37 +38,39 @@ class ReverseProxy(TcpUpstreamConnectionHandler, HttpWebServerBasePlugin):
def __init__(self, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)
self.choice: Optional[Url] = None
self.reverse: Dict[str, List[bytes]] = {}
self.plugins: List['ReverseProxyBasePlugin'] = []
for klass in self.flags.plugins[b'ReverseProxyBasePlugin']:
plugin: 'ReverseProxyBasePlugin' = klass()
self.plugins.append(plugin)

def handle_upstream_data(self, raw: memoryview) -> None:
self.client.queue(raw)

def routes(self) -> List[Tuple[int, str]]:
reverse: List[Tuple[str, List[bytes]]] = []
for klass in self.flags.plugins[b'ReverseProxyBasePlugin']:
instance: 'ReverseProxyBasePlugin' = klass()
reverse.extend(instance.routes())
r = []
for (route, upstreams) in reverse:
r.append((httpProtocolTypes.HTTP, route))
r.append((httpProtocolTypes.HTTPS, route))
self.reverse[route] = upstreams
for plugin in self.plugins:
for route in plugin.regexes():
r.append((httpProtocolTypes.HTTP, route))
r.append((httpProtocolTypes.HTTPS, route))
return r

def handle_request(self, request: HttpParser) -> None:
# TODO: Core must be capable of dispatching a context
# with each invocation of handle request callback.
#
# Example, here we don't know which of our registered
# route actually matched.
#
for route in self.reverse:
pattern = re.compile(route)
if pattern.match(text_(request.path)):
self.choice = Url.from_bytes(
random.choice(self.reverse[route]),
)
break
for plugin in self.plugins:
for route in plugin.routes():
if isinstance(route, tuple):
pattern = re.compile(route[0])
if pattern.match(text_(request.path)):
self.choice = Url.from_bytes(
random.choice(route[1]),
)
break
elif isinstance(route, str):
pattern = re.compile(route)
if pattern.match(text_(request.path)):
self.choice = plugin.handle_route(request, pattern)
break
else:
raise ValueError('Invalid route')
assert self.choice and self.choice.hostname
port = self.choice.port or \
DEFAULT_HTTP_PORT \
Expand All @@ -85,12 +87,7 @@ def handle_request(self, request: HttpParser) -> None:
),
as_non_blocking=True,
)
# Update Host header
# if request.has_header(b'Host'):
# request.del_header(b'Host')
# request.add_header(
# b'Host', ('%s:%d' % self.upstream.addr).encode('utf-8'),
# )
request.path = self.choice.remainder
self.upstream.queue(memoryview(request.build()))
except ConnectionRefusedError:
raise HttpProtocolException( # pragma: no cover
Expand Down
1 change: 1 addition & 0 deletions proxy/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
onmessage
httpbin
localhost
Lua
"""
from .cache import CacheResponsesPlugin, BaseCacheResponsesPlugin
from .shortlink import ShortLinkPlugin
Expand Down
60 changes: 43 additions & 17 deletions proxy/plugin/reverse_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,19 @@

:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""
from typing import List, Tuple

from ..http.server import ReverseProxyBasePlugin
.. spelling::

Lua
"""
import re
from typing import List, Tuple, Union

# TODO: We must use nginx python parser and
# make this plugin nginx.conf compliant.
REVERSE_PROXY_LOCATION: str = r'/get$'
# Randomly choose either http or https upstream endpoint.
#
# This is just to demonstrate that both http and https upstream
# reverse proxy works.
REVERSE_PROXY_PASS = [
b'http://httpbin.org/get',
b'https://httpbin.org/get',
]
from ..http import Url
from ..http.parser import HttpParser
from ..http.server import ReverseProxyBasePlugin
from ..common.types import RePattern
from ..http.exception.base import HttpProtocolException


class ReverseProxyPlugin(ReverseProxyBasePlugin):
Expand All @@ -35,8 +31,38 @@ class ReverseProxyPlugin(ReverseProxyBasePlugin):
}
```

Update the routes config before.
Plugin also demonstrates how to write "Python" equivalent for any
"Nginx Lua" based configuration i.e. your plugin code will have
full control over what to do after one of your routes has matched.
"""

def routes(self) -> List[Tuple[str, List[bytes]]]:
return [(REVERSE_PROXY_LOCATION, REVERSE_PROXY_PASS)]
def routes(self) -> List[Union[str, Tuple[str, List[bytes]]]]:
    """Return the example routes: one static route and one dynamic route."""
    # A static route: a path regex plus the list of upstream urls
    # that may serve it.
    static_route: Tuple[str, List[bytes]] = (
        r'/get$',
        [b'http://httpbin.org/get', b'https://httpbin.org/get'],
    )
    # A dynamic route to catch requests on "/get/<int>".
    # See the "handle_route" method for what we do when
    # this pattern matches.
    dynamic_route: str = r'/get/(\d+)$'
    return [static_route, dynamic_route]

def handle_route(self, request: HttpParser, pattern: RePattern) -> Url:
    """Serve the example dynamic route from upstream.

    An incoming request for "/get/1" is rewritten so that upstream
    receives "/get?id=1".

    :param request: The incoming request; its path must be set.
    :param pattern: The route regular expression that matched.
    :returns: The upstream url, with the captured id appended as a
        query string to its remainder.
    :raises HttpProtocolException: If the path does not yield exactly
        one captured group.
    """
    upstream: Url = Url.from_bytes(b'http://httpbin.org/get')
    assert request.path
    path = request.path.decode()
    matched = re.search(pattern, path)
    if matched is None or len(matched.groups()) != 1:
        raise HttpProtocolException('Invalid request')
    assert upstream.remainder == b'/get'
    # NOTE: Internally, reverse proxy core replaces the original
    # request.path with the returned url's remainder.  For this
    # example, a request.path of "/get/1" is therefore sent to the
    # chosen upstream server as "/get?id=1".
    query = f'?id={matched.groups()[0]}'.encode()
    upstream.remainder += query
    return upstream