Skip to content

Issue441 automatic backoff retry on http429 #547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion openeo/rest/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
OidcResourceOwnerPasswordAuthenticator,
)
from openeo.rest.datacube import DataCube, InputDate
from openeo.rest.http import requests_with_retry
from openeo.rest.job import BatchJob, RESTJob
from openeo.rest.mlmodel import MlModel
from openeo.rest.rest_capabilities import RESTCapabilities
Expand Down Expand Up @@ -85,7 +86,8 @@ def __init__(
):
self._root_url = root_url
self.auth = auth or NullAuth()
self.session = session or requests.Session()
# TODO: #441 [WIP] Add requests_with_retry here to the session?
self.session = session or requests_with_retry()
self.default_timeout = default_timeout or DEFAULT_TIMEOUT
self.default_headers = {
"User-Agent": "openeo-python-client/{cv} {py}/{pv} {pl}".format(
Expand Down
39 changes: 39 additions & 0 deletions openeo/rest/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from typing import Set

import requests
import requests.adapters

# Default retry budget: used by `requests_with_retry` as the default value
# for each of its `total`, `read`, `other` and `status` limits.
MAX_RETRIES = 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A default of at most 3 retries might be a bit low in practice in combination with the default backoff_factor: the last attempt will be only roughly 4 seconds after the first attempt, which might be too soon to stop trying.



def requests_with_retry(
    total: int = MAX_RETRIES,
    read: int = MAX_RETRIES,
    other: int = MAX_RETRIES,
    status: int = MAX_RETRIES,
    backoff_factor: float = 1,
    status_forcelist: Set[int] = frozenset([429, 500, 502, 503, 504]),
    **kwargs,
) -> requests.Session:
    """
    Build a `requests.Session` that automatically retries failed requests.

    A single retry-enabled `HTTPAdapter` is mounted for both the
    ``http://`` and ``https://`` prefixes of a fresh session.

    :param total: forwarded as ``total`` to the ``Retry`` configuration
    :param read: forwarded as ``read`` to the ``Retry`` configuration
    :param other: forwarded as ``other`` to the ``Retry`` configuration
    :param status: forwarded as ``status`` to the ``Retry`` configuration
    :param backoff_factor: forwarded as ``backoff_factor`` to the ``Retry`` configuration
    :param status_forcelist: HTTP status codes forwarded as ``status_forcelist``
    :param kwargs: any additional keyword arguments for ``Retry``
    :return: new session with the retrying adapter mounted

    Inspiration and references:
    - https://requests.readthedocs.io/en/latest/api/#requests.adapters.HTTPAdapter
    - https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry
    - https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/#retry-on-failure
    """
    # Collect the explicitly named retry settings; extra options go through as-is.
    retry_options = dict(
        total=total,
        read=read,
        other=other,
        status=status,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retry_policy = requests.adapters.Retry(**retry_options, **kwargs)
    retrying_adapter = requests.adapters.HTTPAdapter(max_retries=retry_policy)

    http_session = requests.Session()
    # Same adapter instance serves both plain and TLS traffic.
    for url_prefix in ("http://", "https://"):
        http_session.mount(url_prefix, retrying_adapter)
    return http_session
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"flake8>=5.0.0",
"time_machine",
"pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency
"re-assert",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have dirty_equals for this kind of thing; we should not add yet another utility here.

]

docs_require = [
Expand Down
40 changes: 40 additions & 0 deletions tests/rest/test_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import logging

import pytest
import requests.exceptions
from re_assert import Matches

from openeo.rest.http import requests_with_retry


def test_requests_with_retry(caplog):
    """
    Simple test for retrying using an invalid domain.

    Requests `https://example.test` through a session created with `total=2`
    and checks, via the captured debug log, that three connection attempts
    were made before the `ConnectionError` was raised.
    """
    caplog.set_level(logging.DEBUG)

    session = requests_with_retry(total=2, backoff_factor=0.1)
    with pytest.raises(requests.exceptions.ConnectionError, match="Max retries exceeded"):
        _ = session.get("https://example.test")

    # Regex patterns are raw strings: `\(` in a regular string literal is an
    # invalid escape sequence (a warning on recent Python versions).
    # NOTE(review): the "Failed to resolve" wording presumably depends on the
    # installed urllib3 version; an earlier variant of this expectation matched
    # "Failed to establish a new connection" instead -- confirm.
    assert caplog.messages == [
        "Starting new HTTPS connection (1): example.test:443",
        Matches(r"Incremented Retry.*Retry\(total=1"),
        Matches(r"Retrying.*total=1.*Failed to resolve 'example.test'"),
        "Starting new HTTPS connection (2): example.test:443",
        Matches(r"Incremented Retry.*Retry\(total=0"),
        Matches(r"Retrying.*total=0.*Failed to resolve 'example.test'"),
        "Starting new HTTPS connection (3): example.test:443",
    ]


def test_requests_with_retry_zero(caplog):
    """With `total=0`, a request to an invalid domain is attempted once and not retried."""
    caplog.set_level(logging.DEBUG)

    no_retry_session = requests_with_retry(total=0)
    expected_error = requests.exceptions.ConnectionError
    with pytest.raises(expected_error, match="Max retries exceeded"):
        no_retry_session.get("https://example.test")

    # Only the initial connection attempt is expected in the log.
    expected_messages = ["Starting new HTTPS connection (1): example.test:443"]
    assert caplog.messages == expected_messages
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests only cover retries that end in failure; there are no tests for retries that eventually succeed.