
Fix the handling of bad Last-Modified headers #277


Merged (1 commit) on Jun 13, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.rst
@@ -9,6 +9,8 @@ Unreleased
 ----------

 .. vendor-insert-here
+- Fix the handling of malformed and missing ``Last-Modified`` headers in the
+  caching downloader. Thanks :user:`balihb`! (:issue:`275`)

 0.23.1
 ------
14 changes: 8 additions & 6 deletions src/check_jsonschema/cachedownloader.py
Expand Up @@ -17,7 +17,6 @@ class FailedDownloadError(Exception):


class CacheDownloader:
_LASTMOD_DEFAULT = "Sun, 01 Jan 1970 00:00:01 GMT"
_LASTMOD_FMT = "%a, %d %b %Y %H:%M:%S %Z"

# changed in v0.5.0
@@ -83,12 +82,15 @@ def _get_request(self) -> requests.Response:
             raise FailedDownloadError("encountered error during download") from e

     def _lastmod_from_response(self, response: requests.Response) -> float:
-        return time.mktime(
-            time.strptime(
-                response.headers.get("last-modified", self._LASTMOD_DEFAULT),
-                self._LASTMOD_FMT,
+        try:
+            return time.mktime(
+                time.strptime(response.headers["last-modified"], self._LASTMOD_FMT)
             )
-        )
+        # OverflowError: time outside of platform-specific bounds
+        # ValueError: malformed/unparseable
+        # LookupError: no such header
+        except (OverflowError, ValueError, LookupError):
+            return 0.0

     def _cache_hit(self, cachefile: str, response: requests.Response) -> bool:
         # no file? miss
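
The three exception types in the new except clause each correspond to a concrete failure mode: time.strptime raises ValueError for a malformed date string, a missing header surfaces as KeyError (a subclass of LookupError) when accessed with square brackets, and time.mktime can raise OverflowError for timestamps outside the platform's supported range. The following standalone sketch is not part of the diff; parse_lastmod is a hypothetical helper that only mirrors the new _lastmod_from_response behavior, with a plain dict standing in for the (case-insensitive) response headers.

import time

_LASTMOD_FMT = "%a, %d %b %Y %H:%M:%S %Z"


def parse_lastmod(headers: dict) -> float:
    # Any of the three failure modes falls back to 0.0 instead of raising.
    try:
        return time.mktime(time.strptime(headers["last-modified"], _LASTMOD_FMT))
    except (OverflowError, ValueError, LookupError):
        return 0.0


parse_lastmod({"last-modified": "Tue, 13 Jun 2023 00:00:01 GMT"})  # parsed epoch seconds
parse_lastmod({"last-modified": "Jan 2000 00:00:01"})  # ValueError -> 0.0
parse_lastmod({})  # KeyError (a LookupError) -> 0.0
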
63 changes: 63 additions & 0 deletions tests/unit/test_cachedownloader.py
@@ -205,3 +205,66 @@ def test_cachedownloader_retries_on_bad_data(tmp_path, disable_cache):
         assert not f.exists()
     else:
         assert f.exists()
+
+
+@pytest.mark.parametrize("file_exists", (True, False))
+@pytest.mark.parametrize(
+    "failure_mode", ("header_missing", "header_malformed", "time_overflow")
+)
+def test_cachedownloader_handles_bad_lastmod_header(
+    monkeypatch, tmp_path, file_exists, failure_mode
+):
+    if failure_mode == "header_missing":
+        responses.add(
+            "GET",
+            "https://example.com/schema1.json",
+            headers={},
+            json={},
+            match_querystring=None,
+        )
+    elif failure_mode == "header_malformed":
+        responses.add(
+            "GET",
+            "https://example.com/schema1.json",
+            headers={"Last-Modified": "Jan 2000 00:00:01"},
+            json={},
+            match_querystring=None,
+        )
+    elif failure_mode == "time_overflow":
+        add_default_response()
+
+        def fake_mktime(*args):
+            raise OverflowError("uh-oh")
+
+        monkeypatch.setattr("time.mktime", fake_mktime)
+    else:
+        raise NotImplementedError
+
+    original_file_contents = b'{"foo": "bar"}'
+    f = tmp_path / "schema1.json"
+
+    if file_exists:
+        f.write_bytes(original_file_contents)
+    else:
+        assert not f.exists()
+
+    cd = CacheDownloader(
+        "https://example.com/schema1.json", filename=str(f), cache_dir=str(tmp_path)
+    )
+
+    # if the file already existed, it will not be overwritten by the cachedownloader
+    # so the returned value for both the downloader and a direct file read should be the
+    # original contents
+    if file_exists:
+        with cd.open() as fp:
+            assert fp.read() == original_file_contents
+        assert f.read_bytes() == original_file_contents
+    # otherwise, the file will have been created with new content
+    # both reads will show that new content
+    else:
+        with cd.open() as fp:
+            assert fp.read() == b"{}"
+        assert f.read_bytes() == b"{}"
+
+    # at the end, the file always exists on disk
+    assert f.exists()
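
A note on why an existing cached file survives a bad header: the fallback value of 0.0 makes the remote copy look older than any file already on disk, so the downloader's cache check keeps the local contents, which is exactly what the assertions above verify. The cache check itself is not shown in this diff; the sketch below is an assumption of how such a comparison typically works, with cache_hit as a hypothetical stand-in for CacheDownloader._cache_hit.

import os


def cache_hit(cachefile: str, remote_lastmod: float) -> bool:
    # Assumed logic: no local file means a miss; otherwise the cache counts as
    # fresh when the local copy is at least as new as the remote Last-Modified
    # timestamp. With the 0.0 fallback, any existing file wins, so existing
    # cached contents are preserved when the header is missing or malformed.
    if not os.path.exists(cachefile):
        return False
    return os.path.getmtime(cachefile) >= remote_lastmod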