diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f623b15a0..ef3e0725b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,6 +9,8 @@ Unreleased
 ----------
 
 .. vendor-insert-here
 
+- Fix the handling of malformed and missing ``Last-Modified`` headers in the
+  caching downloader. Thanks :user:`balihb`! (:issue:`275`)
 0.23.1
 ------
diff --git a/src/check_jsonschema/cachedownloader.py b/src/check_jsonschema/cachedownloader.py
index dc4d65066..5b6a26a7e 100644
--- a/src/check_jsonschema/cachedownloader.py
+++ b/src/check_jsonschema/cachedownloader.py
@@ -17,7 +17,6 @@ class FailedDownloadError(Exception):
 
 
 class CacheDownloader:
-    _LASTMOD_DEFAULT = "Sun, 01 Jan 1970 00:00:01 GMT"
     _LASTMOD_FMT = "%a, %d %b %Y %H:%M:%S %Z"
 
     # changed in v0.5.0
@@ -83,12 +82,15 @@ def _get_request(self) -> requests.Response:
             raise FailedDownloadError("encountered error during download") from e
 
     def _lastmod_from_response(self, response: requests.Response) -> float:
-        return time.mktime(
-            time.strptime(
-                response.headers.get("last-modified", self._LASTMOD_DEFAULT),
-                self._LASTMOD_FMT,
+        try:
+            return time.mktime(
+                time.strptime(response.headers["last-modified"], self._LASTMOD_FMT)
             )
-        )
+        # OverflowError: time outside of platform-specific bounds
+        # ValueError: malformed/unparseable
+        # LookupError: no such header
+        except (OverflowError, ValueError, LookupError):
+            return 0.0
 
     def _cache_hit(self, cachefile: str, response: requests.Response) -> bool:
         # no file? miss
diff --git a/tests/unit/test_cachedownloader.py b/tests/unit/test_cachedownloader.py
index 1f9fbb0f4..f26e08527 100644
--- a/tests/unit/test_cachedownloader.py
+++ b/tests/unit/test_cachedownloader.py
@@ -205,3 +205,66 @@ def test_cachedownloader_retries_on_bad_data(tmp_path, disable_cache):
         assert not f.exists()
     else:
         assert f.exists()
+
+
+@pytest.mark.parametrize("file_exists", (True, False))
+@pytest.mark.parametrize(
+    "failure_mode", ("header_missing", "header_malformed", "time_overflow")
+)
+def test_cachedownloader_handles_bad_lastmod_header(
+    monkeypatch, tmp_path, file_exists, failure_mode
+):
+    if failure_mode == "header_missing":
+        responses.add(
+            "GET",
+            "https://example.com/schema1.json",
+            headers={},
+            json={},
+            match_querystring=None,
+        )
+    elif failure_mode == "header_malformed":
+        responses.add(
+            "GET",
+            "https://example.com/schema1.json",
+            headers={"Last-Modified": "Jan 2000 00:00:01"},
+            json={},
+            match_querystring=None,
+        )
+    elif failure_mode == "time_overflow":
+        add_default_response()
+
+        def fake_mktime(*args):
+            raise OverflowError("uh-oh")
+
+        monkeypatch.setattr("time.mktime", fake_mktime)
+    else:
+        raise NotImplementedError
+
+    original_file_contents = b'{"foo": "bar"}'
+    f = tmp_path / "schema1.json"
+
+    if file_exists:
+        f.write_bytes(original_file_contents)
+    else:
+        assert not f.exists()
+
+    cd = CacheDownloader(
+        "https://example.com/schema1.json", filename=str(f), cache_dir=str(tmp_path)
+    )
+
+    # if the file already existed, it will not be overwritten by the cachedownloader
+    # so the returned value for both the downloader and a direct file read should be the
+    # original contents
+    if file_exists:
+        with cd.open() as fp:
+            assert fp.read() == original_file_contents
+        assert f.read_bytes() == original_file_contents
+    # otherwise, the file will have been created with new content
+    # both reads will show that new content
+    else:
+        with cd.open() as fp:
+            assert fp.read() == b"{}"
+        assert f.read_bytes() == b"{}"
+
+    # at the end, the file always exists on disk
+    assert f.exists()
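
For reference, a minimal standalone sketch of the fallback behavior introduced above. The module-level ``lastmod_to_timestamp`` helper and the plain-dict headers are illustrative assumptions, not part of the codebase; in the diff this logic lives in ``CacheDownloader._lastmod_from_response`` and reads headers from a ``requests.Response``:

import time

# Format string copied from CacheDownloader._LASTMOD_FMT in the diff above.
LASTMOD_FMT = "%a, %d %b %Y %H:%M:%S %Z"


def lastmod_to_timestamp(headers: dict) -> float:
    # Mirrors the new parsing logic: a header that is missing (LookupError),
    # unparseable (ValueError), or outside the platform's time range
    # (OverflowError) falls back to 0.0, i.e. the epoch.
    try:
        return time.mktime(time.strptime(headers["last-modified"], LASTMOD_FMT))
    except (OverflowError, ValueError, LookupError):
        return 0.0


# A well-formed header yields a real timestamp; bad or absent headers yield 0.0,
# so _cache_hit sees any existing local file as at least as new as the remote
# copy and keeps it instead of re-downloading, matching the new test: an
# existing cache file is left untouched, a missing one is still created.
print(lastmod_to_timestamp({"last-modified": "Sun, 01 Jan 2023 00:00:01 GMT"}))
print(lastmod_to_timestamp({"last-modified": "Jan 2000 00:00:01"}))  # malformed -> 0.0
print(lastmod_to_timestamp({}))  # missing -> 0.0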