Skip to content

Use the new 'referencing' implementation in 'jsonschema' #289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
py: ["3.x"]
include:
- toxenv: py-mindeps
py: "3.7"
py: "3.8"

runs-on: ubuntu-latest
name: "Run '${{ matrix.toxenv }}' on python ${{ matrix.py }}"
Expand Down Expand Up @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
py: ['3.7', '3.8', '3.9', '3.10', '3.11']
py: ['3.8', '3.9', '3.10', '3.11']
name: "Run tests on ${{ matrix.os }}, py${{ matrix.py }}"
runs-on: ${{ matrix.os }}
steps:
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ Unreleased
.. vendor-insert-here

- Update vendored schemas (2023-07-18)
- Remove support for Python 3.7
- The minimum supported version of the `jsonschema` library is now `4.18.0`,
which introduces new `$ref` resolution behavior and fixes. That behavior is
used in all cases, which should result in faster evaluation, especially on
large schemas.
- `$ref` usage may now refer to YAML, TOML, or JSON5 files, or any other
non-JSON format supported by `check-jsonschema`. The file type is inferred
only from the file extension in these cases and defaults to JSON if there is
no recognizable extension.

0.23.3
------
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python_requires = >=3.7
install_requires =
importlib-resources>=1.4.0;python_version<"3.9"
ruamel.yaml==0.17.32
jsonschema>=4.5.1,<5.0
jsonschema>=4.18.0,<5.0
requests<3.0
click>=8,<9
package_dir=
Expand Down
7 changes: 6 additions & 1 deletion src/check_jsonschema/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import click
import jsonschema
import referencing.exceptions

from . import utils
from .formats import FormatOptions
Expand Down Expand Up @@ -75,7 +76,11 @@ def _build_result(self) -> CheckResult:
def _run(self) -> None:
try:
result = self._build_result()
except jsonschema.RefResolutionError as e:
except (
referencing.exceptions.NoSuchResource,
referencing.exceptions.Unretrievable,
referencing.exceptions.Unresolvable,
) as e:
self._fail("Failure resolving $ref within schema\n", e)

self._reporter.report_result(result)
Expand Down
7 changes: 5 additions & 2 deletions src/check_jsonschema/identify_filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
}


def path_to_type(path: pathlib.Path, *, default_type: str = "json") -> str:
ext = path.suffix.lstrip(".")
def path_to_type(path: str | pathlib.Path, *, default_type: str = "json") -> str:
if isinstance(path, str):
ext = path.rpartition(".")[2]
else:
ext = path.suffix.lstrip(".")

if ext in _EXTENSION_MAP:
return _EXTENSION_MAP[ext]
Expand Down
13 changes: 9 additions & 4 deletions src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(
}

def get(
self, path: pathlib.Path, default_filetype: str
self, path: pathlib.Path | str, default_filetype: str
) -> t.Callable[[t.BinaryIO], t.Any]:
filetype = path_to_type(path, default_type=default_filetype)

Expand All @@ -82,10 +82,15 @@ def get(
+ ",".join(self._by_tag.keys())
)

def parse_file(self, path: pathlib.Path, default_filetype: str) -> t.Any:
def parse_data_with_path(
self, data: t.BinaryIO, path: pathlib.Path | str, default_filetype: str
) -> t.Any:
loadfunc = self.get(path, default_filetype)
try:
with open(path, "rb") as fp:
return loadfunc(fp)
return loadfunc(data)
except LOADING_FAILURE_ERROR_TYPES as e:
raise FailedFileLoadError(f"Failed to parse {path}") from e

def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
with open(path, "rb") as fp:
return self.parse_data_with_path(fp, path, default_filetype)
27 changes: 16 additions & 11 deletions src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@

from ..builtin_schemas import get_builtin_schema
from ..formats import FormatOptions, make_format_checker
from ..parsers import ParserSet
from ..utils import is_url_ish
from .errors import UnsupportedUrlScheme
from .readers import HttpSchemaReader, LocalSchemaReader
from .resolver import make_ref_resolver
from .resolver import make_reference_registry


def _extend_with_default(
Expand Down Expand Up @@ -71,6 +72,9 @@ def __init__(
if is_url_ish(self.schemafile):
self.url_info = urllib.parse.urlparse(self.schemafile)

# setup a parser collection
self._parsers = ParserSet()

# setup a schema reader lazily, when needed
self._reader: LocalSchemaReader | HttpSchemaReader | None = None

Expand All @@ -96,8 +100,8 @@ def _get_schema_reader(self) -> LocalSchemaReader | HttpSchemaReader:
f"detected parsed URL had an unrecognized scheme: {self.url_info}"
)

def get_schema_ref_base(self) -> str | None:
return self.reader.get_ref_base()
def get_schema_retrieval_uri(self) -> str | None:
return self.reader.get_retrieval_uri()

def get_schema(self) -> dict[str, t.Any]:
return self.reader.read_schema()
Expand All @@ -109,19 +113,19 @@ def get_validator(
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.Validator:
schema_uri = self.get_schema_ref_base()
retrieval_uri = self.get_schema_retrieval_uri()
schema = self.get_schema()

schema_dialect = schema.get("$schema")

# format checker (which may be None)
format_checker = make_format_checker(format_opts, schema_dialect)

# ref resolver which may be built from the schema path
# if the location is a URL, there's no change, but if it's a file path
# it's made absolute and URI-ized
# the resolver should use `$id` if there is one present in the schema
ref_resolver = make_ref_resolver(schema_uri, schema)
# reference resolution
# with support for YAML, TOML, and other formats from the parsers
reference_registry = make_reference_registry(
self._parsers, retrieval_uri, schema
)

# get the correct validator class and check the schema under its metaschema
validator_cls = jsonschema.validators.validator_for(schema)
Expand All @@ -134,7 +138,7 @@ def get_validator(
# now that we know it's safe to try to create the validator instance, do it
validator = validator_cls(
schema,
resolver=ref_resolver,
registry=reference_registry,
format_checker=format_checker,
)
return t.cast(jsonschema.Validator, validator)
Expand All @@ -143,8 +147,9 @@ def get_validator(
class BuiltinSchemaLoader(SchemaLoader):
def __init__(self, schema_name: str) -> None:
self.schema_name = schema_name
self._parsers = ParserSet()

def get_schema_ref_base(self) -> str | None:
def get_schema_retrieval_uri(self) -> str | None:
return None

def get_schema(self) -> dict[str, t.Any]:
Expand Down
8 changes: 3 additions & 5 deletions src/check_jsonschema/schema_loader/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,12 @@ def _run_load_callback(schema_location: str, callback: t.Callable) -> dict:


class LocalSchemaReader:
FORMATS = ("json", "json5", "yaml")

def __init__(self, filename: str) -> None:
self.path = filename2path(filename)
self.filename = str(self.path)
self.parsers = ParserSet(supported_formats=self.FORMATS)
self.parsers = ParserSet()

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.path.as_uri()

def _read_impl(self) -> t.Any:
Expand All @@ -57,7 +55,7 @@ def __init__(
validation_callback=json.loads,
)

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.url

def _read_impl(self) -> t.Any:
Expand Down
103 changes: 65 additions & 38 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,69 @@
from __future__ import annotations

import typing as t
import urllib.parse

import click
import jsonschema


class _CliRefResolver(jsonschema.RefResolver):
def resolve_remote(self, uri: str) -> t.Any:
if uri.endswith(".yaml") or uri.endswith(".yml"):
click.secho(
"""\
WARNING: You appear to be using a schema which references a YAML file.

This is not supported by check-jsonschema and may result in errors.
""",
err=True,
fg="yellow",
)
elif uri.endswith(".json5"):
click.secho(
"""\
WARNING: You appear to be using a schema which references a JSON5 file.

This is not supported by check-jsonschema and may result in errors.
""",
err=True,
fg="yellow",
)
return super().resolve_remote(uri)


def make_ref_resolver(
schema_uri: str | None, schema: dict
) -> jsonschema.RefResolver | None:
if not schema_uri:
return None

base_uri = schema.get("$id", schema_uri)
# FIXME: temporary type-ignore because typeshed has the type wrong
return _CliRefResolver(base_uri, schema) # type: ignore[arg-type]
import referencing
import requests
from referencing.jsonschema import DRAFT202012, Schema

from ..parsers import ParserSet
from ..utils import filename2path


def make_reference_registry(
parsers: ParserSet, retrieval_uri: str | None, schema: dict
) -> referencing.Registry:
id_attribute_: t.Any = schema.get("$id")
if isinstance(id_attribute_, str):
id_attribute: str | None = id_attribute_
else:
id_attribute = None

schema_resource = referencing.Resource.from_contents(
schema, default_specification=DRAFT202012
)
# mypy does not recognize that Registry is an `attrs` class and has `retrieve` as an
# argument to its implicit initializer
registry: referencing.Registry = referencing.Registry( # type: ignore[call-arg]
retrieve=create_retrieve_callable(parsers, retrieval_uri, id_attribute)
)

if retrieval_uri is not None:
registry = registry.with_resource(uri=retrieval_uri, resource=schema_resource)
if id_attribute is not None:
registry = registry.with_resource(uri=id_attribute, resource=schema_resource)

return registry


def create_retrieve_callable(
parser_set: ParserSet, retrieval_uri: str | None, id_attribute: str | None
) -> t.Callable[[str], referencing.Resource[Schema]]:
base_uri = id_attribute
if base_uri is None:
base_uri = retrieval_uri

def get_local_file(uri: str) -> t.Any:
path = filename2path(uri)
return parser_set.parse_file(path, "json")

def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
scheme = urllib.parse.urlsplit(uri).scheme
if scheme == "" and base_uri is not None:
full_uri = urllib.parse.urljoin(base_uri, uri)
else:
full_uri = uri

full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
if full_uri_scheme in ("http", "https"):
data = requests.get(full_uri, stream=True)
parsed_object = parser_set.parse_data_with_path(data.raw, full_uri, "json")
else:
parsed_object = get_local_file(full_uri)

return referencing.Resource.from_contents(
parsed_object, default_specification=DRAFT202012
)

return retrieve_reference
24 changes: 21 additions & 3 deletions tests/acceptance/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
import textwrap

import pytest
from click.testing import CliRunner

from check_jsonschema import main as cli_main


def _render_result(result):
return f"""
output:
{textwrap.indent(result.output, " ")}

stderr:
{textwrap.indent(result.stderr, " ")}
"""


@pytest.fixture
def cli_runner():
return CliRunner(mix_stderr=False)
Expand All @@ -22,8 +34,14 @@ def func(cli_args, *args, **kwargs):

@pytest.fixture
def run_line_simple(run_line):
def func(cli_args, *args, **kwargs):
res = run_line(["check-jsonschema"] + cli_args, *args, **kwargs)
assert res.exit_code == 0
def func(cli_args, *args, full_traceback: bool = True, **kwargs):
res = run_line(
["check-jsonschema"]
+ (["--traceback-mode", "full"] if full_traceback else [])
+ cli_args,
*args,
**kwargs,
)
assert res.exit_code == 0, _render_result(res)

return func
14 changes: 4 additions & 10 deletions tests/acceptance/test_nonjson_schema_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@


@pytest.mark.parametrize("passing_data", [True, False])
def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
def test_yaml_reference(run_line, tmp_path, passing_data):
main_schemafile = tmp_path / "main_schema.json"
main_schemafile.write_text(json.dumps(YAML_REF_MAIN_SCHEMA))
# JSON is a subset of YAML, so this works for generated YAML
ref_schema = tmp_path / "title_schema.yaml"
ref_schema.write_text(json.dumps(TITLE_SCHEMA))

Expand All @@ -47,14 +48,11 @@ def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
)
assert result.exit_code == (0 if passing_data else 1)
assert (
"WARNING: You appear to be using a schema which references a YAML file"
in result.stderr
)


@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")
@pytest.mark.parametrize("passing_data", [True, False])
def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
def test_json5_reference(run_line, tmp_path, passing_data):
main_schemafile = tmp_path / "main_schema.json"
main_schemafile.write_text(json.dumps(JSON5_REF_MAIN_SCHEMA))
ref_schema = tmp_path / "title_schema.json5"
Expand All @@ -70,10 +68,6 @@ def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
)
assert result.exit_code == (0 if passing_data else 1)
assert (
"WARNING: You appear to be using a schema which references a JSON5 file"
in result.stderr
)


@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")
Expand Down
Loading