diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5fb35c5d9..7d36d6c47 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,12 @@ Unreleased .. vendor-insert-here - Update vendored schemas (2023-05-03) +- A new option, ``--disable-formats`` replaces and enhances the + ``--disable-format`` flag. ``--disable-formats`` takes a format to disable + and may be passed multiple times, allowing users to opt out of any specific + format checks. ``--disable-formats "*"`` can be used to disable all format + checking. ``--disable-format`` is still supported, but is deprecated and + emits a warning. 0.22.0 ------ diff --git a/docs/usage.rst b/docs/usage.rst index 3bc39aff9..33b4c0cdf 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -185,11 +185,52 @@ following options can be used to control this behavior. ``--disable-format`` ~~~~~~~~~~~~~~~~~~~~ -Disable all ``"format"`` checks. +.. warning:: + + This option is deprecated. Use ``--disable-formats "*"`` instead. + +Disable all format checks. + +``--disable-formats`` +~~~~~~~~~~~~~~~~~~~~~ + +Disable specified ``"format"`` checks. + +Use ``--disable-formats "*"`` to disable all format checking. Because ``"format"`` checking is not done by all JSON Schema tools, it is possible that a file may validate under a schema with a different tool, but -fail with ``check-jsonschema`` if ``--disable-format`` is not set. +fail with ``check-jsonschema`` if ``--disable-formats`` is not set. + +This option may be specified multiple times or as a comma-delimited list and +supports the following formats as arguments: + +- ``date`` +- ``date-time`` +- ``duration`` +- ``email`` +- ``hostname`` +- ``idn-email`` +- ``idn-hostname`` +- ``ipv4`` +- ``ipv6`` +- ``iri`` +- ``iri-reference`` +- ``json-pointer`` +- ``regex`` +- ``relative-json-pointer`` +- ``time`` +- ``uri`` +- ``uri-reference`` +- ``uri-template`` +- ``uuid`` + +Example usage: + +.. 
code-block:: bash + + # disables all three of time, date-time, and iri + --disable-formats time,date-time --disable-formats iri ``--format-regex`` ~~~~~~~~~~~~~~~~~~ diff --git a/src/check_jsonschema/cli/__init__.py b/src/check_jsonschema/cli/__init__.py new file mode 100644 index 000000000..5b9980efe --- /dev/null +++ b/src/check_jsonschema/cli/__init__.py @@ -0,0 +1,3 @@ +from .main_command import main + +__all__ = ("main",) diff --git a/src/check_jsonschema/cli.py b/src/check_jsonschema/cli/main_command.py similarity index 69% rename from src/check_jsonschema/cli.py rename to src/check_jsonschema/cli/main_command.py index 6dd228315..c1df238d3 100644 --- a/src/check_jsonschema/cli.py +++ b/src/check_jsonschema/cli/main_command.py @@ -1,24 +1,26 @@ from __future__ import annotations -import enum import os import textwrap import click -from .catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG -from .checker import SchemaChecker -from .formats import FormatOptions, RegexFormatBehavior -from .instance_loader import InstanceLoader -from .parsers import SUPPORTED_FILE_FORMATS -from .reporter import REPORTER_BY_NAME, Reporter -from .schema_loader import ( +from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG +from ..checker import SchemaChecker +from ..formats import KNOWN_FORMATS, RegexFormatBehavior +from ..instance_loader import InstanceLoader +from ..parsers import SUPPORTED_FILE_FORMATS +from ..reporter import REPORTER_BY_NAME, Reporter +from ..schema_loader import ( BuiltinSchemaLoader, MetaSchemaLoader, SchemaLoader, SchemaLoaderBase, ) -from .transforms import TRANSFORM_LIBRARY, Transform +from ..transforms import TRANSFORM_LIBRARY +from .param_types import CommaDelimitedList +from .parse_result import ParseResult, SchemaLoadingMode +from .warnings import deprecation_warning_callback BUILTIN_SCHEMA_NAMES = [f"vendor.{k}" for k in SCHEMA_CATALOG.keys()] + [ f"custom.{k}" for k in CUSTOM_SCHEMA_NAMES @@ -28,68 +30,6 @@ ) -class SchemaLoadingMode(enum.Enum): - 
filepath = "filepath" - builtin = "builtin" - metaschema = "metaschema" - - -class ParseResult: - def __init__(self) -> None: - # primary options: schema + instances - self.schema_mode: SchemaLoadingMode = SchemaLoadingMode.filepath - self.schema_path: str | None = None - self.instancefiles: tuple[str, ...] = () - # cache controls - self.disable_cache: bool = False - self.cache_filename: str | None = None - # filetype detection (JSON, YAML, TOML, etc) - self.default_filetype: str = "json" - # data-transform (for Azure Pipelines and potentially future transforms) - self.data_transform: Transform | None = None - # fill default values on instances during validation - self.fill_defaults: bool = False - # regex format options - self.disable_format: bool = False - self.format_regex: RegexFormatBehavior = RegexFormatBehavior.default - # error and output controls - self.verbosity: int = 1 - self.traceback_mode: str = "short" - self.output_format: str = "text" - - def set_schema( - self, schemafile: str | None, builtin_schema: str | None, check_metaschema: bool - ) -> None: - mutex_arg_count = sum( - 1 if x else 0 for x in (schemafile, builtin_schema, check_metaschema) - ) - if mutex_arg_count == 0: - raise click.UsageError( - "Either --schemafile, --builtin-schema, or --check-metaschema " - "must be provided" - ) - if mutex_arg_count > 1: - raise click.UsageError( - "--schemafile, --builtin-schema, and --check-metaschema " - "are mutually exclusive" - ) - - if schemafile: - self.schema_mode = SchemaLoadingMode.filepath - self.schema_path = schemafile - elif builtin_schema: - self.schema_mode = SchemaLoadingMode.builtin - self.schema_path = builtin_schema - else: - self.schema_mode = SchemaLoadingMode.metaschema - - @property - def format_opts(self) -> FormatOptions: - return FormatOptions( - enabled=not self.disable_format, regex_behavior=self.format_regex - ) - - def set_color_mode(ctx: click.Context, param: str, value: str) -> None: if "NO_COLOR" in os.environ: ctx.color 
= False @@ -101,6 +41,20 @@ def set_color_mode(ctx: click.Context, param: str, value: str) -> None: }[value] +def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: + return textwrap.indent( + "\n".join( + textwrap.wrap( + ", ".join(values), + width=75, + break_long_words=False, + break_on_hyphens=False, + ), + ), + " ", + ) + + @click.command( "check-jsonschema", help="""\ @@ -108,8 +62,9 @@ def set_color_mode(ctx: click.Context, param: str, value: str) -> None: The schema is specified either with '--schemafile' or with '--builtin-schema'. -'check-jsonschema' supports and checks the following formats by default: - date, email, ipv4, regex, uuid +'check-jsonschema' supports format checks with appropriate libraries installed, +including the following formats by default: + date, email, ipv4, ipv6, regex, uuid \b For the "regex" format, there are multiple modes which can be specified with @@ -121,17 +76,13 @@ def set_color_mode(ctx: click.Context, param: str, value: str) -> None: \b The '--builtin-schema' flag supports the following schema names: """ - + textwrap.indent( - "\n".join( - textwrap.wrap( - ", ".join(BUILTIN_SCHEMA_NAMES), - width=75, - break_long_words=False, - break_on_hyphens=False, - ), - ), - " ", - ), + + pretty_helptext_list(BUILTIN_SCHEMA_NAMES) + + """\ + +\b +The '--disable-formats' flag supports the following formats: +""" + + pretty_helptext_list(KNOWN_FORMATS), ) @click.help_option("-h", "--help") @click.version_option() @@ -170,13 +121,29 @@ def set_color_mode(ctx: click.Context, param: str, value: str) -> None: ), ) @click.option( - "--disable-format", is_flag=True, help="Disable all format checks in the schema." 
+ "--disable-format", + is_flag=True, + help="{deprecated} Disable all format checks in the schema.", + callback=deprecation_warning_callback( + "--disable-format", + is_flag=True, + append_message="Users should now pass '--disable-formats \"*\"' for " + "the same functionality.", + ), +) +@click.option( + "--disable-formats", + multiple=True, + help="Disable specific format checks in the schema. " + "Pass '*' to disable all format checks.", + type=CommaDelimitedList(choices=("*", *KNOWN_FORMATS)), + metavar="{*|FORMAT,FORMAT,...}", ) @click.option( "--format-regex", help=( "Set the mode of format validation for regexes. " - "If '--disable-format' is used, this option has no effect." + "If `--disable-formats regex` is used, this option has no effect." ), default=RegexFormatBehavior.default.value, type=click.Choice([x.value for x in RegexFormatBehavior], case_sensitive=False), @@ -249,6 +216,7 @@ def main( no_cache: bool, cache_filename: str | None, disable_format: bool, + disable_formats: tuple[list[str], ...], format_regex: str, default_filetype: str, traceback_mode: str, @@ -264,7 +232,13 @@ def main( args.set_schema(schemafile, builtin_schema, check_metaschema) args.instancefiles = instancefiles - args.disable_format = disable_format + normalized_disable_formats: tuple[str, ...] 
= tuple( + f for sublist in disable_formats for f in sublist + ) + if disable_format or "*" in normalized_disable_formats: + args.disable_all_formats = True + else: + args.disable_formats = normalized_disable_formats args.format_regex = RegexFormatBehavior(format_regex) args.disable_cache = no_cache args.default_filetype = default_filetype diff --git a/src/check_jsonschema/cli/param_types.py b/src/check_jsonschema/cli/param_types.py new file mode 100644 index 000000000..505bb75c5 --- /dev/null +++ b/src/check_jsonschema/cli/param_types.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import typing as t + +import click + + +class CommaDelimitedList(click.ParamType): + def __init__( + self, + *, + convert_values: t.Callable[[str], str] | None = None, + choices: t.Iterable[str] | None = None, + ) -> None: + super().__init__() + self.convert_values = convert_values + self.choices = list(choices) if choices is not None else None + + def get_metavar(self, param: click.Parameter) -> str: + if self.choices is not None: + return "{" + ",".join(self.choices) + "}" + return "TEXT,TEXT,..." 
+ + def convert( + self, value: str, param: click.Parameter | None, ctx: click.Context | None + ) -> list[str]: + value = super().convert(value, param, ctx) + + # if `--foo` is a comma delimited list and someone passes + # `--foo ""`, take that as `foo=[]` rather than foo=[""] + resolved = value.split(",") if value else [] + + if self.convert_values is not None: + resolved = [self.convert_values(x) for x in resolved] + + if self.choices is not None: + bad_values = [x for x in resolved if x not in self.choices] + if bad_values: + self.fail( + f"the values {bad_values} were not valid choices", + param=param, + ctx=ctx, + ) + + return resolved diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py new file mode 100644 index 000000000..7d86ad822 --- /dev/null +++ b/src/check_jsonschema/cli/parse_result.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import enum + +import click + +from ..formats import FormatOptions, RegexFormatBehavior +from ..transforms import Transform + + +class SchemaLoadingMode(enum.Enum): + filepath = "filepath" + builtin = "builtin" + metaschema = "metaschema" + + +class ParseResult: + def __init__(self) -> None: + # primary options: schema + instances + self.schema_mode: SchemaLoadingMode = SchemaLoadingMode.filepath + self.schema_path: str | None = None + self.instancefiles: tuple[str, ...] = () + # cache controls + self.disable_cache: bool = False + self.cache_filename: str | None = None + # filetype detection (JSON, YAML, TOML, etc) + self.default_filetype: str = "json" + # data-transform (for Azure Pipelines and potentially future transforms) + self.data_transform: Transform | None = None + # fill default values on instances during validation + self.fill_defaults: bool = False + # regex format options + self.disable_all_formats: bool = False + self.disable_formats: tuple[str, ...] 
= () + self.format_regex: RegexFormatBehavior = RegexFormatBehavior.default + # error and output controls + self.verbosity: int = 1 + self.traceback_mode: str = "short" + self.output_format: str = "text" + + def set_schema( + self, schemafile: str | None, builtin_schema: str | None, check_metaschema: bool + ) -> None: + mutex_arg_count = sum( + 1 if x else 0 for x in (schemafile, builtin_schema, check_metaschema) + ) + if mutex_arg_count == 0: + raise click.UsageError( + "Either --schemafile, --builtin-schema, or --check-metaschema " + "must be provided" + ) + if mutex_arg_count > 1: + raise click.UsageError( + "--schemafile, --builtin-schema, and --check-metaschema " + "are mutually exclusive" + ) + + if schemafile: + self.schema_mode = SchemaLoadingMode.filepath + self.schema_path = schemafile + elif builtin_schema: + self.schema_mode = SchemaLoadingMode.builtin + self.schema_path = builtin_schema + else: + self.schema_mode = SchemaLoadingMode.metaschema + + @property + def format_opts(self) -> FormatOptions: + return FormatOptions( + enabled=not self.disable_all_formats, + regex_behavior=self.format_regex, + disabled_formats=self.disable_formats, + ) diff --git a/src/check_jsonschema/cli/warnings.py b/src/check_jsonschema/cli/warnings.py new file mode 100644 index 000000000..1228bf8c4 --- /dev/null +++ b/src/check_jsonschema/cli/warnings.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import typing as t +import warnings + +import click + + +def deprecation_warning_callback( + optstring: str, *, is_flag: bool = False, append_message: str | None = None +) -> t.Callable[[click.Context, click.Parameter, t.Any], t.Any]: + def callback(ctx: click.Context, param: click.Parameter, value: t.Any) -> t.Any: + if not value: + return value + if (is_flag and bool(value) is True) or (value is not None): + message = ( + f"'{optstring}' is deprecated and will be removed in a future release." 
+ ) + if append_message is not None: + message += f" {append_message}" + warnings.warn(message) + + return value + + return callback diff --git a/src/check_jsonschema/formats.py b/src/check_jsonschema/formats.py index a9ac5bddd..87fd14a35 100644 --- a/src/check_jsonschema/formats.py +++ b/src/check_jsonschema/formats.py @@ -8,6 +8,33 @@ import jsonschema import jsonschema.validators +# all known format strings except for a selection from draft3 which have either +# been renamed or removed: +# - color +# - host-name +# - ip-address +KNOWN_FORMATS: tuple[str, ...] = ( + "date", + "date-time", + "duration", + "email", + "hostname", + "idn-email", + "idn-hostname", + "ipv4", + "ipv6", + "iri", + "iri-reference", + "json-pointer", + "regex", + "relative-json-pointer", + "time", + "uri", + "uri-reference", + "uri-template", + "uuid", +) + def _regex_check(instance: t.Any) -> bool: if not isinstance(instance, str): @@ -37,9 +64,13 @@ def __init__( *, enabled: bool = True, regex_behavior: RegexFormatBehavior = RegexFormatBehavior.default, + disabled_formats: tuple[str, ...] 
= (), ) -> None: self.enabled = enabled self.regex_behavior = regex_behavior + self.disabled_formats = disabled_formats + if "regex" in self.disabled_formats: + self.regex_behavior = RegexFormatBehavior.disabled def get_base_format_checker(schema_dialect: str | None) -> jsonschema.FormatChecker: @@ -63,9 +94,15 @@ def make_format_checker( base_checker = get_base_format_checker(schema_dialect) checker = copy.deepcopy(base_checker) - # remove the regex check + # remove the regex check -- it will be re-added if it is enabled del checker.checkers["regex"] + # remove the disabled checks + for checkname in opts.disabled_formats: + if checkname not in checker.checkers: + continue + del checker.checkers[checkname] + if opts.regex_behavior == RegexFormatBehavior.disabled: pass elif opts.regex_behavior == RegexFormatBehavior.default: diff --git a/tests/acceptance/test_format_failure.py b/tests/acceptance/test_format_failure.py index b6ac10540..3d927dcee 100644 --- a/tests/acceptance/test_format_failure.py +++ b/tests/acceptance/test_format_failure.py @@ -54,7 +54,8 @@ def test_format_failure_ignore(run_line_simple, tmp_path): run_line_simple( [ - "--disable-format", + "--disable-formats", + "*", "--schemafile", str(schemafile), str(doc1), @@ -74,7 +75,8 @@ def test_format_failure_ignore_multidoc(run_line_simple, tmp_path): run_line_simple( [ - "--disable-format", + "--disable-formats", + "*", "--schemafile", str(schemafile), str(doc1), diff --git a/tests/example-files/hooks/positive/metaschema/_config.yaml b/tests/example-files/hooks/positive/metaschema/_config.yaml index 2b604563f..73824b22d 100644 --- a/tests/example-files/hooks/positive/metaschema/_config.yaml +++ b/tests/example-files/hooks/positive/metaschema/_config.yaml @@ -1,3 +1,3 @@ files: 2020_invalid_format_value.json: - add_args: ["--disable-format"] + add_args: ["--disable-formats", "*"] diff --git a/tests/unit/test_cli_parse.py b/tests/unit/test_cli_parse.py index 094216b93..27de4b725 100644 --- 
a/tests/unit/test_cli_parse.py +++ b/tests/unit/test_cli_parse.py @@ -7,7 +7,7 @@ from click.testing import CliRunner from check_jsonschema import main as cli_main -from check_jsonschema.cli import ParseResult, SchemaLoadingMode +from check_jsonschema.cli.parse_result import ParseResult, SchemaLoadingMode class BoxedContext: @@ -22,7 +22,7 @@ def boxed_context(): @pytest.fixture def mock_parse_result(): args = ParseResult() - with mock.patch("check_jsonschema.cli.ParseResult") as m: + with mock.patch("check_jsonschema.cli.main_command.ParseResult") as m: m.return_value = args yield args @@ -32,7 +32,9 @@ def mock_cli_exec(boxed_context): def get_ctx(*args): boxed_context.ref = click.get_current_context() - with mock.patch("check_jsonschema.cli.execute", side_effect=get_ctx) as m: + with mock.patch( + "check_jsonschema.cli.main_command.execute", side_effect=get_ctx + ) as m: yield m @@ -195,3 +197,73 @@ def test_color_cli_option_is_choice(runner, setting, expected_value): ).exit_code == expected_value ) + + +def test_formats_default_to_enabled(runner, mock_parse_result): + runner.invoke(cli_main, ["--schemafile", "schema.json", "foo.json"]) + assert mock_parse_result.disable_all_formats is False + assert mock_parse_result.disable_formats == () + + +@pytest.mark.parametrize( + "addargs", + ( + [ + "--disable-formats", + "uri-reference", + "--disable-formats", + "date-time", + ], + ["--disable-formats", "uri-reference,date-time"], + ), +) +def test_disable_selected_formats(runner, mock_parse_result, addargs): + runner.invoke( + cli_main, + [ + "--schemafile", + "schema.json", + "foo.json", + ] + + addargs, + ) + assert mock_parse_result.disable_all_formats is False + assert set(mock_parse_result.disable_formats) == {"uri-reference", "date-time"} + + +@pytest.mark.parametrize( + "addargs", + ( + [ + "--disable-formats", + "uri-reference", + "--disable-formats", + "date-time", + "--disable-formats", + "*", + ], + ["--disable-formats", "*"], + ["--disable-formats", 
"*,email"], + ), +) +def test_disable_all_formats(runner, mock_parse_result, addargs): + # this should be an override, with or without other args + runner.invoke( + cli_main, + [ + "--schemafile", + "schema.json", + "foo.json", + ] + + addargs, + ) + assert mock_parse_result.disable_all_formats is True + + +def test_disable_format_deprecated_flag(runner, mock_parse_result): + # this should be an override, with or without other args + with pytest.warns(UserWarning, match="'--disable-format' is deprecated"): + runner.invoke( + cli_main, ["--schemafile", "schema.json", "foo.json", "--disable-format"] + ) + assert mock_parse_result.disable_all_formats is True