diff --git a/.pylintrc b/.pylintrc index e036b6217b..4b0e4fd1c0 100644 --- a/.pylintrc +++ b/.pylintrc @@ -3,7 +3,7 @@ # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code. -extension-pkg-whitelist= +extension-pkg-whitelist=pydantic # Add files or directories to the blacklist. They should be base names, not # paths. diff --git a/schema.json b/schema.json new file mode 100644 index 0000000000..9ad9136ce8 --- /dev/null +++ b/schema.json @@ -0,0 +1,258 @@ +{ + "title": "dvc.yaml", + "type": "object", + "properties": { + "stages": { + "title": "Stages", + "description": "List of stages", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Stage" + } + } + }, + "required": [ + "stages" + ], + "definitions": { + "DepModel": { + "title": "DepModel", + "description": "A dependency for the stage", + "type": "string" + }, + "Dependencies": { + "title": "Dependencies", + "type": "array", + "items": { + "$ref": "#/definitions/DepModel" + }, + "uniqueItems": true + }, + "CustomParamFileKeys": { + "title": "CustomParamFileKeys", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }, + "Param": { + "title": "Param", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/CustomParamFileKeys" + } + ] + }, + "Params": { + "title": "Params", + "type": "array", + "items": { + "$ref": "#/definitions/Param" + }, + "uniqueItems": true + }, + "OutFlags": { + "title": "OutFlags", + "type": "object", + "properties": { + "cache": { + "title": "Cache", + "description": "Cache output by DVC", + "default": true, + "type": "boolean" + }, + "persist": { + "title": "Persist", + "description": "Persist output between runs", + "default": false, + "type": "boolean" + } + } + }, + "Out": { + "title": "Out", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/OutFlags" + } + } + ] + }, + "Outs": { + "title": "Outs", + "type": "array", + "items": { + "$ref": "#/definitions/Out" + }, + "uniqueItems": true + }, + "PlotFlags": { + "title": "PlotFlags", + "type": "object", + "properties": { + "cache": { + "title": "Cache", + "description": "Cache output by DVC", + "default": true, + "type": "boolean" + }, + "persist": { + "title": "Persist", + "description": "Persist output between runs", + "default": false, + "type": "boolean" + }, + "x": { + "title": "X", + "description": "Default field name to use as x-axis data", + "type": "string" + }, + "y": { + "title": "Y", + "description": "Default field name to use as y-axis data", + "type": "string" + }, + "x_label": { + "title": "X Label", + "description": "Default label for the x-axis", + "type": "string" + }, + "y_label": { + "title": "Y Label", + "description": "Default label for the y-axis", + "type": "string" + }, + "title": { + "title": "Title", + "description": "Default plot title", + "type": "string" + }, + "header": { + "title": "Header", + "description": "Whether the target CSV or TSV has a header or not", + "default": false, + "type": "boolean" + }, + "template": { + "title": "Template", + "description": "Default plot template", + "type": "string" + } + } + }, + "Plot": { + "title": "Plot", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/PlotFlags" + } + } + ] + }, + "Plots": { + "title": "Plots", + "type": "array", + "items": { + "$ref": "#/definitions/Plot" + }, + "uniqueItems": true + }, + "Stage": { + "title": "Stage", + "type": "object", + "properties": { + "cmd": { + "title": "Cmd", + "description": "Command to run", + "type": "string" + }, + "wdir": { + "title": "Wdir", + "description": "Working directory", + "type": "string" + }, + "deps": { + "title": "Deps", + "description": "Dependencies for the stage", + "allOf": [ + { + "$ref": "#/definitions/Dependencies" + } + ] + }, + "params": { + "title": "Params", + "description": "Params for the stage", + "allOf": [ + { + "$ref": "#/definitions/Params" + } + ] + }, + "outs": { + "title": "Outs", + "description": "Outputs of the stage", + "allOf": [ + { + "$ref": "#/definitions/Outs" + } + ] + }, + "metrics": { + "title": "Metrics", + "description": "Metrics of the stage", + "allOf": [ + { + "$ref": "#/definitions/Outs" + } + ] + }, + "plots": { + "title": "Plots", + "description": "Plots of the stage", + "allOf": [ + { + "$ref": "#/definitions/Plots" + } + ] + }, + "frozen": { + "title": "Frozen", + "description": "Assume stage as unchanged", + "default": false, + "type": "boolean" + }, + "always_changed": { + "title": "Always Changed", + "description": "Assume stage as always changed", + "default": false, + "type": "boolean" + }, + "meta": { + "title": "Meta", + "description": "Additional information/metadata" + } + }, + "required": [ + "cmd" + ] + } + } +} diff --git a/scripts/dvc.lock b/scripts/dvc.lock new file mode 100644 index 0000000000..8871b8e8d8 --- /dev/null +++ b/scripts/dvc.lock @@ -0,0 +1,8 @@ +generate-jsonschema: + cmd: python jsonschema_gen.py > ../schema.json + deps: + - path: jsonschema_gen.py + md5: 4d59d736987173600974ea6abe50e0cc + outs: + - path: ../schema.json + md5: 2221115bef9d255ddd25ba70099a15ec diff --git a/scripts/dvc.yaml b/scripts/dvc.yaml new file mode 100644 index 0000000000..8896f015f3 --- /dev/null +++ b/scripts/dvc.yaml @@ -0,0 +1,8 @@ +stages: + generate-jsonschema: + cmd: python jsonschema_gen.py > ../schema.json + deps: + - jsonschema_gen.py + outs: + - ../schema.json: + cache: false diff --git a/scripts/jsonschema_gen.py b/scripts/jsonschema_gen.py new file mode 100644 index 0000000000..91d56fe4f3 --- /dev/null +++ b/scripts/jsonschema_gen.py @@ -0,0 +1,109 @@ +try: + # pylint: disable=unused-import + from typing import TypedDict +except ImportError: + # pylint: disable=unused-import + from typing_extensions import TypedDict # noqa: F401 + +from typing import Any, Dict, Optional, Set, Union + +from pydantic import BaseModel, Field + +FilePath = str +ParamKey = str +StageName = str + + +class OutFlags(BaseModel): + cache: Optional[bool] = Field(True, description="Cache output by DVC") + persist: Optional[bool] = Field( + False, description="Persist output between runs" + ) + + +class PlotFlags(OutFlags): + x: str = Field( + None, description="Default field name to use as x-axis data" + ) + y: str = Field( + None, description="Default field name to use as y-axis data" + ) + x_label: str = Field(None, description="Default label for the x-axis") + y_label: str = Field(None, description="Default label for the y-axis") + title: str = Field(None, description="Default plot title") + header: bool = Field( + False, description="Whether the target CSV or TSV has a header or not" + ) + template: str = Field(None, description="Default plot template") + + +class DepModel(BaseModel): + __root__: FilePath = Field(..., description="A dependency for the stage") + + +class Dependencies(BaseModel): + __root__: Set[DepModel] + + +class CustomParamFileKeys(BaseModel): + __root__: Dict[FilePath, Set[ParamKey]] + + +class Param(BaseModel): + __root__: Union[ParamKey, CustomParamFileKeys] + + +class Params(BaseModel): + __root__: Set[Param] + + +class Out(BaseModel): + __root__: Union[FilePath, Dict[FilePath, OutFlags]] + + +class Outs(BaseModel): + __root__: Set[Out] + + +class Plot(BaseModel): + __root__: Union[FilePath, Dict[FilePath, PlotFlags]] + + +class Plots(BaseModel): + __root__: Set[Plot] + + +class Stage(BaseModel): + cmd: str = Field(..., description="Command to run") + wdir: Optional[str] = Field(None, description="Working directory") + deps: Optional[Dependencies] = Field( + None, description="Dependencies for the stage" + ) + params: Optional[Params] = Field(None, description="Params for the stage") + outs: Optional[Outs] = Field(None, description="Outputs of the stage") + metrics: Optional[Outs] = Field(None, description="Metrics of the stage") + plots: Optional[Plots] = Field(None, description="Plots of the stage") + frozen: Optional[bool] = Field( + False, description="Assume stage as unchanged" + ) + always_changed: Optional[bool] = Field( + False, description="Assume stage as always changed" + ) + meta: Any = Field(None, description="Additional information/metadata") + + class Config: + allow_mutation = False + + +Stages = Dict[StageName, Stage] + + +class DvcYamlModel(BaseModel): + stages: Stages = Field(..., description="List of stages") + + class Config: + title = "dvc.yaml" + + +if __name__ == "__main__": + print(DvcYamlModel.schema_json(indent=2)) diff --git a/setup.cfg b/setup.cfg index f9dc5b216d..b77c916940 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,7 +17,7 @@ count=true [isort] include_trailing_comma=true known_first_party=dvc,tests -known_third_party=PyInstaller,RangeHTTPServer,boto3,colorama,configobj,distro,dpath,flaky,flufl,funcy,git,grandalf,mock,moto,nanotime,networkx,packaging,pathspec,pylint,pytest,requests,ruamel,setuptools,shortuuid,shtab,tqdm,voluptuous,yaml,zc +known_third_party=PyInstaller,RangeHTTPServer,boto3,colorama,configobj,distro,dpath,flaky,flufl,funcy,git,grandalf,mock,moto,nanotime,networkx,packaging,pathspec,pydantic,pylint,pytest,requests,ruamel,setuptools,shortuuid,shtab,tqdm,voluptuous,yaml,zc line_length=79 force_grid_wrap=0 use_parentheses=True diff --git a/setup.py b/setup.py index 6d4831effe..595efc407d 100644 --- a/setup.py +++ b/setup.py @@ -130,6 +130,7 @@ def run(self): "pylint", "pylint-pytest", "pylint-plugin-utils", + "pydantic[typing_extensions]", ] if (sys.version_info) >= (3, 6):