From 9bbe5179496ba9520be3212404216fdf38be11f5 Mon Sep 17 00:00:00 2001 From: Nementon Date: Fri, 5 Feb 2021 22:58:03 +0100 Subject: [PATCH 01/17] bootstrap schemas resolver --- openapi_python_client/resolver/__init__.py | 0 openapi_python_client/resolver/data_loader.py | 22 +++++ openapi_python_client/resolver/reference.py | 22 +++++ .../resolver/resolved_schema.py | 25 +++++ .../resolver/resolver_types.py | 3 + .../resolver/schema_resolver.py | 98 +++++++++++++++++++ 6 files changed, 170 insertions(+) create mode 100644 openapi_python_client/resolver/__init__.py create mode 100644 openapi_python_client/resolver/data_loader.py create mode 100644 openapi_python_client/resolver/reference.py create mode 100644 openapi_python_client/resolver/resolved_schema.py create mode 100644 openapi_python_client/resolver/resolver_types.py create mode 100644 openapi_python_client/resolver/schema_resolver.py diff --git a/openapi_python_client/resolver/__init__.py b/openapi_python_client/resolver/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openapi_python_client/resolver/data_loader.py b/openapi_python_client/resolver/data_loader.py new file mode 100644 index 000000000..ab899d4b3 --- /dev/null +++ b/openapi_python_client/resolver/data_loader.py @@ -0,0 +1,22 @@ +import yaml +from .resolver_types import SchemaData + +class DataLoader: + + @classmethod + def load(cls, path: str, data: bytes) -> SchemaData: + data_type = path.split('.')[-1] + + if data_type == 'json': + return cls.load_json(data) + else: + return cls.load_yaml(data) + + @classmethod + def load_json(cls, data: bytes) -> SchemaData: + raise NotImplementedError() + + @classmethod + def load_yaml(cls, data: bytes) -> SchemaData: + return yaml.safe_load(data) + diff --git a/openapi_python_client/resolver/reference.py b/openapi_python_client/resolver/reference.py new file mode 100644 index 000000000..3d387ec98 --- /dev/null +++ b/openapi_python_client/resolver/reference.py @@ -0,0 +1,22 @@ +class Reference: + + def __init__(self, reference): + self._ref = reference + + @property + def value(self) -> str: + return self._ref + + def is_relative_reference(self): + return self.is_remote_ref() and not self.is_url_reference() + + def is_url_reference(self): + return self.is_remote_ref() and (self._ref.startswith('//', 0) or self._ref.startswith('http', 0)) + + def is_remote_ref(self): + return not self.is_local_ref() + + def is_local_ref(self): + return self._ref.startswith('#', 0) + + diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py new file mode 100644 index 000000000..19eb47d2b --- /dev/null +++ b/openapi_python_client/resolver/resolved_schema.py @@ -0,0 +1,25 @@ +from typing import Any, Dict, Optional, Sequence, Union +from .resolver_types import SchemaData + + +class ResolvedSchema: + + def __init__(self, root, refs, errors): + self._root: SchemaData = root + self._refs: Dict[str, SchemaData] = refs + self._errors: Sequense[str] = errors + + self._resolved_schema: SchemaData = self._root + self._process() + + @property + def schema(self) -> SchemaData: + return self._resolved_schema + + @property + def errors(self) -> Sequence[str]: + return self._errors.copy() + + def _process(self): + pass + diff --git a/openapi_python_client/resolver/resolver_types.py b/openapi_python_client/resolver/resolver_types.py new file mode 100644 index 000000000..baf35099b --- /dev/null +++ b/openapi_python_client/resolver/resolver_types.py @@ -0,0 +1,3 @@ +from typing import Any, Dict, NewType + +SchemaData = NewType('SchemaData', Dict[str, Any]) diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py new file mode 100644 index 000000000..2ac040c52 --- /dev/null +++ b/openapi_python_client/resolver/schema_resolver.py @@ -0,0 +1,98 @@ +import httpcore +import httpx +import urllib +import logging + +from typing import Any, Dict, Optional, Sequence, Union, Generator, NewType +from pathlib import Path + +from .resolver_types import SchemaData +from .reference import Reference +from .resolved_schema import ResolvedSchema +from .data_loader import DataLoader + +class SchemaResolver: + + def __init__(self, url_or_path: Union[str, Path]): + if not url_or_path: + raise ValueError('Invalid document root reference, it shall be an remote url or local file path') + + self._root_path: Optional[Path] = None + self._root_path_dir: Optional[Path] = None + self._root_url: Optional[str] = None + self._root_url_scheme: Optional[str] = None + + if isinstance(url_or_path, Path): + self._root_path = url_or_path.absolute() + self._root_path_dir = self._root_path.parent + else: + self._root_url = url_or_path + self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme + + def resolve(self, recursive: bool = True) -> ResolvedSchema: + root_schema: SchemaData + external_schemas: Dict[str, SchemaData] = {} + errors: Sequence[str] = [] + + if self._root_path: + root_schema = self._fetch_remote_file_path(self._root_path) + else: + root_schema = self._fetch_url_reference(self._root_url) + + self._resolve_schema_references(root_schema, external_schemas, errors, recursive) + return ResolvedSchema(root_schema, external_schemas, errors) + + def _resolve_schema_references(self, root: SchemaData, external_schemas: Dict[str, SchemaData], errors: Sequence[str], recursive: bool) -> Sequence[SchemaData]: + + for ref in self._lookup_schema_references(root): + if ref.is_local_ref(): + continue + + try: + path = ref.value.split('#')[0] + if path in external_schemas: + continue + + if ref.is_url_reference(): + external_schemas[path] = self._fetch_url_reference(path) + else: + external_schemas[path] = self._fetch_remote_reference(path) + + if recursive: + self._resolve_schema_references(external_schemas[path], external_schemas, errors, recursive) + + except Exception as e: + errors.append('Failed to gather external reference data of {0}'.format(ref.value)) + logging.exception('Failed to gather external reference data of {0}'.format(ref.value)) + + def _fetch_remote_reference(self, relative_path: str) -> SchemaData: + if self._root_path: + abs_path = self._root_path_dir.joinpath(relative_path) + return self._fetch_remote_file_path(abs_path) + else: + abs_url = urllib.parse.urljoin(self._root_url, relative_path) + return self._fetch_url_reference(abs_url) + + def _fetch_remote_file_path(self, path: Path) -> SchemaData: + logging.info('Fetching remote ref file path > {0}'.format(path)) + return DataLoader.load(str(path), path.read_bytes()) + + def _fetch_url_reference(self, url: str) -> SchemaData: + if url.startswith('//', 0): + url = "{0}{1}".format(self._root_url_scheme, url) + + logging.info('Fetching remote ref url > {0}'.format(url)) + return DataLoader.load(url, httpx.get(url).content) + + def _lookup_schema_references(self, attr: Any) -> Generator[Reference, None, None]: + if isinstance(attr, dict): + for key, val in attr.items(): + if key == '$ref': + yield Reference(val) + else: + yield from self._lookup_schema_references(val) + + elif isinstance(attr, list): + for val in attr: + yield from self._lookup_schema_references(val) + From 102bd26b83a0cea046e604fa8ed9c51ab4fc4b4e Mon Sep 17 00:00:00 2001 From: Nementon Date: Sat, 6 Feb 2021 00:54:30 +0100 Subject: [PATCH 02/17] __init__ / _get_document: use SchemaResolver --- openapi_python_client/__init__.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/openapi_python_client/__init__.py b/openapi_python_client/__init__.py index 8c97d45e9..4dcfc160f 100644 --- a/openapi_python_client/__init__.py +++ b/openapi_python_client/__init__.py @@ -17,6 +17,7 @@ from .parser import GeneratorData, import_string_from_reference from .parser.errors import GeneratorError from .utils import snake_case +from .resolver.schema_resolver import SchemaResolver if sys.version_info.minor < 8: # version did not exist before 3.8, need to use a backport from importlib_metadata import version @@ -287,20 +288,19 @@ def update_existing_client( def _get_document(*, url: Optional[str], path: Optional[Path]) -> Union[Dict[str, Any], GeneratorError]: - yaml_bytes: bytes if url is not None and path is not None: return GeneratorError(header="Provide URL or Path, not both.") - if url is not None: - try: - response = httpx.get(url) - yaml_bytes = response.content - except (httpx.HTTPError, httpcore.NetworkError): - return GeneratorError(header="Could not get OpenAPI document from provided URL") - elif path is not None: - yaml_bytes = path.read_bytes() - else: + + if url is None and path is None: return GeneratorError(header="No URL or Path provided") + + source: Union[str, Path] = url if url is not None else path try: - return yaml.safe_load(yaml_bytes) - except yaml.YAMLError: + resolver = SchemaResolver(source) + result = resolver.resolve() + if len(result.errors) > 0: + return GeneratorError(header=errors.join('; ')) + except Exception as e: return GeneratorError(header="Invalid YAML from provided source") + + return result.schema From e0c20e489967bdfdf5c7f800ac7d4fff52ee2fc3 Mon Sep 17 00:00:00 2001 From: Nementon Date: Sat, 6 Feb 2021 23:16:48 +0100 Subject: [PATCH 03/17] resolver / wip resolve remote ref to local ones --- openapi_python_client/__init__.py | 13 +- .../resolver/.resolved_schema.py.swp | Bin 0 -> 12288 bytes .../resolver/.schema_resolver.py.swp | Bin 0 -> 16384 bytes openapi_python_client/resolver/data_loader.py | 8 +- openapi_python_client/resolver/reference.py | 41 +++- .../resolver/resolved_schema.py | 191 +++++++++++++++++- .../resolver/resolver_types.py | 2 +- .../resolver/schema_resolver.py | 68 ++++--- 8 files changed, 259 insertions(+), 64 deletions(-) create mode 100644 openapi_python_client/resolver/.resolved_schema.py.swp create mode 100644 openapi_python_client/resolver/.schema_resolver.py.swp diff --git a/openapi_python_client/__init__.py b/openapi_python_client/__init__.py index 4dcfc160f..d9c35e97d 100644 --- a/openapi_python_client/__init__.py +++ b/openapi_python_client/__init__.py @@ -5,19 +5,16 @@ import sys from enum import Enum from pathlib import Path -from typing import Any, Dict, Optional, Sequence, Union +from typing import Any, Dict, Optional, Sequence, Union, cast -import httpcore -import httpx -import yaml from jinja2 import BaseLoader, ChoiceLoader, Environment, FileSystemLoader, PackageLoader from openapi_python_client import utils from .parser import GeneratorData, import_string_from_reference from .parser.errors import GeneratorError -from .utils import snake_case from .resolver.schema_resolver import SchemaResolver +from .utils import snake_case if sys.version_info.minor < 8: # version did not exist before 3.8, need to use a backport from importlib_metadata import version @@ -294,13 +291,13 @@ def _get_document(*, url: Optional[str], path: Optional[Path]) -> Union[Dict[str if url is None and path is None: return GeneratorError(header="No URL or Path provided") - source: Union[str, Path] = url if url is not None else path + source = cast(Union[str, Path], (url if url is not None else path)) try: resolver = SchemaResolver(source) result = resolver.resolve() if len(result.errors) > 0: - return GeneratorError(header=errors.join('; ')) - except Exception as e: + return GeneratorError(header="; ".join(result.errors)) + except Exception: return GeneratorError(header="Invalid YAML from provided source") return result.schema diff --git a/openapi_python_client/resolver/.resolved_schema.py.swp b/openapi_python_client/resolver/.resolved_schema.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..00668b71c19c357928e09fa02edd8dc8599e5a9c GIT binary patch literal 12288 zcmeI2O=}b}7{^n=n{7p}3aK92h1r)XN?lMa^rBX}bX^g&#OZ9NJ2;!9NwzQ-Jz6jN z1qACC@gjl;&sO{>o_vzbbl1XqFZ%@k%QvX)Rl;VUA2g-nB32o@wU;m}Wo{}< zT@`j9MLo$gC|_Awu(Hysxsmw+*Uj>!&rCAl!?>Y}sr7LK3Ct{kkxlDs^SzrlR{fv) z^=nJ?@}=I);=sX?01`j~NB{{S0VIF~kiaYwuxgIHfx*v}!(S<%bElr=M|>awB!C2v z01`j~NB{{S0VIF~kN^@u0%wpw+$ZG61wx+9L-X+afA#GPDbT*_A?#WYX3<|5%{#qd0hXl$)+({)kQ+T$=6?L*y5 zmeOSv+ksi8nV>GJ!L*dN?LB;Qr6ptxDJS!2Zb!O`;aW>=8i`cJO6?>zSbZ}!TE9oT ze1GKndv2`W2_^1Bdd0PctJTnsU;iXNbqAX`57$<>l0A{yU(w!(Z9Fw^n64Tpbm`KS zQ-@pM)n)HC?Ve<$cPk$tGww6Qg&D!!&t}4G5E1Oe6>zI6lA1Q%$P()iL=!c3AOz0O z5br$>aAz^$=Db`J&dZ+;%S2`wD-Yc5MLthdHsDSy$zx;ab~#{}3(O?da!4E1b$4b9 p#SP7dN@_b%GPuEJCibdb_)0~7jEHV2n@JJpFid&65$y>uk-rS)BC`Mh literal 0 HcmV?d00001 diff --git a/openapi_python_client/resolver/.schema_resolver.py.swp b/openapi_python_client/resolver/.schema_resolver.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..b10291ff1c3357ff534fca995394c8dccffac1e0 GIT binary patch literal 16384 zcmeHNO^g&p6fP0}K#<5mFDAtTnb~BvcafOjBuj!W7!1N@`SGxjrgyq#x230hsIFOd zvRRGsq!{nw-M@=^(1XT{5i~K;i(E{+XcPm+7%v{Y_`R;`pZQ}+^kAeq`L?&JUcGwt z)vJ0{HN7x(>hLpspEJeqY-8;5caNO?UFsSOzQumI2Fv zWxz6E8L$jk2L6K#c=(-vJM4Oc4*dB2KRf^b{s3b?0#|`6z-6EdoCRE9AMg-xV=H4{ z09Szbf%kwF;56{t7RG)8J_23>P61=U4&cxG8T$-)8CV78fQ`Ukn;H8UxC98`3E%** z8~A<`W8VN5fmeVj;KqH7T?Y;VGr(rxmwO=xd<}dGyasr{4DclIFz^*FGCl+Z-~)$% zt9LW@E${(w6u5R5V?O||18YDXV89<68G9R$zZ-Q%X9460M{8V)G=hoNw(Kr(N@0mKwt&~D_WW;AYhb-2 zb!x5RfQpulLO^#M_LC*K@gNRq2gFI0TnN-s zwK6$bsqv{Yb4{`<)d7plP-@apx6TtKQ_ha3-?k7B>$8J8)wZXW#%r8jjuf8nE_9W^ zuol*>Pfs%lMXI5Xq*S+7ln%b`g67qwN-jy5p{IZAe3s0iu^-wGqS=hJ=5-h;-a>(RD(g1}G%p=t z&+ztg2kMz#`2gq{KG45GSI!K%rB*O;@cmr24h?!YDNB3Qbb@p`mZY(&4VXjnv@i!z zM+_7~5w$>(r{B2V*+^Pa$!T&x?P5;S4g*!K?5vE>Po3$}tz>tIDMQUD`Vz(sFYS9d z^ihCBi)5j8nvrpiB0)w#I#dAXAouQv6|OPa^#1p*P*gF z-=WY?BFHY>j4$9LO1<1>BE`BpB)rb)so-m&Y8us&G~f(cl{H^@`g+spmeiHZ?~A{s z>U~q9*IA7jCL$N+4bF(s8VA<+lG5JE0LSSoE4kdz0fO=Kg`W;M{P4j>rPa0Exl4wZ zT_-Ikr!zihB5$8RK-CF}rJ15|?|>JREpLhADfN$-P8~u(3lcq%HVwJ72XkYo1EQm4 zZ=y}B$tvW6gJV@Pf(C$MDC+j*Qq;%RFINNO`$c*ibP%QEu%kq^@Dq$^X`5T>PEGG# zO52gI&q28g^c%XQ)b6CaN$C}pw(Z6+8XL@>teiTd z6`&fOPK>QMD%H{zp_Y3Uf`D1fYnqN)agf5nNnALAh8Z&o%&sm=f=slUrP7mAInjuv zV08Zf6=(HbILp)dAIISIxsLQ(z?;B}fDh~jNNy)!AIpGcz%pPNunbrRECZGS%YbFT zGGH074E*mHpzD(Iqkn}>?;P|E&&V5MT&NWHAf$pWmywzM@LJr$iLm!BO8?{<21$Oy QT(*Gz{|8o3b$#vjH;XGpFaQ7m literal 0 HcmV?d00001 diff --git a/openapi_python_client/resolver/data_loader.py b/openapi_python_client/resolver/data_loader.py index ab899d4b3..aaa0fd26b 100644 --- a/openapi_python_client/resolver/data_loader.py +++ b/openapi_python_client/resolver/data_loader.py @@ -1,13 +1,14 @@ import yaml + from .resolver_types import SchemaData -class DataLoader: +class DataLoader: @classmethod def load(cls, path: str, data: bytes) -> SchemaData: - data_type = path.split('.')[-1] + data_type = path.split(".")[-1].casefold() - if data_type == 'json': + if data_type == "json": return cls.load_json(data) else: return cls.load_yaml(data) @@ -19,4 +20,3 @@ def load_json(cls, data: bytes) -> SchemaData: @classmethod def load_yaml(cls, data: bytes) -> SchemaData: return yaml.safe_load(data) - diff --git a/openapi_python_client/resolver/reference.py b/openapi_python_client/resolver/reference.py index 3d387ec98..b38b82160 100644 --- a/openapi_python_client/resolver/reference.py +++ b/openapi_python_client/resolver/reference.py @@ -1,22 +1,43 @@ -class Reference: +import urllib +from typing import Union + - def __init__(self, reference): +class Reference: + def __init__(self, reference: str): self._ref = reference + @property + def remote_relative_path(self) -> Union[str, None]: + if self.is_remote_ref(): + return self._ref.split("#")[0] + return None + + @property + def path_parent(self) -> str: + path = self.path + parts = path.split("/") + parts.pop() + return "/".join(parts) + + @property + def path(self) -> str: + d = self._ref.split("#")[-1] + d = urllib.parse.unquote(d) + d = d.replace("~1", "/") + return d + @property def value(self) -> str: return self._ref - def is_relative_reference(self): + def is_relative_reference(self) -> bool: return self.is_remote_ref() and not self.is_url_reference() - def is_url_reference(self): - return self.is_remote_ref() and (self._ref.startswith('//', 0) or self._ref.startswith('http', 0)) + def is_url_reference(self) -> bool: + return self.is_remote_ref() and (self._ref.startswith("//", 0) or self._ref.startswith("http", 0)) - def is_remote_ref(self): + def is_remote_ref(self) -> bool: return not self.is_local_ref() - - def is_local_ref(self): - return self._ref.startswith('#', 0) - + def is_local_ref(self) -> bool: + return self._ref.startswith("#", 0) diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py index 19eb47d2b..35c41facb 100644 --- a/openapi_python_client/resolver/resolved_schema.py +++ b/openapi_python_client/resolver/resolved_schema.py @@ -1,25 +1,196 @@ -from typing import Any, Dict, Optional, Sequence, Union +import hashlib +from typing import Any, Dict, Generator, List, Tuple, Union, cast + +from .reference import Reference from .resolver_types import SchemaData class ResolvedSchema: - - def __init__(self, root, refs, errors): + def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[str]): self._root: SchemaData = root self._refs: Dict[str, SchemaData] = refs - self._errors: Sequense[str] = errors + self._errors: List[str] = errors + self._resolved_remotes_components: SchemaData = cast(SchemaData, {}) - self._resolved_schema: SchemaData = self._root - self._process() + self._resolved_schema: SchemaData = cast(SchemaData, {}) + if len(self._errors) == 0: + self._process() @property def schema(self) -> SchemaData: - return self._resolved_schema + return self._root @property - def errors(self) -> Sequence[str]: + def errors(self) -> List[str]: return self._errors.copy() - def _process(self): - pass + def _process(self) -> None: + self._process_remote_paths() + self._process_remote_components(self._root) + self._root.update(self._resolved_remotes_components) + + def _process_remote_paths(self) -> None: + refs_to_replace = [] + for owner, ref_key, ref_val in self._lookup_schema_references_in(self._root, "paths"): + ref = Reference(ref_val) + + if ref.is_local_ref(): + continue + + remote_path = ref.remote_relative_path + path = ref.path + + if remote_path not in self._refs: + self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) + else: + remote_schema = self._refs[remote_path] + remote_value = self._lookup_dict(remote_schema, path) + if not remote_value: + self._errors.append("Failed to read remote value {}, in remote ref {}".format(path, remote_path)) + else: + refs_to_replace.append((owner, remote_schema, remote_value)) + + for owner, remote_schema, remote_value in refs_to_replace: + self._process_remote_components(remote_schema, remote_value, 1) + self._replace_reference_with(owner, remote_value) + + def _process_remote_components( + self, owner: SchemaData, subpart: Union[SchemaData, None] = None, depth: int = 0 + ) -> None: + target = subpart if subpart else owner + + for parent, ref_key, ref_val in self._lookup_schema_references(target): + ref = Reference(ref_val) + + if ref.is_local_ref(): + # print('Found local reference >> {0}'.format(ref.value)) + if depth > 0: + self._transform_to_local_components(owner, ref) + else: + remote_path = ref.remote_relative_path + if remote_path not in self._refs: + self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) + else: + remote_owner = self._refs[remote_path] + self._transform_to_local_components(remote_owner, ref) + self._transform_to_local_ref(parent, ref) + + def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> None: + self._ensure_components_dir_exists(ref) + + # print('Processing remote component > {0}'.format(ref.value)) + remote_component = self._lookup_dict(owner, ref.path) + root_components_dir = self._lookup_dict(self._resolved_remotes_components, ref.path_parent) + component_name = ref.path.split("/")[-1] + + if component_name == "SorTransparentContainer" or component_name == "sorTransparentContainer": + print(ref.value) + + if remote_component is None: + print("Weirdy relookup of >> {0}".format(ref.value)) + assert ref.is_local_ref() and self._lookup_dict(self._resolved_remotes_components, ref.path) + return + + if "$ref" in remote_component: + subref = Reference(remote_component["$ref"]) + if not subref.is_local_ref(): + print("Lookup remote ref >>> {0}".format(subref.value)) + return self._process_remote_components(remote_component) + + if root_components_dir: + if component_name in root_components_dir: + local_component_hash = self._reference_schema_hash(root_components_dir[component_name]) + remote_component_hash = self._reference_schema_hash(remote_component) + + if local_component_hash == remote_component_hash: + return + else: + pass + # print('=' * 120) + # print('TODO: Find compoment collision to handle on >>> {0}'.format(ref.path)) + # print('Local componente {0} >> {1}'.format(local_component_hash, root_components_dir[component_name])) + # print('') + # print('Remote componente {0} >> {1}'.format(remote_component_hash, remote_component)) + # print('=' * 120) + else: + root_components_dir[component_name] = remote_component + self._process_remote_components(owner, remote_component, 2) + + def _ensure_components_dir_exists(self, ref: Reference) -> None: + cursor = self._resolved_remotes_components + for key in ref.path_parent.split("/"): + if key == "": + continue + + if key not in cursor: + cursor[key] = {} + + cursor = cursor[key] + + def _transform_to_local_ref(self, owner: Dict[str, Any], ref: Reference) -> None: + owner["$ref"] = "#{0}".format(ref.path) + + def _lookup_dict(self, attr: SchemaData, query: str) -> Union[SchemaData, None]: + cursor = attr + query_parts = [] + + if query.startswith("/paths"): + query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] + else: + query_parts = query.split("/") + + for key in query_parts: + if key == "": + continue + + if isinstance(cursor, dict) and key in cursor: + cursor = cursor[key] + else: + return None + return cursor + + def _replace_reference_with(self, root: Dict[str, Any], new_value: Dict[str, Any]) -> None: + for key in new_value: + root[key] = new_value[key] + + root.pop("$ref") + + def _lookup_schema_references_in( + self, attr: SchemaData, path: str + ) -> Generator[Tuple[SchemaData, str, Any], None, None]: + if not isinstance(attr, dict) or path not in attr: + return + + yield from self._lookup_schema_references(attr[path]) + + def _lookup_schema_references(self, attr: Any) -> Generator[Tuple[SchemaData, str, str], None, None]: + if isinstance(attr, dict): + for key, val in attr.items(): + if key == "$ref": + yield cast(SchemaData, attr), cast(str, key), cast(str, val) + else: + yield from self._lookup_schema_references(val) + + elif isinstance(attr, list): + for val in attr: + yield from self._lookup_schema_references(val) + + def _reference_schema_hash(self, schema: Dict[str, Any]) -> str: + md5 = hashlib.md5() + hash_elms = [] + for key in schema.keys(): + if key == "description": + continue + + if key == "type": + hash_elms.append(schema[key]) + + if key == "allOf": + for item in schema[key]: + hash_elms.append(str(item)) + + hash_elms.append(key) + hash_elms.sort() + md5.update(";".join(hash_elms).encode("utf-8")) + return md5.hexdigest() diff --git a/openapi_python_client/resolver/resolver_types.py b/openapi_python_client/resolver/resolver_types.py index baf35099b..84f6cea5b 100644 --- a/openapi_python_client/resolver/resolver_types.py +++ b/openapi_python_client/resolver/resolver_types.py @@ -1,3 +1,3 @@ from typing import Any, Dict, NewType -SchemaData = NewType('SchemaData', Dict[str, Any]) +SchemaData = NewType("SchemaData", Dict[str, Any]) diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index 2ac040c52..653bb2dfd 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -1,26 +1,25 @@ -import httpcore -import httpx -import urllib import logging - -from typing import Any, Dict, Optional, Sequence, Union, Generator, NewType +import urllib from pathlib import Path +from typing import Any, Dict, Generator, List, Union -from .resolver_types import SchemaData +import httpx + +from .data_loader import DataLoader from .reference import Reference from .resolved_schema import ResolvedSchema -from .data_loader import DataLoader +from .resolver_types import SchemaData -class SchemaResolver: +class SchemaResolver: def __init__(self, url_or_path: Union[str, Path]): if not url_or_path: - raise ValueError('Invalid document root reference, it shall be an remote url or local file path') - - self._root_path: Optional[Path] = None - self._root_path_dir: Optional[Path] = None - self._root_url: Optional[str] = None - self._root_url_scheme: Optional[str] = None + raise ValueError("Invalid document root reference, it shall be an remote url or local file path") + + self._root_path: Union[Path, None] = None + self._root_path_dir: Union[Path, None] = None + self._root_url: Union[str, None] = None + self._root_url_scheme: Union[str, None] = None if isinstance(url_or_path, Path): self._root_path = url_or_path.absolute() @@ -28,28 +27,32 @@ def __init__(self, url_or_path: Union[str, Path]): else: self._root_url = url_or_path self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme - + def resolve(self, recursive: bool = True) -> ResolvedSchema: + assert self._root_path or self._root_url + root_schema: SchemaData external_schemas: Dict[str, SchemaData] = {} - errors: Sequence[str] = [] + errors: List[str] = [] if self._root_path: root_schema = self._fetch_remote_file_path(self._root_path) - else: + elif self._root_url: root_schema = self._fetch_url_reference(self._root_url) self._resolve_schema_references(root_schema, external_schemas, errors, recursive) return ResolvedSchema(root_schema, external_schemas, errors) - def _resolve_schema_references(self, root: SchemaData, external_schemas: Dict[str, SchemaData], errors: Sequence[str], recursive: bool) -> Sequence[SchemaData]: + def _resolve_schema_references( + self, root: SchemaData, external_schemas: Dict[str, SchemaData], errors: List[str], recursive: bool + ) -> None: for ref in self._lookup_schema_references(root): if ref.is_local_ref(): continue try: - path = ref.value.split('#')[0] + path = ref.value.split("#")[0] if path in external_schemas: continue @@ -61,33 +64,37 @@ def _resolve_schema_references(self, root: SchemaData, external_schemas: Dict[st if recursive: self._resolve_schema_references(external_schemas[path], external_schemas, errors, recursive) - except Exception as e: - errors.append('Failed to gather external reference data of {0}'.format(ref.value)) - logging.exception('Failed to gather external reference data of {0}'.format(ref.value)) - + except Exception: + errors.append("Failed to gather external reference data of {0}".format(ref.value)) + logging.exception("Failed to gather external reference data of {0}".format(ref.value)) + def _fetch_remote_reference(self, relative_path: str) -> SchemaData: - if self._root_path: + assert self._root_path_dir or self._root_url + + if self._root_path_dir: abs_path = self._root_path_dir.joinpath(relative_path) return self._fetch_remote_file_path(abs_path) - else: + elif self._root_url: abs_url = urllib.parse.urljoin(self._root_url, relative_path) return self._fetch_url_reference(abs_url) + else: + raise RuntimeError("Bad object initilalization") def _fetch_remote_file_path(self, path: Path) -> SchemaData: - logging.info('Fetching remote ref file path > {0}'.format(path)) + logging.info("Fetching remote ref file path > {0}".format(path)) return DataLoader.load(str(path), path.read_bytes()) def _fetch_url_reference(self, url: str) -> SchemaData: - if url.startswith('//', 0): - url = "{0}{1}".format(self._root_url_scheme, url) + if url.startswith("//", 0): + url = "{0}:{1}".format(self._root_url_scheme, url) - logging.info('Fetching remote ref url > {0}'.format(url)) + logging.info("Fetching remote ref url > {0}".format(url)) return DataLoader.load(url, httpx.get(url).content) def _lookup_schema_references(self, attr: Any) -> Generator[Reference, None, None]: if isinstance(attr, dict): for key, val in attr.items(): - if key == '$ref': + if key == "$ref": yield Reference(val) else: yield from self._lookup_schema_references(val) @@ -95,4 +102,3 @@ def _lookup_schema_references(self, attr: Any) -> Generator[Reference, None, Non elif isinstance(attr, list): for val in attr: yield from self._lookup_schema_references(val) - From c90005fc7e15ad9ab63dba3c00ad9e7a6cafd598 Mon Sep 17 00:00:00 2001 From: Nementon Date: Wed, 10 Feb 2021 20:02:42 +0100 Subject: [PATCH 04/17] correct tests breaking changes --- openapi_python_client/__init__.py | 5 +++++ .../resolver/schema_resolver.py | 18 +++++++++++++----- tests/test___init__.py | 17 ++++++++++------- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/openapi_python_client/__init__.py b/openapi_python_client/__init__.py index d9c35e97d..61353eade 100644 --- a/openapi_python_client/__init__.py +++ b/openapi_python_client/__init__.py @@ -3,10 +3,13 @@ import shutil import subprocess import sys +import urllib from enum import Enum from pathlib import Path from typing import Any, Dict, Optional, Sequence, Union, cast +import httpcore +import httpx from jinja2 import BaseLoader, ChoiceLoader, Environment, FileSystemLoader, PackageLoader from openapi_python_client import utils @@ -297,6 +300,8 @@ def _get_document(*, url: Optional[str], path: Optional[Path]) -> Union[Dict[str result = resolver.resolve() if len(result.errors) > 0: return GeneratorError(header="; ".join(result.errors)) + except (httpx.HTTPError, httpcore.NetworkError, urllib.error.URLError): + return GeneratorError(header="Could not get OpenAPI document from provided URL") except Exception: return GeneratorError(header="Invalid YAML from provided source") diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index 653bb2dfd..b720052f8 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -1,7 +1,7 @@ import logging import urllib from pathlib import Path -from typing import Any, Dict, Generator, List, Union +from typing import Any, Dict, Generator, List, Union, cast import httpx @@ -21,12 +21,20 @@ def __init__(self, url_or_path: Union[str, Path]): self._root_url: Union[str, None] = None self._root_url_scheme: Union[str, None] = None - if isinstance(url_or_path, Path): + if self._isapath(url_or_path): + url_or_path = cast(Path, url_or_path) self._root_path = url_or_path.absolute() self._root_path_dir = self._root_path.parent else: + url_or_path = cast(str, url_or_path) self._root_url = url_or_path - self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme + try: + self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme + except Exception: + raise urllib.error.URLError(f"Coult not parse URL > {url_or_path}") + + def _isapath(self, url_or_path: Union[str, Path]) -> bool: + return isinstance(url_or_path, Path) def resolve(self, recursive: bool = True) -> ResolvedSchema: assert self._root_path or self._root_url @@ -81,14 +89,14 @@ def _fetch_remote_reference(self, relative_path: str) -> SchemaData: raise RuntimeError("Bad object initilalization") def _fetch_remote_file_path(self, path: Path) -> SchemaData: - logging.info("Fetching remote ref file path > {0}".format(path)) + logging.info(f"Fetching remote ref file path > {path}") return DataLoader.load(str(path), path.read_bytes()) def _fetch_url_reference(self, url: str) -> SchemaData: if url.startswith("//", 0): url = "{0}:{1}".format(self._root_url_scheme, url) - logging.info("Fetching remote ref url > {0}".format(url)) + logging.info(f"Fetching remote ref url > {url}") return DataLoader.load(url, httpx.get(url).content) def _lookup_schema_references(self, attr: Any) -> Generator[Reference, None, None]: diff --git a/tests/test___init__.py b/tests/test___init__.py index d45108181..dfcf699ba 100644 --- a/tests/test___init__.py +++ b/tests/test___init__.py @@ -1,4 +1,5 @@ import pathlib +from urllib.parse import ParseResult import httpcore import jinja2 @@ -167,7 +168,7 @@ def test__get_document_url_and_path(self, mocker): loads.assert_not_called() def test__get_document_bad_url(self, mocker): - get = mocker.patch("httpx.get", side_effect=httpcore.NetworkError) + get = mocker.patch("httpx.get") Path = mocker.patch("openapi_python_client.Path") loads = mocker.patch("yaml.safe_load") @@ -177,7 +178,7 @@ def test__get_document_bad_url(self, mocker): result = _get_document(url=url, path=None) assert result == GeneratorError(header="Could not get OpenAPI document from provided URL") - get.assert_called_once_with(url) + get.assert_not_called() Path.assert_not_called() loads.assert_not_called() @@ -188,7 +189,7 @@ def test__get_document_url_no_path(self, mocker): from openapi_python_client import _get_document - url = mocker.MagicMock() + url = "http://localhost/" _get_document(url=url, path=None) get.assert_called_once_with(url) @@ -198,6 +199,7 @@ def test__get_document_url_no_path(self, mocker): def test__get_document_path_no_url(self, mocker): get = mocker.patch("httpx.get") loads = mocker.patch("yaml.safe_load") + mocker.patch("openapi_python_client.resolver.schema_resolver.SchemaResolver._isapath", return_value=True) from openapi_python_client import _get_document @@ -205,12 +207,13 @@ def test__get_document_path_no_url(self, mocker): _get_document(url=None, path=path) get.assert_not_called() - path.read_bytes.assert_called_once() - loads.assert_called_once_with(path.read_bytes()) + path.absolute().read_bytes.assert_called_once() + loads.assert_called_once_with(path.absolute().read_bytes()) def test__get_document_bad_yaml(self, mocker): get = mocker.patch("httpx.get") loads = mocker.patch("yaml.safe_load", side_effect=yaml.YAMLError) + mocker.patch("openapi_python_client.resolver.schema_resolver.SchemaResolver._isapath", return_value=True) from openapi_python_client import _get_document @@ -218,8 +221,8 @@ def test__get_document_bad_yaml(self, mocker): result = _get_document(url=None, path=path) get.assert_not_called() - path.read_bytes.assert_called_once() - loads.assert_called_once_with(path.read_bytes()) + path.absolute().read_bytes.assert_called_once() + loads.assert_called_once_with(path.absolute().read_bytes()) assert result == GeneratorError(header="Invalid YAML from provided source") From 8f404a3207abd671985e0fbaa7c281d286707830 Mon Sep 17 00:00:00 2001 From: Nementon Date: Thu, 11 Feb 2021 18:30:30 +0100 Subject: [PATCH 05/17] resolver / refactor (squash me) --- .../resolver/.resolved_schema.py.swp | Bin 12288 -> 0 bytes .../resolver/.schema_resolver.py.swp | Bin 16384 -> 0 bytes openapi_python_client/resolver/pointer.py | 48 +++++++++++++++ openapi_python_client/resolver/reference.py | 58 ++++++++++-------- .../resolver/resolved_schema.py | 24 +++++--- .../resolver/schema_resolver.py | 8 ++- 6 files changed, 101 insertions(+), 37 deletions(-) delete mode 100644 openapi_python_client/resolver/.resolved_schema.py.swp delete mode 100644 openapi_python_client/resolver/.schema_resolver.py.swp create mode 100644 openapi_python_client/resolver/pointer.py diff --git a/openapi_python_client/resolver/.resolved_schema.py.swp b/openapi_python_client/resolver/.resolved_schema.py.swp deleted file mode 100644 index 00668b71c19c357928e09fa02edd8dc8599e5a9c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2O=}b}7{^n=n{7p}3aK92h1r)XN?lMa^rBX}bX^g&#OZ9NJ2;!9NwzQ-Jz6jN z1qACC@gjl;&sO{>o_vzbbl1XqFZ%@k%QvX)Rl;VUA2g-nB32o@wU;m}Wo{}< zT@`j9MLo$gC|_Awu(Hysxsmw+*Uj>!&rCAl!?>Y}sr7LK3Ct{kkxlDs^SzrlR{fv) z^=nJ?@}=I);=sX?01`j~NB{{S0VIF~kiaYwuxgIHfx*v}!(S<%bElr=M|>awB!C2v z01`j~NB{{S0VIF~kN^@u0%wpw+$ZG61wx+9L-X+afA#GPDbT*_A?#WYX3<|5%{#qd0hXl$)+({)kQ+T$=6?L*y5 zmeOSv+ksi8nV>GJ!L*dN?LB;Qr6ptxDJS!2Zb!O`;aW>=8i`cJO6?>zSbZ}!TE9oT ze1GKndv2`W2_^1Bdd0PctJTnsU;iXNbqAX`57$<>l0A{yU(w!(Z9Fw^n64Tpbm`KS zQ-@pM)n)HC?Ve<$cPk$tGww6Qg&D!!&t}4G5E1Oe6>zI6lA1Q%$P()iL=!c3AOz0O z5br$>aAz^$=Db`J&dZ+;%S2`wD-Yc5MLthdHsDSy$zx;ab~#{}3(O?da!4E1b$4b9 p#SP7dN@_b%GPuEJCibdb_)0~7jEHV2n@JJpFid&65$y>uk-rS)BC`Mh diff --git a/openapi_python_client/resolver/.schema_resolver.py.swp b/openapi_python_client/resolver/.schema_resolver.py.swp deleted file mode 100644 index b10291ff1c3357ff534fca995394c8dccffac1e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHNO^g&p6fP0}K#<5mFDAtTnb~BvcafOjBuj!W7!1N@`SGxjrgyq#x230hsIFOd zvRRGsq!{nw-M@=^(1XT{5i~K;i(E{+XcPm+7%v{Y_`R;`pZQ}+^kAeq`L?&JUcGwt z)vJ0{HN7x(>hLpspEJeqY-8;5caNO?UFsSOzQumI2Fv zWxz6E8L$jk2L6K#c=(-vJM4Oc4*dB2KRf^b{s3b?0#|`6z-6EdoCRE9AMg-xV=H4{ z09Szbf%kwF;56{t7RG)8J_23>P61=U4&cxG8T$-)8CV78fQ`Ukn;H8UxC98`3E%** z8~A<`W8VN5fmeVj;KqH7T?Y;VGr(rxmwO=xd<}dGyasr{4DclIFz^*FGCl+Z-~)$% zt9LW@E${(w6u5R5V?O||18YDXV89<68G9R$zZ-Q%X9460M{8V)G=hoNw(Kr(N@0mKwt&~D_WW;AYhb-2 zb!x5RfQpulLO^#M_LC*K@gNRq2gFI0TnN-s zwK6$bsqv{Yb4{`<)d7plP-@apx6TtKQ_ha3-?k7B>$8J8)wZXW#%r8jjuf8nE_9W^ zuol*>Pfs%lMXI5Xq*S+7ln%b`g67qwN-jy5p{IZAe3s0iu^-wGqS=hJ=5-h;-a>(RD(g1}G%p=t z&+ztg2kMz#`2gq{KG45GSI!K%rB*O;@cmr24h?!YDNB3Qbb@p`mZY(&4VXjnv@i!z zM+_7~5w$>(r{B2V*+^Pa$!T&x?P5;S4g*!K?5vE>Po3$}tz>tIDMQUD`Vz(sFYS9d z^ihCBi)5j8nvrpiB0)w#I#dAXAouQv6|OPa^#1p*P*gF z-=WY?BFHY>j4$9LO1<1>BE`BpB)rb)so-m&Y8us&G~f(cl{H^@`g+spmeiHZ?~A{s z>U~q9*IA7jCL$N+4bF(s8VA<+lG5JE0LSSoE4kdz0fO=Kg`W;M{P4j>rPa0Exl4wZ zT_-Ikr!zihB5$8RK-CF}rJ15|?|>JREpLhADfN$-P8~u(3lcq%HVwJ72XkYo1EQm4 zZ=y}B$tvW6gJV@Pf(C$MDC+j*Qq;%RFINNO`$c*ibP%QEu%kq^@Dq$^X`5T>PEGG# zO52gI&q28g^c%XQ)b6CaN$C}pw(Z6+8XL@>teiTd z6`&fOPK>QMD%H{zp_Y3Uf`D1fYnqN)agf5nNnALAh8Z&o%&sm=f=slUrP7mAInjuv zV08Zf6=(HbILp)dAIISIxsLQ(z?;B}fDh~jNNy)!AIpGcz%pPNunbrRECZGS%YbFT zGGH074E*mHpzD(Iqkn}>?;P|E&&V5MT&NWHAf$pWmywzM@LJr$iLm!BO8?{<21$Oy QT(*Gz{|8o3b$#vjH;XGpFaQ7m diff --git a/openapi_python_client/resolver/pointer.py b/openapi_python_client/resolver/pointer.py new file mode 100644 index 000000000..36874e294 --- /dev/null +++ b/openapi_python_client/resolver/pointer.py @@ -0,0 +1,48 @@ +import urllib.parse +from typing import List, Union + + +class Pointer: + """ https://tools.ietf.org/html/rfc6901 """ + + def __init__(self, pointer: str) -> None: + if pointer is None or pointer != "" and not pointer.startswith("/"): + raise ValueError(f'Invalid pointer value {pointer}, it must match: *( "/" reference-token )') + + self._pointer = pointer + + @property + def value(self) -> str: + return self._pointer + + @property + def parent(self) -> Union["Pointer", None]: + tokens = self.tokens(False) + + if len(tokens) > 1: + tokens.pop() + return Pointer("/".join(tokens)) + else: + assert tokens[-1] == "" + return None + + def tokens(self, unescape: bool = True) -> List[str]: + tokens = [] + + if unescape: + for token in self._pointer.split("/"): + tokens.append(self._unescape(token)) + else: + tokens = self._pointer.split("/") + + return tokens + + @property + def unescapated_value(self) -> str: + return self._unescape(self._pointer) + + def _unescape(self, data: str) -> str: + data = urllib.parse.unquote(data) + data = data.replace("~1", "/") + data = data.replace("~0", "~") + return data diff --git a/openapi_python_client/resolver/reference.py b/openapi_python_client/resolver/reference.py index b38b82160..534232bcd 100644 --- a/openapi_python_client/resolver/reference.py +++ b/openapi_python_client/resolver/reference.py @@ -1,43 +1,51 @@ -import urllib -from typing import Union +import urllib.parse + +from .pointer import Pointer class Reference: + """ https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03 """ + def __init__(self, reference: str): self._ref = reference + self._parsed_ref = urllib.parse.urlparse(reference) @property - def remote_relative_path(self) -> Union[str, None]: - if self.is_remote_ref(): - return self._ref.split("#")[0] - return None + def path(self) -> str: + return urllib.parse.urldefrag(self._parsed_ref.geturl()).url @property - def path_parent(self) -> str: - path = self.path - parts = path.split("/") - parts.pop() - return "/".join(parts) + def pointer(self) -> Pointer: + frag = self._parsed_ref.fragment + if self.is_url() and frag != "" and not frag.startswith("/"): + frag = f"/{frag}" - @property - def path(self) -> str: - d = self._ref.split("#")[-1] - d = urllib.parse.unquote(d) - d = d.replace("~1", "/") - return d + return Pointer(frag) + + def is_relative(self) -> bool: + """ return True if reference path is a relative path """ + return not self.is_absolute() + + def is_absolute(self) -> bool: + """ return True is reference path is an absolute path """ + return self._parsed_ref.netloc != "" @property def value(self) -> str: return self._ref - def is_relative_reference(self) -> bool: - return self.is_remote_ref() and not self.is_url_reference() + def is_url(self) -> bool: + """ return True if the reference path is pointing to an external url location """ + return self.is_remote() and self._parsed_ref.netloc != "" - def is_url_reference(self) -> bool: - return self.is_remote_ref() and (self._ref.startswith("//", 0) or self._ref.startswith("http", 0)) + def is_remote(self) -> bool: + """ return True if the reference pointer is pointing to a remote document """ + return not self.is_local() - def is_remote_ref(self) -> bool: - return not self.is_local_ref() + def is_local(self) -> bool: + """ return True if the reference pointer is pointing to the current document """ + return self._parsed_ref.path == "" - def is_local_ref(self) -> bool: - return self._ref.startswith("#", 0) + def is_full_document(self) -> bool: + """ return True if the reference pointer is pointing to the whole document content """ + return self.pointer.parent is None diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py index 35c41facb..7e528e694 100644 --- a/openapi_python_client/resolver/resolved_schema.py +++ b/openapi_python_client/resolver/resolved_schema.py @@ -34,10 +34,10 @@ def _process_remote_paths(self) -> None: for owner, ref_key, ref_val in self._lookup_schema_references_in(self._root, "paths"): ref = Reference(ref_val) - if ref.is_local_ref(): + if ref.is_local(): continue - remote_path = ref.remote_relative_path + remote_path = ref.pointer.value path = ref.path if remote_path not in self._refs: @@ -62,12 +62,12 @@ def _process_remote_components( for parent, ref_key, ref_val in self._lookup_schema_references(target): ref = Reference(ref_val) - if ref.is_local_ref(): + if ref.is_local(): # print('Found local reference >> {0}'.format(ref.value)) if depth > 0: self._transform_to_local_components(owner, ref) else: - remote_path = ref.remote_relative_path + remote_path = ref.pointer.value if remote_path not in self._refs: self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) else: @@ -80,20 +80,23 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N # print('Processing remote component > {0}'.format(ref.value)) remote_component = self._lookup_dict(owner, ref.path) - root_components_dir = self._lookup_dict(self._resolved_remotes_components, ref.path_parent) - component_name = ref.path.split("/")[-1] + pointer_parent = ref.pointer.parent + + if pointer_parent is not None: + root_components_dir = self._lookup_dict(self._resolved_remotes_components, pointer_parent.value) + component_name = ref.path.split("/")[-1] if component_name == "SorTransparentContainer" or component_name == "sorTransparentContainer": print(ref.value) if remote_component is None: print("Weirdy relookup of >> {0}".format(ref.value)) - assert ref.is_local_ref() and self._lookup_dict(self._resolved_remotes_components, ref.path) + assert ref.is_local() and self._lookup_dict(self._resolved_remotes_components, ref.path) return if "$ref" in remote_component: subref = Reference(remote_component["$ref"]) - if not subref.is_local_ref(): + if not subref.is_local(): print("Lookup remote ref >>> {0}".format(subref.value)) return self._process_remote_components(remote_component) @@ -118,7 +121,10 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N def _ensure_components_dir_exists(self, ref: Reference) -> None: cursor = self._resolved_remotes_components - for key in ref.path_parent.split("/"): + pointer_dir = ref.pointer.parent + assert pointer_dir is not None + + for key in pointer_dir.value.split("/"): # noqa if key == "": continue diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index b720052f8..8c9507463 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -30,7 +30,9 @@ def __init__(self, url_or_path: Union[str, Path]): self._root_url = url_or_path try: self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme - except Exception: + if self._root_url_scheme not in ["http", "https"]: + raise ValueError(f"Unsupported URL scheme '{self._root_url_scheme}', expecting http or https") + except (TypeError, AttributeError): raise urllib.error.URLError(f"Coult not parse URL > {url_or_path}") def _isapath(self, url_or_path: Union[str, Path]) -> bool: @@ -56,7 +58,7 @@ def _resolve_schema_references( ) -> None: for ref in self._lookup_schema_references(root): - if ref.is_local_ref(): + if ref.is_local(): continue try: @@ -64,7 +66,7 @@ def _resolve_schema_references( if path in external_schemas: continue - if ref.is_url_reference(): + if ref.is_url(): external_schemas[path] = self._fetch_url_reference(path) else: external_schemas[path] = self._fetch_remote_reference(path) From dde5fac18879a79c25550f1be612faf657b6fcb3 Mon Sep 17 00:00:00 2001 From: Nementon Date: Thu, 11 Feb 2021 18:31:01 +0100 Subject: [PATCH 06/17] resolver / add reference tests --- .../test_resolver/test_resolver_reference.py | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 tests/test_resolver/test_resolver_reference.py diff --git a/tests/test_resolver/test_resolver_reference.py b/tests/test_resolver/test_resolver_reference.py new file mode 100644 index 000000000..6782426f3 --- /dev/null +++ b/tests/test_resolver/test_resolver_reference.py @@ -0,0 +1,212 @@ +import pytest + + +def get_data_set(): + # https://swagger.io/docs/specification/using-ref/ + return { + "local_references": ["#/definitions/myElement"], + "remote_references": [ + "document.json#/myElement", + "../document.json#/myElement", + "../another-folder/document.json#/myElement", + ], + "url_references": [ + "http://path/to/your/resource", + "http://path/to/your/resource.json#myElement", + "//anotherserver.com/files/example.json", + ], + "relative_references": [ + "#/definitions/myElement", + "document.json#/myElement", + "../document.json#/myElement", + "../another-folder/document.json#/myElement", + ], + "absolute_references": [ + "http://path/to/your/resource", + "http://path/to/your/resource.json#myElement", + "//anotherserver.com/files/example.json", + ], + "full_document_references": [ + "http://path/to/your/resource", + "//anotherserver.com/files/example.json", + ], + "not_full_document_references": [ + "#/definitions/myElement", + "document.json#/myElement", + "../document.json#/myElement", + "../another-folder/document.json#/myElement", + "http://path/to/your/resource.json#myElement", + ], + "path_by_reference": { + "#/definitions/myElement": "", + "document.json#/myElement": "document.json", + "../document.json#/myElement": "../document.json", + "../another-folder/document.json#/myElement": "../another-folder/document.json", + "http://path/to/your/resource": "http://path/to/your/resource", + "http://path/to/your/resource.json#myElement": "http://path/to/your/resource.json", + "//anotherserver.com/files/example.json": "//anotherserver.com/files/example.json", + }, + "pointer_by_reference": { + "#/definitions/myElement": "/definitions/myElement", + "document.json#/myElement": "/myElement", + "../document.json#/myElement": "/myElement", + "../another-folder/document.json#/myElement": "/myElement", + "http://path/to/your/resource": "", + "http://path/to/your/resource.json#myElement": "/myElement", + "//anotherserver.com/files/example.json": "", + }, + "pointerparent_by_reference": { + "#/definitions/myElement": "/definitions", + "document.json#/myElement": "", + "../document.json#/myElement": "", + "../another-folder/document.json#/myElement": "", + "http://path/to/your/resource": None, + "http://path/to/your/resource.json#myElement": "", + "//anotherserver.com/files/example.json": None, + }, + } + + +def test_is_local(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["local_references"]: + ref = Reference(ref_str) + assert ref.is_local() == True + + for ref_str in data_set["remote_references"]: + ref = Reference(ref_str) + assert ref.is_local() == False + + for ref_str in data_set["url_references"]: + ref = Reference(ref_str) + assert ref.is_local() == False + + +def test_is_remote(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["local_references"]: + ref = Reference(ref_str) + assert ref.is_remote() == False + + for ref_str in data_set["remote_references"]: + ref = Reference(ref_str) + assert ref.is_remote() == True + + for ref_str in data_set["url_references"]: + ref = Reference(ref_str) + assert ref.is_remote() == True + + +def test_is_url(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["local_references"]: + ref = Reference(ref_str) + assert ref.is_url() == False + + for ref_str in data_set["remote_references"]: + ref = Reference(ref_str) + assert ref.is_url() == False + + for ref_str in data_set["url_references"]: + ref = Reference(ref_str) + assert ref.is_url() == True + + +def test_is_absolute(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["absolute_references"]: + ref = Reference(ref_str) + assert ref.is_absolute() == True + + for ref_str in data_set["relative_references"]: + ref = Reference(ref_str) + assert ref.is_absolute() == False + + +def test_is_relative(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["absolute_references"]: + ref = Reference(ref_str) + assert ref.is_relative() == False + + for ref_str in data_set["relative_references"]: + ref = Reference(ref_str) + assert ref.is_relative() == True + + +def test_pointer(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["pointer_by_reference"].keys(): + ref = Reference(ref_str) + pointer = data_set["pointer_by_reference"][ref_str] + assert ref.pointer.value == pointer + + +def test_pointer_parent(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["pointerparent_by_reference"].keys(): + ref = Reference(ref_str) + pointer_parent = data_set["pointerparent_by_reference"][ref_str] + + if pointer_parent is not None: + assert ref.pointer.parent.value == pointer_parent + else: + assert ref.pointer.parent == None + + +def test_path(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["path_by_reference"].keys(): + ref = Reference(ref_str) + path = data_set["path_by_reference"][ref_str] + assert ref.path == path + + +def test_is_full_document(): + from openapi_python_client.resolver.reference import Reference + + data_set = get_data_set() + + for ref_str in data_set["full_document_references"]: + ref = Reference(ref_str) + assert ref.is_full_document() == True + assert ref.pointer.parent == None + + for ref_str in data_set["not_full_document_references"]: + ref = Reference(ref_str) + assert ref.is_full_document() == False + assert ref.pointer.parent != None + + +def test_value(): + from openapi_python_client.resolver.reference import Reference + + ref = Reference("fooBaR") + assert ref.value == "fooBaR" + + ref = Reference("FooBAR") + assert ref.value == "FooBAR" From 60fcdb1d44191cf636bd5dc53b013d69a6cbac8d Mon Sep 17 00:00:00 2001 From: Nementon Date: Thu, 11 Feb 2021 18:31:21 +0100 Subject: [PATCH 07/17] resolver / add data_loader tests --- .../test_resolver_data_loader.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/test_resolver/test_resolver_data_loader.py diff --git a/tests/test_resolver/test_resolver_data_loader.py b/tests/test_resolver/test_resolver_data_loader.py new file mode 100644 index 000000000..ed20dd95f --- /dev/null +++ b/tests/test_resolver/test_resolver_data_loader.py @@ -0,0 +1,50 @@ +import pytest + + +def test_load(mocker): + from openapi_python_client.resolver.data_loader import DataLoader + + dl_load_json = mocker.patch("openapi_python_client.resolver.data_loader.DataLoader.load_json") + dl_load_yaml = mocker.patch("openapi_python_client.resolver.data_loader.DataLoader.load_yaml") + + content = mocker.MagicMock() + DataLoader.load("foobar.json", content) + dl_load_json.assert_called_once_with(content) + + content = mocker.MagicMock() + DataLoader.load("foobar.jSoN", content) + dl_load_json.assert_called_with(content) + + content = mocker.MagicMock() + DataLoader.load("foobar.yaml", content) + dl_load_yaml.assert_called_once_with(content) + + content = mocker.MagicMock() + DataLoader.load("foobar.yAmL", content) + dl_load_yaml.assert_called_with(content) + + content = mocker.MagicMock() + DataLoader.load("foobar.ymL", content) + dl_load_yaml.assert_called_with(content) + + content = mocker.MagicMock() + DataLoader.load("foobar", content) + dl_load_yaml.assert_called_with(content) + + +def test_load_yaml(mocker): + from openapi_python_client.resolver.data_loader import DataLoader + + yaml_safeload = mocker.patch("yaml.safe_load") + + content = mocker.MagicMock() + DataLoader.load_yaml(content) + yaml_safeload.assert_called_once_with(content) + + +def test_load_json(mocker): + from openapi_python_client.resolver.data_loader import DataLoader + + content = mocker.MagicMock() + with pytest.raises(NotImplementedError): + DataLoader.load_json(content) From 3d6367fd4d1fe471a80447aea02a8bf6825f6f88 Mon Sep 17 00:00:00 2001 From: Nementon Date: Thu, 11 Feb 2021 18:31:50 +0100 Subject: [PATCH 08/17] resolver / add schema_resolver tests (wip) --- .../test_resolver_schema_resolver.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/test_resolver/test_resolver_schema_resolver.py diff --git a/tests/test_resolver/test_resolver_schema_resolver.py b/tests/test_resolver/test_resolver_schema_resolver.py new file mode 100644 index 000000000..3be3267dd --- /dev/null +++ b/tests/test_resolver/test_resolver_schema_resolver.py @@ -0,0 +1,22 @@ +import urllib + +import pytest + + +def test___init__invalid_data(mocker): + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + with pytest.raises(ValueError): + SchemaResolver(None) + + invalid_url = "foobar" + with pytest.raises(ValueError): + SchemaResolver(invalid_url) + + invalid_url = 42 + with pytest.raises(urllib.error.URLError): + SchemaResolver(invalid_url) + + invalid_url = mocker.Mock() + with pytest.raises(urllib.error.URLError): + SchemaResolver(invalid_url) From 33276c98e7abb556c714d9108cb6d98e93962a99 Mon Sep 17 00:00:00 2001 From: Nementon Date: Fri, 12 Feb 2021 18:38:51 +0100 Subject: [PATCH 09/17] resovler / add pointer tests --- tests/test_resolver/test_resolver_pointer.py | 97 ++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/test_resolver/test_resolver_pointer.py diff --git a/tests/test_resolver/test_resolver_pointer.py b/tests/test_resolver/test_resolver_pointer.py new file mode 100644 index 000000000..92e1ded35 --- /dev/null +++ b/tests/test_resolver/test_resolver_pointer.py @@ -0,0 +1,97 @@ +import pytest + + +def get_data_set(): + # https://tools.ietf.org/html/rfc6901 + return { + "valid_pointers": [ + "/myElement", + "/definitions/myElement", + "", + "/foo", + "/foo/0", + "/", + "/a~1b", + "/c%d", + "/e^f", + "/g|h", + "/i\\j" '/k"l', + "/ ", + "/m~0n", + "/m~01", + ], + "invalid_pointers": ["../foo", "foobar", None], + "tokens_by_pointer": { + "/myElement": ["", "myElement"], + "/definitions/myElement": ["", "definitions", "myElement"], + "": [""], + "/foo": ["", "foo"], + "/foo/0": ["", "foo", "0"], + "/": ["", ""], + "/a~1b": ["", "a/b"], + "/c%d": ["", "c%d"], + "/e^f": ["", "e^f"], + "/g|h": ["", "g|h"], + "/i\\j": ["", "i\\j"], + '/k"l': ["", 'k"l'], + "/ ": ["", " "], + "/m~0n": ["", "m~n"], + "/m~01": ["", "m~1"], + }, + } + + +def test___init__(): + from openapi_python_client.resolver.pointer import Pointer + + data_set = get_data_set() + + for pointer_str in data_set["valid_pointers"]: + p = Pointer(pointer_str) + assert p.value != None + assert p.value == pointer_str + + for pointer_str in data_set["invalid_pointers"]: + with pytest.raises(ValueError): + p = Pointer(pointer_str) + + +def test_token(): + from openapi_python_client.resolver.pointer import Pointer + + data_set = get_data_set() + + for pointer_str in data_set["tokens_by_pointer"].keys(): + p = Pointer(pointer_str) + expected_tokens = data_set["tokens_by_pointer"][pointer_str] + + for idx, token in enumerate(p.tokens()): + assert expected_tokens[idx] == token + + +def test_parent(): + from openapi_python_client.resolver.pointer import Pointer + + data_set = get_data_set() + + for pointer_str in data_set["tokens_by_pointer"].keys(): + p = Pointer(pointer_str) + expected_tokens = data_set["tokens_by_pointer"][pointer_str] + + while p.parent is not None: + p = p.parent + expected_tokens.pop() + assert p.tokens()[-1] == expected_tokens[-1] + assert len(p.tokens()) == len(expected_tokens) + + assert len(expected_tokens) == 1 + assert expected_tokens[-1] == "" + + +def test__unescape_and__escape(): + from openapi_python_client.resolver.pointer import Pointer + + escaped_unescaped_values = [("/m~0n", "/m~n"), ("/m~01", "/m~1"), ("/a~1b", "/a/b"), ("/foobar", "/foobar")] + + for escaped, unescaped in escaped_unescaped_values: + assert Pointer(escaped).unescapated_value == unescaped From ad9c7dab9bf4243840ff09ef58c763239a892d8c Mon Sep 17 00:00:00 2001 From: Nementon Date: Sun, 14 Feb 2021 21:54:39 +0100 Subject: [PATCH 10/17] resolver / refactor (squash me) --- .../resolver/schema_resolver.py | 75 +++++++++++++------ 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index 8c9507463..5a1c602e7 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -17,17 +17,18 @@ def __init__(self, url_or_path: Union[str, Path]): raise ValueError("Invalid document root reference, it shall be an remote url or local file path") self._root_path: Union[Path, None] = None - self._root_path_dir: Union[Path, None] = None self._root_url: Union[str, None] = None self._root_url_scheme: Union[str, None] = None + self._parent_path: str if self._isapath(url_or_path): url_or_path = cast(Path, url_or_path) self._root_path = url_or_path.absolute() - self._root_path_dir = self._root_path.parent + self._parent_path = str(self._root_path.parent) else: url_or_path = cast(str, url_or_path) self._root_url = url_or_path + self._parent_path = url_or_path try: self._root_url_scheme = urllib.parse.urlparse(url_or_path).scheme if self._root_url_scheme not in ["http", "https"]: @@ -44,17 +45,23 @@ def resolve(self, recursive: bool = True) -> ResolvedSchema: root_schema: SchemaData external_schemas: Dict[str, SchemaData] = {} errors: List[str] = [] + parent: str if self._root_path: root_schema = self._fetch_remote_file_path(self._root_path) elif self._root_url: root_schema = self._fetch_url_reference(self._root_url) - self._resolve_schema_references(root_schema, external_schemas, errors, recursive) + self._resolve_schema_references(self._parent_path, root_schema, external_schemas, errors, recursive) return ResolvedSchema(root_schema, external_schemas, errors) def _resolve_schema_references( - self, root: SchemaData, external_schemas: Dict[str, SchemaData], errors: List[str], recursive: bool + self, + parent: str, + root: SchemaData, + external_schemas: Dict[str, SchemaData], + errors: List[str], + recursive: bool, ) -> None: for ref in self._lookup_schema_references(root): @@ -62,33 +69,57 @@ def _resolve_schema_references( continue try: - path = ref.value.split("#")[0] + path = self._absolute_path(ref.path, parent) + parent = self._parent(path) + if path in external_schemas: continue - if ref.is_url(): - external_schemas[path] = self._fetch_url_reference(path) - else: - external_schemas[path] = self._fetch_remote_reference(path) + external_schemas[path] = self._fetch_remote_reference(path) if recursive: - self._resolve_schema_references(external_schemas[path], external_schemas, errors, recursive) + self._resolve_schema_references(parent, external_schemas[path], external_schemas, errors, recursive) except Exception: - errors.append("Failed to gather external reference data of {0}".format(ref.value)) - logging.exception("Failed to gather external reference data of {0}".format(ref.value)) + errors.append(f"Failed to gather external reference data of {ref.value} from {path}") + logging.exception(f"Failed to gather external reference data of {ref.value} from {path}") - def _fetch_remote_reference(self, relative_path: str) -> SchemaData: - assert self._root_path_dir or self._root_url + def _parent(self, abs_path: str) -> str: + if abs_path.startswith("http", 0): + return urllib.parse.urljoin(f"{abs_path}/", "..") + else: + path = Path(abs_path) + return str(path.parent) + + def _absolute_path(self, relative_path: str, parent: str) -> str: + if relative_path.startswith("http", 0): + return relative_path + + if relative_path.startswith("//"): + if parent.startswith("http"): + scheme = urllib.parse.urlparse(parent).scheme + return f"{scheme}:{relative_path}" + else: + scheme = self._root_url_scheme or "http" + return f"{scheme}:{relative_path}" + + if parent.startswith("http"): + return urllib.parse.urljoin(parent, relative_path) + else: + parent_dir = Path(parent) + abs_path = parent_dir.joinpath(relative_path) + abs_path = abs_path.resolve() + return str(abs_path) - if self._root_path_dir: - abs_path = self._root_path_dir.joinpath(relative_path) - return self._fetch_remote_file_path(abs_path) - elif self._root_url: - abs_url = urllib.parse.urljoin(self._root_url, relative_path) - return self._fetch_url_reference(abs_url) + def _fetch_remote_reference(self, abs_path: str) -> SchemaData: + res: SchemaData + + if abs_path.startswith("http"): + res = self._fetch_url_reference(abs_path) else: - raise RuntimeError("Bad object initilalization") + res = self._fetch_remote_file_path(Path(abs_path)) + + return res def _fetch_remote_file_path(self, path: Path) -> SchemaData: logging.info(f"Fetching remote ref file path > {path}") @@ -96,7 +127,7 @@ def _fetch_remote_file_path(self, path: Path) -> SchemaData: def _fetch_url_reference(self, url: str) -> SchemaData: if url.startswith("//", 0): - url = "{0}:{1}".format(self._root_url_scheme, url) + url = "{0}:{1}".format((self._root_url_scheme or "http"), url) logging.info(f"Fetching remote ref url > {url}") return DataLoader.load(url, httpx.get(url).content) From 1805b0dc49a44ef42ff9e5918f70608eb24a90ea Mon Sep 17 00:00:00 2001 From: Nementon Date: Sun, 14 Feb 2021 21:55:09 +0100 Subject: [PATCH 11/17] resolver / add schema_resolver tests (squash me) --- .../test_resolver_schema_resolver.py | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) diff --git a/tests/test_resolver/test_resolver_schema_resolver.py b/tests/test_resolver/test_resolver_schema_resolver.py index 3be3267dd..36caa3d7e 100644 --- a/tests/test_resolver/test_resolver_schema_resolver.py +++ b/tests/test_resolver/test_resolver_schema_resolver.py @@ -1,4 +1,6 @@ +import pathlib import urllib +import urllib.parse import pytest @@ -20,3 +22,246 @@ def test___init__invalid_data(mocker): invalid_url = mocker.Mock() with pytest.raises(urllib.error.URLError): SchemaResolver(invalid_url) + + +def test__init_with_filepath(mocker): + mocker.patch("openapi_python_client.resolver.schema_resolver.SchemaResolver._isapath", return_value=True) + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", return_value={}) + path = mocker.MagicMock() + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + resolver = SchemaResolver(path) + resolver.resolve() + + path.absolute().read_bytes.assert_called_once() + + +def test__init_with_url(mocker): + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", return_value={}) + url_parse = mocker.patch( + "urllib.parse.urlparse", + return_value=urllib.parse.ParseResult( + scheme="http", netloc="foobar.io", path="foo", params="", query="", fragment="/bar" + ), + ) + get = mocker.patch("httpx.get") + + url = mocker.MagicMock() + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + resolver = SchemaResolver(url) + resolver.resolve() + + url_parse.assert_called_once_with(url) + get.assert_called_once() + + +def test__resolve_schema_references_with_path(mocker): + read_bytes = mocker.patch("pathlib.Path.read_bytes") + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + path = pathlib.Path("/foo/bar/foobar") + path_parent = str(path.parent) + schema = {"foo": {"$ref": "foobar#/foobar"}} + external_schemas = {} + errors = [] + + def _datalaod_mocked_result(path, data): + if path == "/foo/bar/foobar": + return {"foobar": "bar", "bar": {"$ref": "bar#/foobar"}, "local": {"$ref": "#/toto"}} + + if path == "/foo/bar/bar": + return {"foobar": "bar", "bar": {"$ref": "../bar#/foobar"}} + + if path == "/foo/bar": + return {"foobar": "bar/bar", "bar": {"$ref": "/barfoo.io/foobar#foobar"}} + + if path == "/barfoo.io/foobar": + return {"foobar": "barfoo.io/foobar", "bar": {"$ref": "./bar#foobar"}} + + if path == "/barfoo.io/bar": + return {"foobar": "barfoo.io/bar", "bar": {"$ref": "/bar.foo/foobar"}} + + if path == "/bar.foo/foobar": + return {"foobar": "bar.foo/foobar", "bar": {"$ref": "/foo.bar/foobar"}} + + if path == "/foo.bar/foobar": + return {"foobar": "foo.bar/foobar", "bar": {"$ref": "/foo/bar/foobar"}} # Loop to first path + + raise ValueError(f"Unexpected path {path}") + + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", _datalaod_mocked_result) + resolver = SchemaResolver(path) + resolver._resolve_schema_references(path_parent, schema, external_schemas, errors, True) + + assert len(errors) == 0 + assert "/foo/bar/foobar" in external_schemas + assert "/foo/bar/bar" in external_schemas + assert "/foo/bar" in external_schemas + assert "/barfoo.io/foobar" in external_schemas + assert "/barfoo.io/bar" in external_schemas + assert "/bar.foo/foobar" in external_schemas + assert "/foo.bar/foobar" in external_schemas + + +def test__resolve_schema_references_with_url(mocker): + get = mocker.patch("httpx.get") + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + url = "http://foobar.io/foo/bar/foobar" + url_parent = "http://foobar.io/foo/bar/" + schema = {"foo": {"$ref": "foobar#/foobar"}} + external_schemas = {} + errors = [] + + def _datalaod_mocked_result(url, data): + if url == "http://foobar.io/foo/bar/foobar": + return {"foobar": "bar", "bar": {"$ref": "bar#/foobar"}, "local": {"$ref": "#/toto"}} + + if url == "http://foobar.io/foo/bar/bar": + return {"foobar": "bar", "bar": {"$ref": "../bar#/foobar"}} + + if url == "http://foobar.io/foo/bar": + return {"foobar": "bar/bar", "bar": {"$ref": "//barfoo.io/foobar#foobar"}} + + if url == "http://barfoo.io/foobar": + return {"foobar": "barfoo.io/foobar", "bar": {"$ref": "./bar#foobar"}} + + if url == "http://barfoo.io/bar": + return {"foobar": "barfoo.io/bar", "bar": {"$ref": "https://bar.foo/foobar"}} + + if url == "https://bar.foo/foobar": + return {"foobar": "bar.foo/foobar", "bar": {"$ref": "//foo.bar/foobar"}} + + if url == "https://foo.bar/foobar": + return {"foobar": "foo.bar/foobar", "bar": {"$ref": "http://foobar.io/foo/bar/foobar"}} # Loop to first uri + + raise ValueError(f"Unexpected url {url}") + + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", _datalaod_mocked_result) + + resolver = SchemaResolver(url) + resolver._resolve_schema_references(url_parent, schema, external_schemas, errors, True) + + assert len(errors) == 0 + assert "http://foobar.io/foo/bar/bar" in external_schemas + assert "http://foobar.io/foo/bar" in external_schemas + assert "http://barfoo.io/foobar" in external_schemas + assert "http://barfoo.io/foobar" in external_schemas + assert "http://barfoo.io/bar" in external_schemas + assert "https://bar.foo/foobar" in external_schemas + assert "https://foo.bar/foobar" in external_schemas + + +def test__resolve_schema_references_mix_path_and_url(mocker): + read_bytes = mocker.patch("pathlib.Path.read_bytes") + get = mocker.patch("httpx.get") + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + path = pathlib.Path("/foo/bar/foobar") + path_parent = str(path.parent) + schema = {"foo": {"$ref": "foobar#/foobar"}} + external_schemas = {} + errors = [] + + def _datalaod_mocked_result(path, data): + if path == "/foo/bar/foobar": + return {"foobar": "bar", "bar": {"$ref": "bar#/foobar"}, "local": {"$ref": "#/toto"}} + + if path == "/foo/bar/bar": + return {"foobar": "bar", "bar": {"$ref": "../bar#/foobar"}} + + if path == "/foo/bar": + return {"foobar": "bar/bar", "bar": {"$ref": "//barfoo.io/foobar#foobar"}} + + if path == "http://barfoo.io/foobar": + return {"foobar": "barfoo.io/foobar", "bar": {"$ref": "./bar#foobar"}} + + if path == "http://barfoo.io/bar": + return {"foobar": "barfoo.io/bar", "bar": {"$ref": "https://bar.foo/foobar"}} + + if path == "https://bar.foo/foobar": + return {"foobar": "bar.foo/foobar", "bar": {"$ref": "//foo.bar/foobar"}} + + if path == "https://foo.bar/foobar": + return {"foobar": "foo.bar/foobar"} + + raise ValueError(f"Unexpected path {path}") + + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", _datalaod_mocked_result) + resolver = SchemaResolver(path) + resolver._resolve_schema_references(path_parent, schema, external_schemas, errors, True) + + assert len(errors) == 0 + assert "/foo/bar/foobar" in external_schemas + assert "/foo/bar/bar" in external_schemas + assert "/foo/bar" in external_schemas + assert "http://barfoo.io/foobar" in external_schemas + assert "http://barfoo.io/bar" in external_schemas + assert "https://bar.foo/foobar" in external_schemas + assert "https://foo.bar/foobar" in external_schemas + + +def test__resolve_schema_references_with_error(mocker): + get = mocker.patch("httpx.get") + + import httpcore + + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + url = "http://foobar.io/foo/bar/foobar" + url_parent = "http://foobar.io/foo/bar/" + schema = {"foo": {"$ref": "foobar#/foobar"}} + external_schemas = {} + errors = [] + + def _datalaod_mocked_result(url, data): + if url == "http://foobar.io/foo/bar/foobar": + return { + "foobar": "bar", + "bar": {"$ref": "bar#/foobar"}, + "barfoor": {"$ref": "barfoo#foobar"}, + "local": {"$ref": "#/toto"}, + } + + if url == "http://foobar.io/foo/bar/bar": + raise httpcore.NetworkError("mocked error") + + if url == "http://foobar.io/foo/bar/barfoo": + return {"foobar": "foo/bar/barfoo", "bar": {"$ref": "//barfoo.io/foobar#foobar"}} + + if url == "http://barfoo.io/foobar": + return {"foobar": "foobar"} + + mocker.patch("openapi_python_client.resolver.schema_resolver.DataLoader.load", _datalaod_mocked_result) + resolver = SchemaResolver(url) + resolver._resolve_schema_references(url_parent, schema, external_schemas, errors, True) + + assert len(errors) == 1 + assert errors[0] == "Failed to gather external reference data of bar#/foobar from http://foobar.io/foo/bar/bar" + assert "http://foobar.io/foo/bar/bar" not in external_schemas + assert "http://foobar.io/foo/bar/foobar" in external_schemas + assert "http://foobar.io/foo/bar/barfoo" in external_schemas + assert "http://barfoo.io/foobar" in external_schemas + + +def test___lookup_schema_references(): + from openapi_python_client.resolver.schema_resolver import SchemaResolver + + data_set = { + "foo": {"$ref": "#/ref_1"}, + "bar": {"foobar": {"$ref": "#/ref_2"}}, + "foobar": [{"foo": {"$ref": "#/ref_3"}}, {"bar": [{"foobar": {"$ref": "#/ref_4"}}]}], + } + + resolver = SchemaResolver("http://foobar.io") + expected_references = sorted([f"#/ref_{x}" for x in range(1, 5)]) + references = sorted([x.value for x in resolver._lookup_schema_references(data_set)]) + + for idx, ref in enumerate(references): + assert expected_references[idx] == ref From bb5e10f4a53fd4f19a80b69223de866d8fecf57d Mon Sep 17 00:00:00 2001 From: avy Date: Mon, 8 Mar 2021 16:32:27 +0100 Subject: [PATCH 12/17] fix absolute paths feature bugs --- openapi_python_client/resolver/reference.py | 19 +++- .../resolver/resolved_schema.py | 46 +++++---- .../resolver/schema_resolver.py | 2 +- .../test_resolver_resolved_schema.py | 96 +++++++++++++++++++ 4 files changed, 141 insertions(+), 22 deletions(-) create mode 100644 tests/test_resolver/test_resolver_resolved_schema.py diff --git a/openapi_python_client/resolver/reference.py b/openapi_python_client/resolver/reference.py index 534232bcd..dbd5bd007 100644 --- a/openapi_python_client/resolver/reference.py +++ b/openapi_python_client/resolver/reference.py @@ -1,4 +1,6 @@ import urllib.parse +from pathlib import Path +from typing import Union from .pointer import Pointer @@ -6,14 +8,29 @@ class Reference: """ https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03 """ - def __init__(self, reference: str): + def __init__(self, reference: str, parent: str = None): self._ref = reference self._parsed_ref = urllib.parse.urlparse(reference) + self._parent = parent @property def path(self) -> str: return urllib.parse.urldefrag(self._parsed_ref.geturl()).url + @property + def abs_path(self) -> str: + if self._parent: + parent_dir = Path(self._parent) + abs_path = parent_dir.joinpath(self.path) + abs_path = abs_path.resolve() + return str(abs_path) + else: + return self.path + + @property + def parent(self) -> Union[str, None]: + return self._parent + @property def pointer(self) -> Pointer: frag = self._parsed_ref.fragment diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py index 7e528e694..6cd37415b 100644 --- a/openapi_python_client/resolver/resolved_schema.py +++ b/openapi_python_client/resolver/resolved_schema.py @@ -6,11 +6,12 @@ class ResolvedSchema: - def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[str]): + def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[str], parent: str): self._root: SchemaData = root self._refs: Dict[str, SchemaData] = refs self._errors: List[str] = errors self._resolved_remotes_components: SchemaData = cast(SchemaData, {}) + self._parent = parent self._resolved_schema: SchemaData = cast(SchemaData, {}) if len(self._errors) == 0: @@ -24,21 +25,29 @@ def schema(self) -> SchemaData: def errors(self) -> List[str]: return self._errors.copy() + def _dict_deep_update(self, d: Dict[str, Any], u: Dict[str, Any]) -> Dict[str, Any]: + for k, v in u.items(): + if isinstance(v, Dict): + d[k] = self._dict_deep_update(d.get(k, {}), v) + else: + d[k] = v + return d + def _process(self) -> None: self._process_remote_paths() - self._process_remote_components(self._root) - self._root.update(self._resolved_remotes_components) + self._process_remote_components(self._root, parent_path=self._parent) + self._dict_deep_update(self._root, self._resolved_remotes_components) def _process_remote_paths(self) -> None: refs_to_replace = [] for owner, ref_key, ref_val in self._lookup_schema_references_in(self._root, "paths"): - ref = Reference(ref_val) + ref = Reference(ref_val, self._parent) if ref.is_local(): continue - remote_path = ref.pointer.value - path = ref.path + remote_path = ref.abs_path + path = ref.pointer.unescapated_value if remote_path not in self._refs: self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) @@ -51,23 +60,23 @@ def _process_remote_paths(self) -> None: refs_to_replace.append((owner, remote_schema, remote_value)) for owner, remote_schema, remote_value in refs_to_replace: - self._process_remote_components(remote_schema, remote_value, 1) + self._process_remote_components(remote_schema, remote_value, 1, self._parent) self._replace_reference_with(owner, remote_value) def _process_remote_components( - self, owner: SchemaData, subpart: Union[SchemaData, None] = None, depth: int = 0 + self, owner: SchemaData, subpart: Union[SchemaData, None] = None, depth: int = 0, parent_path: str = None ) -> None: target = subpart if subpart else owner for parent, ref_key, ref_val in self._lookup_schema_references(target): - ref = Reference(ref_val) + ref = Reference(ref_val, parent_path) if ref.is_local(): # print('Found local reference >> {0}'.format(ref.value)) if depth > 0: self._transform_to_local_components(owner, ref) else: - remote_path = ref.pointer.value + remote_path = ref.abs_path if remote_path not in self._refs: self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) else: @@ -79,15 +88,12 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N self._ensure_components_dir_exists(ref) # print('Processing remote component > {0}'.format(ref.value)) - remote_component = self._lookup_dict(owner, ref.path) + remote_component = self._lookup_dict(owner, ref.pointer.value) pointer_parent = ref.pointer.parent if pointer_parent is not None: root_components_dir = self._lookup_dict(self._resolved_remotes_components, pointer_parent.value) - component_name = ref.path.split("/")[-1] - - if component_name == "SorTransparentContainer" or component_name == "sorTransparentContainer": - print(ref.value) + component_name = ref.pointer.value.split("/")[-1] if remote_component is None: print("Weirdy relookup of >> {0}".format(ref.value)) @@ -95,12 +101,12 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N return if "$ref" in remote_component: - subref = Reference(remote_component["$ref"]) + subref = Reference(remote_component["$ref"], ref.parent) if not subref.is_local(): print("Lookup remote ref >>> {0}".format(subref.value)) - return self._process_remote_components(remote_component) + self._process_remote_components(remote_component, parent_path=ref.parent) - if root_components_dir: + if root_components_dir is not None: if component_name in root_components_dir: local_component_hash = self._reference_schema_hash(root_components_dir[component_name]) remote_component_hash = self._reference_schema_hash(remote_component) @@ -117,7 +123,7 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N # print('=' * 120) else: root_components_dir[component_name] = remote_component - self._process_remote_components(owner, remote_component, 2) + self._process_remote_components(owner, remote_component, 2, ref.parent) def _ensure_components_dir_exists(self, ref: Reference) -> None: cursor = self._resolved_remotes_components @@ -134,7 +140,7 @@ def _ensure_components_dir_exists(self, ref: Reference) -> None: cursor = cursor[key] def _transform_to_local_ref(self, owner: Dict[str, Any], ref: Reference) -> None: - owner["$ref"] = "#{0}".format(ref.path) + owner["$ref"] = "#{0}".format(ref.pointer.value) def _lookup_dict(self, attr: SchemaData, query: str) -> Union[SchemaData, None]: cursor = attr diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index 5a1c602e7..2d20952b1 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -53,7 +53,7 @@ def resolve(self, recursive: bool = True) -> ResolvedSchema: root_schema = self._fetch_url_reference(self._root_url) self._resolve_schema_references(self._parent_path, root_schema, external_schemas, errors, recursive) - return ResolvedSchema(root_schema, external_schemas, errors) + return ResolvedSchema(root_schema, external_schemas, errors, self._parent_path) def _resolve_schema_references( self, diff --git a/tests/test_resolver/test_resolver_resolved_schema.py b/tests/test_resolver/test_resolver_resolved_schema.py new file mode 100644 index 000000000..b31111d2d --- /dev/null +++ b/tests/test_resolver/test_resolver_resolved_schema.py @@ -0,0 +1,96 @@ +import pathlib +import urllib +import urllib.parse + +import pytest + + +def test__resolved_schema_with_resolved_external_references(): + + from openapi_python_client.resolver.resolved_schema import ResolvedSchema + + root_schema = {"foobar": {"$ref": "foobar.yaml#/foo"}} + external_schemas = {"/home/user/foobar.yaml": {"foo": {"description": "foobar_description"}}} + errors = [] + + resolved_schema = ResolvedSchema(root_schema, external_schemas, errors, "/home/user").schema + + assert len(errors) == 0 + assert "foo" in resolved_schema + assert "foobar" in resolved_schema + assert "$ref" in resolved_schema["foobar"] + assert "#/foo" in resolved_schema["foobar"]["$ref"] + assert "description" in resolved_schema["foo"] + assert "foobar_description" in resolved_schema["foo"]["description"] + + +def test__resolved_schema_with_absolute_paths(): + + from openapi_python_client.resolver.resolved_schema import ResolvedSchema + + root_schema = {"foobar": {"$ref": "foobar.yaml#/foo"}, "barfoo": {"$ref": "../barfoo.yaml#/bar"}} + + external_schemas = { + "/home/user/foobar.yaml": {"foo": {"description": "foobar_description"}}, + "/home/barfoo.yaml": {"bar": {"description": "barfoo_description"}}, + } + + errors = [] + + resolved_schema = ResolvedSchema(root_schema, external_schemas, errors, "/home/user").schema + + assert len(errors) == 0 + assert "foo" in resolved_schema + assert "bar" in resolved_schema + assert "foobar" in resolved_schema + assert "barfoo" in resolved_schema + assert "$ref" in resolved_schema["foobar"] + assert "#/foo" in resolved_schema["foobar"]["$ref"] + assert "$ref" in resolved_schema["barfoo"] + assert "#/bar" in resolved_schema["barfoo"]["$ref"] + assert "description" in resolved_schema["foo"] + assert "foobar_description" in resolved_schema["foo"]["description"] + assert "description" in resolved_schema["bar"] + assert "barfoo_description" in resolved_schema["bar"]["description"] + + +def test__resolved_schema_with_conflicts(): + + from openapi_python_client.resolver.resolved_schema import ResolvedSchema + + root_schema = { + "foobar": {"$ref": "first_instance.yaml#/foo"}, + "barfoo": {"$ref": "second_instance.yaml#/foo"}, + "foobarfoo": {"$ref": "second_instance.yaml#/foo"}, + "barfoobar": {"$ref": "first_instance.yaml#/foo"}, + } + + external_schemas = { + "/home/user/first_instance.yaml": {"foo": {"description": "foo_first_description"}}, + "/home/user/second_instance.yaml": {"foo": {"description": "foo_second_description"}}, + } + + current_result = { + "foobar": {"$ref": "#/foo"}, + "barfoo": {"$ref": "#/foo"}, + "foobarfoo": {"$ref": "#/foo"}, + "barfoobar": {"$ref": "#/foo"}, + "foo": {"description": "foo_first_description"}, + } + + desired_result = { + "foobar": {"$ref": "#/foo"}, + "barfoo": {"$ref": "#/foo2"}, + "foobarfoo": {"$ref": "#/foo2"}, + "barfoobar": {"$ref": "#/foo"}, + "foo": {"description": "foo_first_description"}, + "foo2": {"description": "foo_second_description"}, + } + + errors = [] + + resolved_schema = ResolvedSchema(root_schema, external_schemas, errors, "/home/user").schema + + print(resolved_schema) + assert len(errors) == 0 + assert resolved_schema == desired_result From ad9e2f81f610d98869d162c3d2dea2c46ef67ef1 Mon Sep 17 00:00:00 2001 From: avy Date: Fri, 12 Mar 2021 17:11:30 +0100 Subject: [PATCH 13/17] add collision_resolver --- .../resolver/collision_resolver.py | 156 ++++++++++++++++++ .../resolver/resolved_schema.py | 38 +---- .../resolver/schema_resolver.py | 2 + .../test_resolver_collision_resolver.py | 58 +++++++ .../test_resolver_resolved_schema.py | 42 ----- 5 files changed, 222 insertions(+), 74 deletions(-) create mode 100644 openapi_python_client/resolver/collision_resolver.py create mode 100644 tests/test_resolver/test_resolver_collision_resolver.py diff --git a/openapi_python_client/resolver/collision_resolver.py b/openapi_python_client/resolver/collision_resolver.py new file mode 100644 index 000000000..1fbf117c6 --- /dev/null +++ b/openapi_python_client/resolver/collision_resolver.py @@ -0,0 +1,156 @@ +import hashlib +from typing import Any, Dict, List, Tuple + +from .reference import Reference +from .resolver_types import SchemaData + + +class CollisionResolver: + def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[str], parent: str): + self._root: SchemaData = root + self._refs: Dict[str, SchemaData] = refs + self._errors: List[str] = errors + self._parent = parent + self._refs_index: Dict[str, str] = dict() + self._keys_to_replace: Dict[Reference, Tuple[int, SchemaData]] = dict() + self._debug = set() + + def _browse_schema(self, attr: Any, root_attr: Any) -> None: + if isinstance(attr, dict): + attr_copy = {**attr} # Create a shallow copy + for key, val in attr_copy.items(): + if key == "$ref": + ref = Reference(val, self._parent) + value = ref.pointer.value + + assert value + + schema = self._get_from_ref(ref, root_attr) + # if value == '/components/schemas/EeSubscription': + # print(schema) + # print() + hashed_schema = self._reference_schema_hash(schema) + + if value in self._refs_index.keys(): + if self._refs_index[value] != hashed_schema: + self._debug.add(ref.pointer.value) + if ref.is_local(): + self._increment_ref(ref, root_attr, hashed_schema, attr, key) + else: + assert ref.abs_path in self._refs.keys() + self._increment_ref(ref, self._refs[ref.abs_path], hashed_schema, attr, key) + else: + self._refs_index[value] = hashed_schema + else: + self._browse_schema(val, root_attr) + + elif isinstance(attr, list): + for val in attr: + self._browse_schema(val, root_attr) + + def _get_from_ref(self, ref: Reference, attr: SchemaData) -> SchemaData: + if ref.is_local(): + cursor = attr + else: + assert ref.abs_path in self._refs.keys() + cursor = self._refs[ref.abs_path] + query = ref.pointer.unescapated_value + query_parts = [] + + if query.startswith("/paths"): + query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] + else: + query_parts = query.split("/") + + for key in query_parts: + if key == "": + continue + + if isinstance(cursor, dict) and key in cursor: + cursor = cursor[key] + else: + print('ERROR') + + # if list(cursor) == ['$ref']: + # ref = Reference(cursor['$ref'],self._parent) + # if ref.is_remote(): + # print('remote_ref') + # print(ref.value) + # attr = self._refs[ref.abs_path] + # return self._get_from_ref(ref,attr) + + return cursor + + def _increment_ref( + self, ref: Reference, schema: SchemaData, hashed_schema: str, attr: Dict[str, Any], key: str + ) -> None: + i = 2 + value = ref.pointer.value + incremented_value = value + "_" + str(i) + + while incremented_value in self._refs_index.keys(): + if self._refs_index[incremented_value] == hashed_schema: + attr[key] = ref.value + "_" + str(i) + return + else: + i = i + 1 + incremented_value = value + "_" + str(i) + + attr[key] = ref.value + "_" + str(i) + self._refs_index[incremented_value] = hashed_schema + self._keys_to_replace[ref] = (i, schema) + + def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> None: + cursor = attr + query = ref_pointer + query_parts = [] + + if query.startswith("/paths"): + query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] + else: + query_parts = query.split("/") + + last_key = query_parts[-1] + + for key in query_parts: + if key == "": + continue + + if key == last_key: + assert key in cursor + cursor[key + "_" + str(i)] = cursor.pop(key) + return + + if isinstance(cursor, dict) and key in cursor: + cursor = cursor[key] + else: + return + + def resolve(self) -> None: + debug = set() + self._browse_schema(self._root, self._root) + for file, schema in self._refs.items(): + self._browse_schema(schema, schema) + for a, b in self._keys_to_replace.items(): + debug.add(a.pointer.value) + self._modify_root_ref_name(a.pointer.value, b[0], b[1]) + + print(self._debug) + + def _reference_schema_hash(self, schema: Dict[str, Any]) -> str: + md5 = hashlib.md5() + hash_elms = [] + for key in schema.keys(): + if key == "description": + hash_elms.append(schema[key]) + if key == "type": + hash_elms.append(schema[key]) + if key == "allOf": + for item in schema[key]: + hash_elms.append(str(item)) + + hash_elms.append(key) + + hash_elms.sort() + md5.update(";".join(hash_elms).encode("utf-8")) + return md5.hexdigest() diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py index 6cd37415b..9d0fa9066 100644 --- a/openapi_python_client/resolver/resolved_schema.py +++ b/openapi_python_client/resolver/resolved_schema.py @@ -1,4 +1,3 @@ -import hashlib from typing import Any, Dict, Generator, List, Tuple, Union, cast from .reference import Reference @@ -96,7 +95,7 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N component_name = ref.pointer.value.split("/")[-1] if remote_component is None: - print("Weirdy relookup of >> {0}".format(ref.value)) + print("Weird relookup of >> {0}".format(ref.value)) assert ref.is_local() and self._lookup_dict(self._resolved_remotes_components, ref.path) return @@ -108,19 +107,14 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N if root_components_dir is not None: if component_name in root_components_dir: - local_component_hash = self._reference_schema_hash(root_components_dir[component_name]) - remote_component_hash = self._reference_schema_hash(remote_component) - - if local_component_hash == remote_component_hash: + if remote_component == root_components_dir[component_name]: return else: + print("FOUND COLLISION IN RESOLVED SCHEMA, SHOULD NOT HAPPEN") + # print(component_name) + # print(remote_component) + # print(root_components_dir[component_name]) pass - # print('=' * 120) - # print('TODO: Find compoment collision to handle on >>> {0}'.format(ref.path)) - # print('Local componente {0} >> {1}'.format(local_component_hash, root_components_dir[component_name])) - # print('') - # print('Remote componente {0} >> {1}'.format(remote_component_hash, remote_component)) - # print('=' * 120) else: root_components_dir[component_name] = remote_component self._process_remote_components(owner, remote_component, 2, ref.parent) @@ -186,23 +180,3 @@ def _lookup_schema_references(self, attr: Any) -> Generator[Tuple[SchemaData, st elif isinstance(attr, list): for val in attr: yield from self._lookup_schema_references(val) - - def _reference_schema_hash(self, schema: Dict[str, Any]) -> str: - md5 = hashlib.md5() - hash_elms = [] - for key in schema.keys(): - if key == "description": - continue - - if key == "type": - hash_elms.append(schema[key]) - - if key == "allOf": - for item in schema[key]: - hash_elms.append(str(item)) - - hash_elms.append(key) - - hash_elms.sort() - md5.update(";".join(hash_elms).encode("utf-8")) - return md5.hexdigest() diff --git a/openapi_python_client/resolver/schema_resolver.py b/openapi_python_client/resolver/schema_resolver.py index 2d20952b1..3521842fb 100644 --- a/openapi_python_client/resolver/schema_resolver.py +++ b/openapi_python_client/resolver/schema_resolver.py @@ -5,6 +5,7 @@ import httpx +from .collision_resolver import CollisionResolver from .data_loader import DataLoader from .reference import Reference from .resolved_schema import ResolvedSchema @@ -53,6 +54,7 @@ def resolve(self, recursive: bool = True) -> ResolvedSchema: root_schema = self._fetch_url_reference(self._root_url) self._resolve_schema_references(self._parent_path, root_schema, external_schemas, errors, recursive) + CollisionResolver(root_schema, external_schemas, errors, self._parent_path).resolve() return ResolvedSchema(root_schema, external_schemas, errors, self._parent_path) def _resolve_schema_references( diff --git a/tests/test_resolver/test_resolver_collision_resolver.py b/tests/test_resolver/test_resolver_collision_resolver.py new file mode 100644 index 000000000..4f0ccf37d --- /dev/null +++ b/tests/test_resolver/test_resolver_collision_resolver.py @@ -0,0 +1,58 @@ +import pathlib +import urllib +import urllib.parse + +import pytest + + +def test__collision_resolver(): + + from openapi_python_client.resolver.collision_resolver import CollisionResolver + from openapi_python_client.resolver.resolved_schema import ResolvedSchema + + root_schema = { + "foobar": {"$ref": "first_instance.yaml#/foo"}, + "barfoo": {"$ref": "second_instance.yaml#/foo"}, + "barbarfoo": {"$ref": "third_instance.yaml#/foo"}, + "foobarfoo": {"$ref": "second_instance.yaml#/foo"}, + "barfoobar": {"$ref": "first_instance.yaml#/bar/foo"}, + "localref": {"$ref": "#/local_ref"}, + "local_ref": {"description": "a local ref"}, + "last": {"$ref": "first_instance.yaml#/fourth_instance"}, + } + + external_schemas = { + "/home/user/first_instance.yaml": { + "foo": {"description": "foo_first_description"}, + "bar": {"foo": {"description": "nested foo"}}, + "fourth_instance": {"$ref": "fourth_instance.yaml#/foo"}, + }, + "/home/user/second_instance.yaml": {"foo": {"description": "foo_second_description"}}, + "/home/user/third_instance.yaml": {"foo": {"description": "foo_third_description"}}, + "/home/user/fourth_instance.yaml": {"foo": {"description": "foo_fourth_description"}}, + } + + desired_result = { + "foobar": {"$ref": "#/foo"}, + "barfoo": {"$ref": "#/foo_2"}, + "barbarfoo": {"$ref": "#/foo_3"}, + "foobarfoo": {"$ref": "#/foo_2"}, + "barfoobar": {"$ref": "#/bar/foo"}, + "localref": {"$ref": "#/local_ref"}, + "local_ref": {"description": "a local ref"}, + "last": {"$ref": "#/fourth_instance"}, + "foo": {"description": "foo_first_description"}, + "foo_2": {"description": "foo_second_description"}, + "foo_3": {"description": "foo_third_description"}, + "bar": {"foo": {"description": "nested foo"}}, + "foo_4": {"description": "foo_fourth_description"}, + "fourth_instance": {"$ref": "#/foo_4"}, + } + errors = [] + + CollisionResolver(root_schema, external_schemas, errors, "/home/user").resolve() + resolved_schema = ResolvedSchema(root_schema, external_schemas, errors, "/home/user").schema + + print(resolved_schema) + assert len(errors) == 0 + assert resolved_schema == desired_result diff --git a/tests/test_resolver/test_resolver_resolved_schema.py b/tests/test_resolver/test_resolver_resolved_schema.py index b31111d2d..5728a24eb 100644 --- a/tests/test_resolver/test_resolver_resolved_schema.py +++ b/tests/test_resolver/test_resolver_resolved_schema.py @@ -52,45 +52,3 @@ def test__resolved_schema_with_absolute_paths(): assert "foobar_description" in resolved_schema["foo"]["description"] assert "description" in resolved_schema["bar"] assert "barfoo_description" in resolved_schema["bar"]["description"] - - -def test__resolved_schema_with_conflicts(): - - from openapi_python_client.resolver.resolved_schema import ResolvedSchema - - root_schema = { - "foobar": {"$ref": "first_instance.yaml#/foo"}, - "barfoo": {"$ref": "second_instance.yaml#/foo"}, - "foobarfoo": {"$ref": "second_instance.yaml#/foo"}, - "barfoobar": {"$ref": "first_instance.yaml#/foo"}, - } - - external_schemas = { - "/home/user/first_instance.yaml": {"foo": {"description": "foo_first_description"}}, - "/home/user/second_instance.yaml": {"foo": {"description": "foo_second_description"}}, - } - - current_result = { - "foobar": {"$ref": "#/foo"}, - "barfoo": {"$ref": "#/foo"}, - "foobarfoo": {"$ref": "#/foo"}, - "barfoobar": {"$ref": "#/foo"}, - "foo": {"description": "foo_first_description"}, - } - - desired_result = { - "foobar": {"$ref": "#/foo"}, - "barfoo": {"$ref": "#/foo2"}, - "foobarfoo": {"$ref": "#/foo2"}, - "barfoobar": {"$ref": "#/foo"}, - "foo": {"description": "foo_first_description"}, - "foo2": {"description": "foo_second_description"}, - } - - errors = [] - - resolved_schema = ResolvedSchema(root_schema, external_schemas, errors, "/home/user").schema - - print(resolved_schema) - assert len(errors) == 0 - assert resolved_schema == desired_result From b45ef12de9eb2ddfecd611f73c0174dd3b7a2679 Mon Sep 17 00:00:00 2001 From: avy Date: Mon, 15 Mar 2021 13:56:24 +0100 Subject: [PATCH 14/17] =?UTF-8?q?fix=20collision=20issues=20with=20same=20?= =?UTF-8?q?schema=20in=20two=20different=20files=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../resolver/collision_resolver.py | 43 ++++++++----------- .../test_resolver_collision_resolver.py | 3 ++ 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/openapi_python_client/resolver/collision_resolver.py b/openapi_python_client/resolver/collision_resolver.py index 1fbf117c6..ca4bfdba9 100644 --- a/openapi_python_client/resolver/collision_resolver.py +++ b/openapi_python_client/resolver/collision_resolver.py @@ -12,8 +12,7 @@ def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[s self._errors: List[str] = errors self._parent = parent self._refs_index: Dict[str, str] = dict() - self._keys_to_replace: Dict[Reference, Tuple[int, SchemaData]] = dict() - self._debug = set() + self._keys_to_replace: Dict[str, Tuple[int, SchemaData, str]] = dict() def _browse_schema(self, attr: Any, root_attr: Any) -> None: if isinstance(attr, dict): @@ -26,14 +25,10 @@ def _browse_schema(self, attr: Any, root_attr: Any) -> None: assert value schema = self._get_from_ref(ref, root_attr) - # if value == '/components/schemas/EeSubscription': - # print(schema) - # print() hashed_schema = self._reference_schema_hash(schema) if value in self._refs_index.keys(): if self._refs_index[value] != hashed_schema: - self._debug.add(ref.pointer.value) if ref.is_local(): self._increment_ref(ref, root_attr, hashed_schema, attr, key) else: @@ -49,11 +44,10 @@ def _browse_schema(self, attr: Any, root_attr: Any) -> None: self._browse_schema(val, root_attr) def _get_from_ref(self, ref: Reference, attr: SchemaData) -> SchemaData: - if ref.is_local(): - cursor = attr - else: + if ref.is_remote(): assert ref.abs_path in self._refs.keys() - cursor = self._refs[ref.abs_path] + attr = self._refs[ref.abs_path] + cursor = attr query = ref.pointer.unescapated_value query_parts = [] @@ -69,15 +63,13 @@ def _get_from_ref(self, ref: Reference, attr: SchemaData) -> SchemaData: if isinstance(cursor, dict) and key in cursor: cursor = cursor[key] else: - print('ERROR') + print("ERROR") - # if list(cursor) == ['$ref']: - # ref = Reference(cursor['$ref'],self._parent) - # if ref.is_remote(): - # print('remote_ref') - # print(ref.value) - # attr = self._refs[ref.abs_path] - # return self._get_from_ref(ref,attr) + if list(cursor) == ["$ref"]: + ref2 = Reference(cursor["$ref"], self._parent) + if ref2.is_remote(): + attr = self._refs[ref2.abs_path] + return self._get_from_ref(ref2, attr) return cursor @@ -90,15 +82,18 @@ def _increment_ref( while incremented_value in self._refs_index.keys(): if self._refs_index[incremented_value] == hashed_schema: - attr[key] = ref.value + "_" + str(i) - return + if ref.value not in self._keys_to_replace.keys(): + break # have to increment target key aswell + else: + attr[key] = ref.value + "_" + str(i) + return else: i = i + 1 incremented_value = value + "_" + str(i) attr[key] = ref.value + "_" + str(i) self._refs_index[incremented_value] = hashed_schema - self._keys_to_replace[ref] = (i, schema) + self._keys_to_replace[ref.value] = (i, schema, ref.pointer.value) def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> None: cursor = attr @@ -127,15 +122,11 @@ def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> N return def resolve(self) -> None: - debug = set() self._browse_schema(self._root, self._root) for file, schema in self._refs.items(): self._browse_schema(schema, schema) for a, b in self._keys_to_replace.items(): - debug.add(a.pointer.value) - self._modify_root_ref_name(a.pointer.value, b[0], b[1]) - - print(self._debug) + self._modify_root_ref_name(b[2], b[0], b[1]) def _reference_schema_hash(self, schema: Dict[str, Any]) -> str: md5 = hashlib.md5() diff --git a/tests/test_resolver/test_resolver_collision_resolver.py b/tests/test_resolver/test_resolver_collision_resolver.py index 4f0ccf37d..b19541141 100644 --- a/tests/test_resolver/test_resolver_collision_resolver.py +++ b/tests/test_resolver/test_resolver_collision_resolver.py @@ -19,6 +19,7 @@ def test__collision_resolver(): "localref": {"$ref": "#/local_ref"}, "local_ref": {"description": "a local ref"}, "last": {"$ref": "first_instance.yaml#/fourth_instance"}, + "baz": {"$ref": "fifth_instance.yaml#/foo"}, } external_schemas = { @@ -30,11 +31,13 @@ def test__collision_resolver(): "/home/user/second_instance.yaml": {"foo": {"description": "foo_second_description"}}, "/home/user/third_instance.yaml": {"foo": {"description": "foo_third_description"}}, "/home/user/fourth_instance.yaml": {"foo": {"description": "foo_fourth_description"}}, + "/home/user/fifth_instance.yaml": {"foo": {"description": "foo_second_description"}}, } desired_result = { "foobar": {"$ref": "#/foo"}, "barfoo": {"$ref": "#/foo_2"}, + "baz": {"$ref": "#/foo_2"}, "barbarfoo": {"$ref": "#/foo_3"}, "foobarfoo": {"$ref": "#/foo_2"}, "barfoobar": {"$ref": "#/bar/foo"}, From 7b5389a883855223003f67619f3362f03ad4844d Mon Sep 17 00:00:00 2001 From: avy Date: Mon, 15 Mar 2021 14:33:33 +0100 Subject: [PATCH 15/17] do not crash on retry of key modification --- openapi_python_client/resolver/collision_resolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openapi_python_client/resolver/collision_resolver.py b/openapi_python_client/resolver/collision_resolver.py index ca4bfdba9..aafbfca21 100644 --- a/openapi_python_client/resolver/collision_resolver.py +++ b/openapi_python_client/resolver/collision_resolver.py @@ -111,8 +111,8 @@ def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> N if key == "": continue - if key == last_key: - assert key in cursor + if key == last_key and key + "_" + str(i) not in cursor: + assert key in cursor, "Didnt find %s in %s" % (ref_pointer, attr) cursor[key + "_" + str(i)] = cursor.pop(key) return From db9aa70967387dc68f52950ee5f739961fa350a9 Mon Sep 17 00:00:00 2001 From: avy Date: Mon, 15 Mar 2021 16:09:59 +0100 Subject: [PATCH 16/17] find collision of 2 same object at different place --- openapi_python_client/resolver/collision_resolver.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/openapi_python_client/resolver/collision_resolver.py b/openapi_python_client/resolver/collision_resolver.py index aafbfca21..a6168eff5 100644 --- a/openapi_python_client/resolver/collision_resolver.py +++ b/openapi_python_client/resolver/collision_resolver.py @@ -12,6 +12,7 @@ def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[s self._errors: List[str] = errors self._parent = parent self._refs_index: Dict[str, str] = dict() + self._schema_index: Dict[str, Reference] = dict() self._keys_to_replace: Dict[str, Tuple[int, SchemaData, str]] = dict() def _browse_schema(self, attr: Any, root_attr: Any) -> None: @@ -36,6 +37,17 @@ def _browse_schema(self, attr: Any, root_attr: Any) -> None: self._increment_ref(ref, self._refs[ref.abs_path], hashed_schema, attr, key) else: self._refs_index[value] = hashed_schema + + if hashed_schema in self._schema_index.keys(): + existing_ref = self._schema_index[hashed_schema] + if ( + existing_ref.pointer.value != ref.pointer.value + and ref.pointer.tokens()[-1] == existing_ref.pointer.tokens()[-1] + ): + print("Found same schema for different pointer") + else: + self._schema_index[hashed_schema] = ref + else: self._browse_schema(val, root_attr) From 093ebe814a2d058dd5572ff0757f4bbf4f1f434e Mon Sep 17 00:00:00 2001 From: avy Date: Mon, 15 Mar 2021 16:36:03 +0100 Subject: [PATCH 17/17] use tokens instead of splitting paths --- .../resolver/collision_resolver.py | 24 ++++--------------- .../resolver/resolved_schema.py | 21 ++++++++-------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/openapi_python_client/resolver/collision_resolver.py b/openapi_python_client/resolver/collision_resolver.py index a6168eff5..361685c33 100644 --- a/openapi_python_client/resolver/collision_resolver.py +++ b/openapi_python_client/resolver/collision_resolver.py @@ -13,7 +13,7 @@ def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[s self._parent = parent self._refs_index: Dict[str, str] = dict() self._schema_index: Dict[str, Reference] = dict() - self._keys_to_replace: Dict[str, Tuple[int, SchemaData, str]] = dict() + self._keys_to_replace: Dict[str, Tuple[int, SchemaData, List[str]]] = dict() def _browse_schema(self, attr: Any, root_attr: Any) -> None: if isinstance(attr, dict): @@ -60,13 +60,7 @@ def _get_from_ref(self, ref: Reference, attr: SchemaData) -> SchemaData: assert ref.abs_path in self._refs.keys() attr = self._refs[ref.abs_path] cursor = attr - query = ref.pointer.unescapated_value - query_parts = [] - - if query.startswith("/paths"): - query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] - else: - query_parts = query.split("/") + query_parts = ref.pointer.tokens() for key in query_parts: if key == "": @@ -105,18 +99,10 @@ def _increment_ref( attr[key] = ref.value + "_" + str(i) self._refs_index[incremented_value] = hashed_schema - self._keys_to_replace[ref.value] = (i, schema, ref.pointer.value) + self._keys_to_replace[ref.value] = (i, schema, ref.pointer.tokens()) - def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> None: + def _modify_root_ref_name(self, query_parts: List[str], i: int, attr: SchemaData) -> None: cursor = attr - query = ref_pointer - query_parts = [] - - if query.startswith("/paths"): - query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] - else: - query_parts = query.split("/") - last_key = query_parts[-1] for key in query_parts: @@ -124,7 +110,7 @@ def _modify_root_ref_name(self, ref_pointer: str, i: int, attr: SchemaData) -> N continue if key == last_key and key + "_" + str(i) not in cursor: - assert key in cursor, "Didnt find %s in %s" % (ref_pointer, attr) + assert key in cursor, "Didnt find %s in %s" % (key, attr) cursor[key + "_" + str(i)] = cursor.pop(key) return diff --git a/openapi_python_client/resolver/resolved_schema.py b/openapi_python_client/resolver/resolved_schema.py index 9d0fa9066..3c473f234 100644 --- a/openapi_python_client/resolver/resolved_schema.py +++ b/openapi_python_client/resolver/resolved_schema.py @@ -47,12 +47,13 @@ def _process_remote_paths(self) -> None: remote_path = ref.abs_path path = ref.pointer.unescapated_value + tokens = ref.pointer.tokens() if remote_path not in self._refs: self._errors.append("Failed to resolve remote reference > {0}".format(remote_path)) else: remote_schema = self._refs[remote_path] - remote_value = self._lookup_dict(remote_schema, path) + remote_value = self._lookup_dict(remote_schema, tokens) if not remote_value: self._errors.append("Failed to read remote value {}, in remote ref {}".format(path, remote_path)) else: @@ -87,16 +88,16 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N self._ensure_components_dir_exists(ref) # print('Processing remote component > {0}'.format(ref.value)) - remote_component = self._lookup_dict(owner, ref.pointer.value) + remote_component = self._lookup_dict(owner, ref.pointer.tokens()) pointer_parent = ref.pointer.parent if pointer_parent is not None: - root_components_dir = self._lookup_dict(self._resolved_remotes_components, pointer_parent.value) + root_components_dir = self._lookup_dict(self._resolved_remotes_components, pointer_parent.tokens()) component_name = ref.pointer.value.split("/")[-1] if remote_component is None: print("Weird relookup of >> {0}".format(ref.value)) - assert ref.is_local() and self._lookup_dict(self._resolved_remotes_components, ref.path) + assert ref.is_local() and self._lookup_dict(self._resolved_remotes_components, ref.pointer.tokens()) return if "$ref" in remote_component: @@ -109,10 +110,14 @@ def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> N if component_name in root_components_dir: if remote_component == root_components_dir[component_name]: return + elif list(remote_component) == ["$ref"]: + pass else: print("FOUND COLLISION IN RESOLVED SCHEMA, SHOULD NOT HAPPEN") # print(component_name) + # print() # print(remote_component) + # print() # print(root_components_dir[component_name]) pass else: @@ -136,14 +141,8 @@ def _ensure_components_dir_exists(self, ref: Reference) -> None: def _transform_to_local_ref(self, owner: Dict[str, Any], ref: Reference) -> None: owner["$ref"] = "#{0}".format(ref.pointer.value) - def _lookup_dict(self, attr: SchemaData, query: str) -> Union[SchemaData, None]: + def _lookup_dict(self, attr: SchemaData, query_parts: List[str]) -> Union[SchemaData, None]: cursor = attr - query_parts = [] - - if query.startswith("/paths"): - query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")] - else: - query_parts = query.split("/") for key in query_parts: if key == "":