Skip to content

Commit 9a2af42

Browse files
committed
Merge branch feat-persist-custom-schema-annotations into oold-and-opensemantic-package-integration
2 parents 35dd834 + 452bb3e commit 9a2af42

File tree

3 files changed

+252
-6
lines changed

3 files changed

+252
-6
lines changed

src/osw/core.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import osw.model.entity as model
3434
from osw.defaults import params as default_params
3535
from osw.model.static import OswBaseModel
36+
from osw.utils.codegen import OOLDJsonSchemaParser
3637
from osw.utils.oold import (
3738
AggregateGeneratedSchemasParam,
3839
AggregateGeneratedSchemasParamMode,
@@ -380,6 +381,9 @@ class FetchSchemaParam(BaseModel):
380381
)
381382
legacy_generator: Optional[bool] = False
382383
"""uses legacy command line for code generation if true"""
384+
generate_annotations: Optional[bool] = True
385+
"""generate custom schema keywords in Fields and Classes.
386+
Required to update the schema in OSW without information loss"""
383387
offline_pages: Optional[Dict[str, WtPage]] = None
384388
"""pages to be used offline instead of fetching them from the OSW instance"""
385389
result_model_path: Optional[Union[str, pathlib.Path]] = None
@@ -410,6 +414,7 @@ def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None:
410414
schema_title=schema_title,
411415
mode=mode,
412416
legacy_generator=fetchSchemaParam.legacy_generator,
417+
generate_annotations=fetchSchemaParam.generate_annotations,
413418
offline_pages=fetchSchemaParam.offline_pages,
414419
result_model_path=fetchSchemaParam.result_model_path,
415420
)
@@ -438,6 +443,9 @@ class _FetchSchemaParam(BaseModel):
438443
)
439444
legacy_generator: Optional[bool] = False
440445
"""uses legacy command line for code generation if true"""
446+
generate_annotations: Optional[bool] = False
447+
"""generate custom schema keywords in Fields and Classes.
448+
Required to update the schema in OSW without information loss"""
441449
offline_pages: Optional[Dict[str, WtPage]] = None
442450
"""pages to be used offline instead of fetching them from the OSW instance"""
443451
result_model_path: Optional[Union[str, pathlib.Path]] = None
@@ -596,6 +604,12 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
596604
# suppress deprecation warnings from pydantic
597605
# see https://github.com/koxudaxi/datamodel-code-generator/issues/2213
598606
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
607+
608+
if fetchSchemaParam.generate_annotations:
609+
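# monkey patch class (NOTE(review): this replaces the parser module-wide and is not restored in the code shown here - confirm that later runs with generate_annotations=False are not expected to get the stock JsonSchemaParser)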
610+
datamodel_code_generator.parser.jsonschema.JsonSchemaParser = (
611+
OOLDJsonSchemaParser
612+
)
599613
datamodel_code_generator.generate(
600614
input_=pathlib.Path(schema_path),
601615
input_file_type="jsonschema",
@@ -620,6 +634,43 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
620634
)
621635
warnings.filterwarnings("default", category=PydanticDeprecatedSince20)
622636

637+
# note: we could use OOLDJsonSchemaParser directly (see below),
638+
# but datamodel_code_generator.generate
639+
# does some pre- and postprocessing we do not want to duplicate
640+
641+
# data_model_type = datamodel_code_generator.DataModelType.PydanticBaseModel
642+
# #data_model_type = DataModelType.PydanticV2BaseModel
643+
# target_python_version = datamodel_code_generator.PythonVersion.PY_38
644+
# data_model_types = datamodel_code_generator.model.get_data_model_types(
645+
# data_model_type, target_python_version
646+
# )
647+
# parser = OOLDJsonSchemaParserFixedRefs(
648+
# source=pathlib.Path(schema_path),
649+
650+
# base_class="osw.model.static.OswBaseModel",
651+
# data_model_type=data_model_types.data_model,
652+
# data_model_root_type=data_model_types.root_model,
653+
# data_model_field_type=data_model_types.field_model,
654+
# data_type_manager_type=data_model_types.data_type_manager,
655+
# target_python_version=target_python_version,
656+
657+
# #use_default=True,
658+
# apply_default_values_for_required_fields=True,
659+
# use_unique_items_as_set=True,
660+
# enum_field_as_literal=datamodel_code_generator.LiteralType.All,
661+
# use_title_as_name=True,
662+
# use_schema_description=True,
663+
# use_field_description=True,
664+
# encoding="utf-8",
665+
# use_double_quotes=True,
666+
# collapse_root_models=True,
667+
# reuse_model=True,
668+
# #field_include_all_keys=True
669+
# )
670+
# result = parser.parse()
671+
# with open(temp_model_path, "w", encoding="utf-8") as f:
672+
# f.write(result)
673+
623674
# see https://koxudaxi.github.io/datamodel-code-generator/
624675
# --base-class OswBaseModel: use a custom base class
625676
# --custom-template-dir src/model/template_data/
@@ -693,7 +744,6 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
693744
r"class\s*([\S]*)\s*\(\s*[\S\s]*?\s*\)\s*:.*\n"
694745
) # match class definition [\s\S]*(?:[^\S\n]*\n){2,}
695746
for cls in re.findall(pattern, org_content):
696-
print(cls)
697747
content = re.sub(
698748
r"(class\s*"
699749
+ cls

src/osw/model/entity.py

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# generated by datamodel-codegen:
22
# filename: Item.json
3-
# timestamp: 2024-09-12T12:35:11+00:00
3+
# timestamp: 2024-12-10T05:09:51+00:00
44

55
from __future__ import annotations
66

@@ -13,29 +13,71 @@
1313

1414

1515
class ReadAccess(OswBaseModel):
16+
class Config:
17+
schema_extra = {"title": "Read access", "title*": {"de": "Lesezugriff"}}
18+
1619
level: Optional[Literal["public", "internal", "restricted"]] = Field(
1720
None, title="Level"
1821
)
1922

2023

2124
class AccessRestrictions(OswBaseModel):
25+
class Config:
26+
schema_extra = {
27+
"title": "Access restrictions",
28+
"title*": {"de": "Zugriffsbeschränkungen"},
29+
"eval_template": {
30+
"$comment": "See https://www.mediawiki.org/wiki/Extension:Semantic_ACL",
31+
"type": "mustache-wikitext",
32+
"mode": "render",
33+
"value": "{{entry_access.read.level}} {{=<% %>=}} {{#set: |Visible to= {{#switch: <%={{ }}=%> {{{entry_access.read.level}}} {{=<% %>=}} |public=public |internal=users |restricted=whitelist |#default=}} }} <%={{ }}=%>",
34+
},
35+
}
36+
2237
read: Optional[ReadAccess] = Field(None, title="Read access")
2338

2439

2540
class Label(OswBaseModel):
41+
class Config:
42+
schema_extra = {
43+
"@context": {
44+
"rdf": "http://www.w3.org/2000/01/rdf-schema#",
45+
"text": {"@id": "@value"},
46+
"lang": {"@id": "@language"},
47+
},
48+
"title": "Label",
49+
}
50+
2651
text: constr(min_length=1) = Field(..., title="Text")
2752
lang: Optional[Literal["en", "de"]] = Field("en", title="Lang code")
2853

2954

30-
class Description(Label):
31-
pass
55+
class Description(OswBaseModel):
56+
class Config:
57+
schema_extra = {
58+
"@context": {
59+
"rdf": "http://www.w3.org/2000/01/rdf-schema#",
60+
"text": {"@id": "@value"},
61+
"lang": {"@id": "@language"},
62+
},
63+
"title": "Description",
64+
}
65+
66+
text: constr(min_length=1) = Field(..., title="Text")
67+
lang: Optional[Literal["en", "de"]] = Field("en", title="Lang code")
3268

3369

3470
class WikiPage(OswBaseModel):
3571
"""
3672
The wiki page containing this entity
3773
"""
3874

75+
class Config:
76+
schema_extra = {
77+
"title": "Wiki page",
78+
"description": "The wiki page containing this entity",
79+
}
80+
3981
title: Optional[str] = Field(None, title="Title")
4082
"""
4183
The page title
@@ -47,6 +89,14 @@ class WikiPage(OswBaseModel):
4789

4890

4991
class Meta(OswBaseModel):
92+
class Config:
93+
schema_extra = {
94+
"@context": {
95+
"change_id": {"@id": "Property:HasChangeId", "@type": "xsd:string"}
96+
},
97+
"title": "Meta",
98+
}
99+
50100
uuid: UUID = Field(default_factory=uuid4, title="UUID")
51101
wiki_page: Optional[WikiPage] = Field(None, title="Wiki page")
52102
"""
@@ -59,6 +109,70 @@ class Meta(OswBaseModel):
59109

60110

61111
class Entity(OswBaseModel):
112+
class Config:
113+
schema_extra = {
114+
"@context": {
115+
"bvco": "https://bvco.ontology.link/",
116+
"databatt": "http://www.databatt.org/",
117+
"emmo": "https://w3id.org/emmo#",
118+
"emmobattery": "https://w3id.org/emmo/domain/battery#",
119+
"emmochameo": "https://w3id.org/emmo/domain/chameo#",
120+
"emmochemicals": "https://w3id.org/emmo/domain/chemicalsubstance#",
121+
"emmoelch": "https://w3id.org/emmo/domain/electrochemistry#",
122+
"schema": "https://schema.org/",
123+
"skos": "https://www.w3.org/TR/skos-reference/",
124+
"xsd": "http://www.w3.org/2001/XMLSchema#",
125+
"wiki": "https://wiki-dev.open-semantic-lab.org/id/",
126+
"Category": {"@id": "wiki:Category-3A", "@prefix": True},
127+
"File": {
128+
"@id": "https://wiki-dev.open-semantic-lab.org/wiki/Special:Redirect/file/",
129+
"@prefix": True,
130+
},
131+
"Property": {"@id": "wiki:Property-3A", "@prefix": True},
132+
"Item": {"@id": "wiki:Item-3A", "@prefix": True},
133+
"attachments*": {"@id": "Property:HasFileAttachment", "@type": "@id"},
134+
"based_on": {"@id": "skos:isBasedOn", "@type": "@id"},
135+
"based_on*": {"@id": "Property:IsBasedOn", "@type": "@id"},
136+
"description": {"@id": "skos:definition", "@type": "@id"},
137+
"description*": {"@id": "Property:HasDescription", "@type": "@id"},
138+
"image": {"@id": "schema:image", "@type": "@id"},
139+
"image*": {"@id": "Property:HasImage", "@type": "@id"},
140+
"label": {"@id": "skos:prefLabel", "@type": "@id"},
141+
"label*": {"@id": "Property:HasLabel", "@type": "@id"},
142+
"lang": {"@id": "@language"},
143+
"meta": {
144+
"@id": "Property:HasMeta",
145+
"@type": "@id",
146+
"@context": {
147+
"change_id": {
148+
"@id": "Property:HasChangeId",
149+
"@type": "xsd:string",
150+
}
151+
},
152+
},
153+
"name*": {"@id": "Property:HasName"},
154+
"ordering_categories": {"@id": "Property:Category", "@type": "@id"},
155+
"ordering_categories*": {
156+
"@id": "Property:HasClassificationCategory",
157+
"@type": "@id",
158+
},
159+
"query_label": {"@id": "Property:HasLabel", "@type": "@id"},
160+
"rdf_type": {"@id": "@type", "@type": "@id"},
161+
"rdf_type*": {"@id": "schema:additionalType", "@type": "@id"},
162+
"rdf_type**": {"@id": "owl:sameAs", "@type": "@id"},
163+
"rdf_type***": {"@id": "Property:Equivalent_URI", "@type": "@id"},
164+
"short_name": {"@id": "Property:HasShortName"},
165+
"keywords": {"@id": "schema:keywords", "@type": "@id"},
166+
"keywords*": {"@id": "Property:HasKeyword", "@type": "@id"},
167+
"statements": {"@id": "Property:HasStatement", "@type": "@id"},
168+
"text": {"@id": "@value"},
169+
"uuid*": {"@id": "Property:HasUuid"},
170+
},
171+
"uuid": "ce353767-c628-45bd-9d88-d6eb3009aec0",
172+
"title": "Entity",
173+
"defaultProperties": ["description"],
174+
}
175+
62176
rdf_type: Optional[Set[str]] = Field(None, title="Additional RDF type(s)")
63177
"""
64178
Declares additional type(s) for this entity, e.g., to state that this entity has the same meaning as a term in a controlled vocabulary or ontology. This property is synonymous to the schema:additionalType and owl:sameAs. The default syntax is ontology:TermName. The ontology prefix has to be defined in the @context of the Entity, the category or any of the parent categories. The term name has to be a valid identifier in the ontology.
@@ -83,9 +197,11 @@ class Entity(OswBaseModel):
83197
query_label: Optional[str] = Field(None, title="Query label")
84198
description: Optional[List[Description]] = Field(None, title="Description")
85199
image: Optional[str] = Field(None, title="Image")
86-
ordering_categories: Optional[List[str]] = Field(None, title="Ordering categories")
200+
ordering_categories: Optional[List[str]] = Field(
201+
None, title="Classification categories"
202+
)
87203
"""
88-
Ordering categories are used to categorize instances, e.g., according to their use but not their properties. When querying for instances of a here listed ordering category, this instance will be returned. Note: Ordering categories define no properties, while 'regular' categories define properties, which an instance assigns values to.
204+
Classification categories are used to categorize instances, e.g., according to their use but not their properties. When querying for instances of a here listed classification category, this instance will be returned. Note: Classification categories define no properties, while 'regular' categories define properties, which an instance assigns values to.
89205
"""
90206
keywords: Optional[List[str]] = Field(None, title="Keywords / Tags")
91207
"""
@@ -103,6 +219,9 @@ class Entity(OswBaseModel):
103219

104220

105221
class ObjectStatement(OswBaseModel):
222+
class Config:
223+
schema_extra = {"title": "Object Statement"}
224+
106225
rdf_type: Optional[Any] = "rdf:Statement"
107226
uuid: UUID = Field(default_factory=uuid4, title="UUID")
108227
label: Optional[List[Label]] = Field(None, title="Label")
@@ -118,6 +237,9 @@ class ObjectStatement(OswBaseModel):
118237

119238

120239
class DataStatement(OswBaseModel):
240+
class Config:
241+
schema_extra = {"title": "Data Statement"}
242+
121243
rdf_type: Optional[Any] = "rdf:Statement"
122244
uuid: UUID = Field(default_factory=uuid4, title="UUID")
123245
label: Optional[List[Label]] = Field(None, title="Label")
@@ -133,6 +255,9 @@ class DataStatement(OswBaseModel):
133255

134256

135257
class QuantityStatement(OswBaseModel):
258+
class Config:
259+
schema_extra = {"title": "Quantity Statement"}
260+
136261
rdf_type: Optional[Any] = "rdf:Statement"
137262
uuid: UUID = Field(default_factory=uuid4, title="UUID")
138263
label: Optional[List[Label]] = Field(None, title="Label")
@@ -151,6 +276,18 @@ class QuantityStatement(OswBaseModel):
151276

152277

153278
class Item(Entity):
279+
class Config:
280+
schema_extra = {
281+
"@context": [
282+
"/wiki/Category:Entity?action=raw&slot=jsonschema",
283+
{
284+
"type": {"@id": "Property:HasType", "@type": "@id"},
285+
"type*": {"@id": "Property:HasSchema", "@type": "@id"},
286+
},
287+
],
288+
"title": "Item",
289+
}
290+
154291
type: Optional[List[str]] = Field(
155292
["Category:Item"], min_items=1, title="Types/Categories"
156293
)

src/osw/utils/codegen.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from pathlib import Path
from typing import Any, Dict, Optional

from datamodel_code_generator import load_yaml_from_path
from datamodel_code_generator.model import pydantic as pydantic_v1_model
from datamodel_code_generator.model import pydantic_v2 as pydantic_v2_model
from datamodel_code_generator.parser.jsonschema import (
    JsonSchemaObject,
    JsonSchemaParser,
)
11+
12+
# https://docs.pydantic.dev/1.10/usage/schema/#schema-customization
13+
# https://docs.pydantic.dev/latest/concepts/json_schema/#using-json_schema_extra-with-a-dict
14+
# https://docs.pydantic.dev/latest/concepts/json_schema/#field-level-customization
15+
16+
17+
class PydanticV1Config(pydantic_v1_model.Config):
    """Pydantic v1 config template data extended by a ``schema_extra`` entry."""

    # Typed as a string rather than Dict[str, Any]: OOLDJsonSchemaParser stores
    # ``repr(obj.extras)`` here, i.e. Python source text and not the dict itself.
    # Optional because the field defaults to None when no extras are set.
    schema_extra: Optional[str] = None
20+
21+
22+
class PydanticV2Config(pydantic_v2_model.ConfigDict):
    """Pydantic v2 config template data extended by ``json_schema_extra``."""

    # Typed as a string rather than Dict[str, Any]: OOLDJsonSchemaParser stores
    # ``repr(obj.extras)`` here, i.e. Python source text and not the dict itself.
    # Optional because the field defaults to None when no extras are set.
    json_schema_extra: Optional[str] = None
25+
26+
27+
class OOLDJsonSchemaParser(JsonSchemaParser):
    """Custom parser for OO-LD schemas.

    Stores the extra keywords of each parsed schema object (``obj.extras``)
    in the config template data of the corresponding model.

    You can use this class directly or monkey-patch the
    datamodel_code_generator module:
    `datamodel_code_generator.parser.jsonschema.JsonSchemaParser = OOLDJsonSchemaParser`
    """

    def set_additional_properties(self, name: str, obj: JsonSchemaObject) -> None:
        """Record ``obj.extras`` as config template data, then defer to the base.

        Args:
            name: Key of the entry in ``self.extra_template_data`` to extend.
            obj: Parsed schema object whose ``extras`` are recorded.

        Returns:
            Whatever the base implementation returns (annotated as ``None``).
        """
        # repr() keeps the 'False' and 'True' boolean literals
        schema_extras = repr(obj.extras)

        # The two supported model types are mutually exclusive, and classes are
        # compared by identity. Any other model type gets no config entry.
        if self.data_model_type is pydantic_v1_model.BaseModel:
            self.extra_template_data[name]["config"] = PydanticV1Config(
                schema_extra=schema_extras
            )
        elif self.data_model_type is pydantic_v2_model.BaseModel:
            self.extra_template_data[name]["config"] = PydanticV2Config(
                json_schema_extra=schema_extras
            )
        return super().set_additional_properties(name, obj)
44+
45+
46+
class OOLDJsonSchemaParserFixedRefs(OOLDJsonSchemaParser):
    """Variant of `OOLDJsonSchemaParser` overriding `_get_ref_body_from_remote`.

    The override fixes wrongly composed paths. This issue occurs only when
    using this parser class directly; it does not occur when the parser is used
    through monkey patching and `datamodel_code_generator.generate()`.
    """

    def _get_ref_body_from_remote(self, resolved_ref: str) -> Dict[Any, Any]:
        """Return the body of ``resolved_ref``, loaded through the object cache."""
        # The original implementation joined ``self.base_path / resolved_ref``.
        # Fix: merge the paths correctly by appending only the last component.
        ref_file = self.base_path / Path(resolved_ref).parts[-1]

        def _load(_key):
            # The cache key is not needed: the path is already known here.
            return load_yaml_from_path(ref_file, self.encoding)

        return self.remote_object_cache.get_or_put(
            str(ref_file),
            default_factory=_load,
        )

0 commit comments

Comments
 (0)