Skip to content

Commit cfe4064

Browse files
kchason authored and ajnelson-nist committed
Expose unrecognized-concept set construction logic as function
AJN: This is a partial application of @kchason's work in PR 118, and is being pulled into its own patch series to focus review.

References:
* #118

Signed-off-by: Alex Nelson <[email protected]>
1 parent e51dc87 commit cfe4064

File tree

1 file changed

+75
-60
lines changed

1 file changed

+75
-60
lines changed

case_utils/case_validate/__init__.py

Lines changed: 75 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,79 @@ def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
7171
) or concept_iri.startswith("https://ontology.caseontology.org/")
7272

7373

74+
def _get_known_cdo_concepts(ontology_graph: rdflib.Graph) -> Set[rdflib.URIRef]:
    """
    Collect the IRIs that count as known concepts for concept-existence review.

    :param ontology_graph: The ontology graph to harvest concept IRIs from.
    :return: The set of IRIs found in the ontology graph, plus the ontology and version IRIs listed in the packaged ``ontology_and_version_iris.txt`` resource.
    """
    known_concepts: Set[rdflib.URIRef] = set()

    # Subjects typed with one of these structural classes are concept
    # definitions.  Only IRIs in a CDO namespace are kept from this pass.
    for n_structural_class in (
        NS_OWL.Class,
        NS_OWL.AnnotationProperty,
        NS_OWL.DatatypeProperty,
        NS_OWL.ObjectProperty,
        NS_RDFS.Datatype,
        NS_SH.NodeShape,
        NS_SH.PropertyShape,
        NS_SH.Shape,
    ):
        for n_subject, _, _ in ontology_graph.triples(
            (None, NS_RDF.type, n_structural_class)
        ):
            if not isinstance(n_subject, rdflib.URIRef):
                continue
            if concept_is_cdo_concept(n_subject):
                known_concepts.add(n_subject)

    # Ontology-level linking predicates: both ends of the statement are
    # recorded.  Non-IRI nodes (e.g. the blank node of an anonymous ontology)
    # are skipped rather than asserted against, so an unusual ontology graph
    # cannot abort validation and behavior does not change under `python -O`.
    for n_ontology_predicate in (
        NS_OWL.backwardCompatibleWith,
        NS_OWL.imports,
        NS_OWL.incompatibleWith,
        NS_OWL.priorVersion,
        NS_OWL.versionIRI,
    ):
        for n_subject, _, n_object in ontology_graph.triples(
            (None, n_ontology_predicate, None)
        ):
            for n_node in (n_subject, n_object):
                if isinstance(n_node, rdflib.URIRef):
                    known_concepts.add(n_node)

    # Ontology IRIs themselves.
    for n_subject, _, _ in ontology_graph.triples(
        (None, NS_RDF.type, NS_OWL.Ontology)
    ):
        if isinstance(n_subject, rdflib.URIRef):
            known_concepts.add(n_subject)

    # Also load historical ontology and version IRIs.
    ontology_and_version_iris_data = importlib.resources.read_text(
        case_utils.ontology, "ontology_and_version_iris.txt"
    )
    for line in ontology_and_version_iris_data.split("\n"):
        cleaned_line = line.strip()
        if cleaned_line:
            known_concepts.add(rdflib.URIRef(cleaned_line))

    return known_concepts


def _get_data_cdo_concepts(data_graph: rdflib.Graph) -> Set[rdflib.URIRef]:
    """
    Collect every CDO-namespace IRI that the data graph uses.

    :param data_graph: The data graph to scan.
    :return: The set of CDO IRIs appearing as a subject, predicate, or object, or as the datatype of a literal.
    """
    used_concepts: Set[rdflib.URIRef] = set()
    for data_triple in data_graph.triples((None, None, None)):
        for data_triple_member in data_triple:
            if isinstance(data_triple_member, rdflib.URIRef):
                n_candidate = data_triple_member
            elif isinstance(data_triple_member, rdflib.Literal) and isinstance(
                data_triple_member.datatype, rdflib.URIRef
            ):
                # A literal contributes only its datatype IRI.
                n_candidate = data_triple_member.datatype
            else:
                continue
            if concept_is_cdo_concept(n_candidate):
                used_concepts.add(n_candidate)
    return used_concepts


def get_invalid_cdo_concepts(
    data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
) -> Set[rdflib.URIRef]:
    """
    Get the set of concepts in the data graph that are not part of the CDO ontology.

    :param data_graph: The data graph to validate.
    :param ontology_graph: The ontology graph to use for validation.
    :return: The set of CDO-namespace IRIs used in the data graph that are not found among the known concepts of the ontology graph.  The set is empty when every CDO concept used is recognized.
    """
    return _get_data_cdo_concepts(data_graph) - _get_known_cdo_concepts(
        ontology_graph
    )
145+
146+
74147
def main() -> None:
75148
parser = argparse.ArgumentParser(
76149
description="CASE wrapper to pySHACL command line tool."
@@ -181,67 +254,9 @@ def main() -> None:
181254
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph)
182255
ontology_graph.parse(arg_ontology_graph)
183256

184-
# Construct set of CDO concepts for data graph concept-existence review.
185-
cdo_concepts: Set[rdflib.URIRef] = set()
186-
187-
for n_structural_class in [
188-
NS_OWL.Class,
189-
NS_OWL.AnnotationProperty,
190-
NS_OWL.DatatypeProperty,
191-
NS_OWL.ObjectProperty,
192-
NS_RDFS.Datatype,
193-
NS_SH.NodeShape,
194-
NS_SH.PropertyShape,
195-
NS_SH.Shape,
196-
]:
197-
for ontology_triple in ontology_graph.triples(
198-
(None, NS_RDF.type, n_structural_class)
199-
):
200-
if not isinstance(ontology_triple[0], rdflib.URIRef):
201-
continue
202-
if concept_is_cdo_concept(ontology_triple[0]):
203-
cdo_concepts.add(ontology_triple[0])
204-
for n_ontology_predicate in [
205-
NS_OWL.backwardCompatibleWith,
206-
NS_OWL.imports,
207-
NS_OWL.incompatibleWith,
208-
NS_OWL.priorVersion,
209-
NS_OWL.versionIRI,
210-
]:
211-
for ontology_triple in ontology_graph.triples(
212-
(None, n_ontology_predicate, None)
213-
):
214-
assert isinstance(ontology_triple[0], rdflib.URIRef)
215-
assert isinstance(ontology_triple[2], rdflib.URIRef)
216-
cdo_concepts.add(ontology_triple[0])
217-
cdo_concepts.add(ontology_triple[2])
218-
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
219-
if not isinstance(ontology_triple[0], rdflib.URIRef):
220-
continue
221-
cdo_concepts.add(ontology_triple[0])
222-
223-
# Also load historical ontology and version IRIs.
224-
ontology_and_version_iris_data = importlib.resources.read_text(
225-
case_utils.ontology, "ontology_and_version_iris.txt"
226-
)
227-
for line in ontology_and_version_iris_data.split("\n"):
228-
cleaned_line = line.strip()
229-
if cleaned_line == "":
230-
continue
231-
cdo_concepts.add(rdflib.URIRef(cleaned_line))
232-
233-
data_cdo_concepts: Set[rdflib.URIRef] = set()
234-
for data_triple in data_graph.triples((None, None, None)):
235-
for data_triple_member in data_triple:
236-
if isinstance(data_triple_member, rdflib.URIRef):
237-
if concept_is_cdo_concept(data_triple_member):
238-
data_cdo_concepts.add(data_triple_member)
239-
elif isinstance(data_triple_member, rdflib.Literal):
240-
if isinstance(data_triple_member.datatype, rdflib.URIRef):
241-
if concept_is_cdo_concept(data_triple_member.datatype):
242-
data_cdo_concepts.add(data_triple_member.datatype)
257+
# Get the list of undefined CDO concepts in the graph
258+
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph)
243259

244-
undefined_cdo_concepts = data_cdo_concepts - cdo_concepts
245260
for undefined_cdo_concept in sorted(undefined_cdo_concepts):
246261
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning)
247262
undefined_cdo_concepts_message = (

0 commit comments

Comments
 (0)