-
Notifications
You must be signed in to change notification settings - Fork 3
Library Usage #118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Library Usage #118
Changes from 18 commits
2df147a
f485161
b509dc5
8c40df1
f6d48e2
2d3a65b
bfe9992
86ed417
40be311
ae5f077
97d7fbb
64bd95c
4208a99
73e4683
d41a418
8f957dc
6f9b6c9
00f1360
15f00c9
eccaad4
90f5c8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,141 +32,90 @@ | |
__version__ = "0.3.0" | ||
|
||
import argparse | ||
import importlib.resources | ||
import logging | ||
import os | ||
import sys | ||
import warnings | ||
from typing import Dict, Set, Tuple, Union | ||
from typing import Any, Dict, List, Optional, Tuple, Union | ||
|
||
import pyshacl # type: ignore | ||
import rdflib | ||
from rdflib import Graph | ||
|
||
import case_utils.ontology | ||
from case_utils.case_validate.validate_types import ( | ||
NonExistentCDOConceptWarning, | ||
ValidationResult, | ||
) | ||
from case_utils.case_validate.validate_utils import ( | ||
get_invalid_cdo_concepts, | ||
get_ontology_graph, | ||
) | ||
from case_utils.ontology.version_info import ( | ||
CURRENT_CASE_VERSION, | ||
built_version_choices_list, | ||
) | ||
|
||
NS_OWL = rdflib.OWL | ||
NS_RDF = rdflib.RDF | ||
NS_RDFS = rdflib.RDFS | ||
NS_SH = rdflib.SH | ||
|
||
_logger = logging.getLogger(os.path.basename(__file__)) | ||
|
||
|
||
class NonExistentCDOConceptWarning(UserWarning): | ||
def validate( | ||
input_file: str, | ||
*args: Any, | ||
case_version: Optional[str] = None, | ||
supplemental_graphs: Optional[List[str]] = None, | ||
abort_on_first: bool = False, | ||
inference: Optional[str] = None, | ||
**kwargs: Any, | ||
) -> ValidationResult: | ||
""" | ||
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags. | ||
Validate the given data graph against the given CASE ontology version and supplemental graphs. | ||
:param *args: The positional arguments to pass to the underlying pyshacl.validate function. | ||
:param input_file: The path to the file containing the data graph to validate. | ||
:param case_version: The version of the CASE ontology to use (e.g. 1.2.0). If None, the most recent version will | ||
be used. | ||
:param supplemental_graphs: The supplemental graphs to use. If None, no supplemental graphs will be used. | ||
:param abort_on_first: Whether to abort on the first validation error. | ||
:param inference: The type of inference to use. If "none" (type str), no inference will be used. If None (type NoneType), pyshacl defaults will be used. Note that at the time of this writing (pySHACL 0.23.0), pyshacl defaults are no inferencing for the data graph, and RDFS inferencing for the SHACL graph, which for case_utils.validate includes the SHACL and OWL graphs. | ||
:param **kwargs: The keyword arguments to pass to the underlying pyshacl.validate function. | ||
:return: The validation result object containing the defined properties. | ||
""" | ||
# Convert the data graph string to a rdflib.Graph object. | ||
data_graph = rdflib.Graph() | ||
data_graph.parse(input_file) | ||
|
||
pass | ||
# Get the ontology graph from the case_version and supplemental_graphs arguments | ||
ontology_graph: Graph = get_ontology_graph(case_version, supplemental_graphs) | ||
|
||
# Get the undefined CDO concepts | ||
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph) | ||
|
||
def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool: | ||
concept_iri = str(n_concept) | ||
return concept_iri.startswith( | ||
"https://ontology.unifiedcyberontology.org/" | ||
) or concept_iri.startswith("https://ontology.caseontology.org/") | ||
# Validate data graph against ontology graph. | ||
validate_result: Tuple[ | ||
bool, Union[Exception, bytes, str, rdflib.Graph], str | ||
ajnelson-nist marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] = pyshacl.validate( | ||
data_graph, | ||
*args, | ||
shacl_graph=ontology_graph, | ||
ont_graph=ontology_graph, | ||
inference=inference, | ||
meta_shacl=False, | ||
abort_on_first=abort_on_first, | ||
allow_infos=False, | ||
allow_warnings=False, | ||
debug=False, | ||
do_owl_imports=False, | ||
**kwargs, | ||
) | ||
|
||
# Relieve RAM of the data graph after validation has run. | ||
del data_graph | ||
|
||
def get_invalid_cdo_concepts( | ||
data_graph: rdflib.Graph, ontology_graph: rdflib.Graph | ||
) -> Set[rdflib.URIRef]: | ||
""" | ||
Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument. | ||
|
||
:param data_graph: The data graph to validate. | ||
:param ontology_graph: The ontology graph to use for validation. | ||
:return: The list of concepts in the data graph that are not part of the CDO ontology. | ||
|
||
>>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE | ||
>>> from rdflib import Graph, Literal, Namespace, URIRef | ||
>>> # Define a namespace for a knowledge base, and a namespace for custom extensions. | ||
>>> ns_kb = Namespace("http://example.org/kb/") | ||
>>> ns_ex = Namespace("http://example.org/ontology/") | ||
>>> dg = Graph() | ||
>>> og = Graph() | ||
>>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property. | ||
>>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class)) | ||
>>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty)) | ||
>>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty)) | ||
>>> # Define an individual. | ||
>>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"] | ||
>>> n_uco_object | ||
rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c') | ||
>>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK. | ||
>>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject)) | ||
>>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test"))) | ||
>>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value"))) | ||
>>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og) | ||
>>> invalid_cdo_concepts | ||
{rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')} | ||
>>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported. | ||
>>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts | ||
""" | ||
# Construct set of CDO concepts for data graph concept-existence review. | ||
cdo_concepts: Set[rdflib.URIRef] = set() | ||
|
||
for n_structural_class in [ | ||
NS_OWL.Class, | ||
NS_OWL.AnnotationProperty, | ||
NS_OWL.DatatypeProperty, | ||
NS_OWL.ObjectProperty, | ||
NS_RDFS.Datatype, | ||
NS_SH.NodeShape, | ||
NS_SH.PropertyShape, | ||
NS_SH.Shape, | ||
]: | ||
for ontology_triple in ontology_graph.triples( | ||
(None, NS_RDF.type, n_structural_class) | ||
): | ||
if not isinstance(ontology_triple[0], rdflib.URIRef): | ||
continue | ||
if concept_is_cdo_concept(ontology_triple[0]): | ||
cdo_concepts.add(ontology_triple[0]) | ||
for n_ontology_predicate in [ | ||
NS_OWL.backwardCompatibleWith, | ||
NS_OWL.imports, | ||
NS_OWL.incompatibleWith, | ||
NS_OWL.priorVersion, | ||
NS_OWL.versionIRI, | ||
]: | ||
for ontology_triple in ontology_graph.triples( | ||
(None, n_ontology_predicate, None) | ||
): | ||
assert isinstance(ontology_triple[0], rdflib.URIRef) | ||
assert isinstance(ontology_triple[2], rdflib.URIRef) | ||
cdo_concepts.add(ontology_triple[0]) | ||
cdo_concepts.add(ontology_triple[2]) | ||
for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)): | ||
if not isinstance(ontology_triple[0], rdflib.URIRef): | ||
continue | ||
cdo_concepts.add(ontology_triple[0]) | ||
|
||
# Also load historical ontology and version IRIs. | ||
ontology_and_version_iris_data = importlib.resources.read_text( | ||
case_utils.ontology, "ontology_and_version_iris.txt" | ||
return ValidationResult( | ||
validate_result[0], | ||
validate_result[1], | ||
validate_result[2], | ||
undefined_cdo_concepts, | ||
) | ||
for line in ontology_and_version_iris_data.split("\n"): | ||
cleaned_line = line.strip() | ||
if cleaned_line == "": | ||
continue | ||
cdo_concepts.add(rdflib.URIRef(cleaned_line)) | ||
|
||
data_cdo_concepts: Set[rdflib.URIRef] = set() | ||
for data_triple in data_graph.triples((None, None, None)): | ||
for data_triple_member in data_triple: | ||
if isinstance(data_triple_member, rdflib.URIRef): | ||
if concept_is_cdo_concept(data_triple_member): | ||
data_cdo_concepts.add(data_triple_member) | ||
elif isinstance(data_triple_member, rdflib.Literal): | ||
if isinstance(data_triple_member.datatype, rdflib.URIRef): | ||
if concept_is_cdo_concept(data_triple_member.datatype): | ||
data_cdo_concepts.add(data_triple_member.datatype) | ||
|
||
return data_cdo_concepts - cdo_concepts | ||
|
||
|
||
def main() -> None: | ||
|
@@ -268,20 +217,13 @@ def main() -> None: | |
_logger.debug("in_graph = %r.", in_graph) | ||
data_graph.parse(in_graph) | ||
|
||
ontology_graph = rdflib.Graph() | ||
if args.built_version != "none": | ||
ttl_filename = args.built_version + ".ttl" | ||
_logger.debug("ttl_filename = %r.", ttl_filename) | ||
ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename) | ||
ontology_graph.parse(data=ttl_data, format="turtle") | ||
if args.ontology_graph: | ||
for arg_ontology_graph in args.ontology_graph: | ||
_logger.debug("arg_ontology_graph = %r.", arg_ontology_graph) | ||
ontology_graph.parse(arg_ontology_graph) | ||
# Get the ontology graph based on the CASE version and supplemental graphs specified by the CLI | ||
ontology_graph = get_ontology_graph( | ||
case_version=args.built_version, supplemental_graphs=args.ontology_graph | ||
) | ||
|
||
# Get the list of undefined CDO concepts in the graph | ||
undefined_cdo_concepts = get_invalid_cdo_concepts(data_graph, ontology_graph) | ||
|
||
for undefined_cdo_concept in sorted(undefined_cdo_concepts): | ||
warnings.warn(undefined_cdo_concept, NonExistentCDOConceptWarning) | ||
undefined_cdo_concepts_message = ( | ||
|
@@ -299,8 +241,9 @@ def main() -> None: | |
if args.format != "human": | ||
validator_kwargs["serialize_report_graph"] = args.format | ||
|
||
validate_result: Tuple[bool, Union[Exception, bytes, str, rdflib.Graph], str] | ||
validate_result = pyshacl.validate( | ||
validate_result: Tuple[ | ||
bool, Union[Exception, bytes, str, rdflib.Graph], str | ||
ajnelson-nist marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] = pyshacl.validate( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this call needs to be replaced with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this is addressed in |
||
data_graph, | ||
shacl_graph=ontology_graph, | ||
ont_graph=ontology_graph, | ||
|
@@ -311,7 +254,7 @@ def main() -> None: | |
allow_warnings=True if args.allow_warnings else False, | ||
debug=True if args.debug else False, | ||
do_owl_imports=True if args.imports else False, | ||
**validator_kwargs | ||
**validator_kwargs, | ||
) | ||
|
||
# Relieve RAM of the data graph after validation has run. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from typing import Set, Union | ||
|
||
import rdflib | ||
|
||
|
||
class ValidationResult: | ||
def __init__( | ||
self, | ||
conforms: bool, | ||
graph: Union[Exception, bytes, str, rdflib.Graph], | ||
text: str, | ||
undefined_concepts: Set[rdflib.URIRef], | ||
) -> None: | ||
self.conforms = conforms | ||
self.graph = graph | ||
self.text = text | ||
self.undefined_concepts = undefined_concepts | ||
|
||
|
||
class NonExistentCDOConceptWarning(UserWarning): | ||
""" | ||
This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags. | ||
""" | ||
|
||
pass | ||
|
||
|
||
class NonExistentCASEVersionError(Exception): | ||
""" | ||
This class is used when an invalid CASE version is requested that is not supported by the library. | ||
""" | ||
|
||
pass |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For our awareness, this is a narrower argument than the first argument of `pyshacl.validate`; here's today's definition: https://github.com/RDFLib/pySHACL/blob/v0.23.0/pyshacl/validate.py#L369-L370
`pyshacl.validate`'s first argument seems to permit a string to be a file path or URL, OR a full string dump of a graph. See these lines for heuristics in `pyshacl.rdfutil.load.load_from_graph`: https://github.com/RDFLib/pySHACL/blob/v0.23.0/pyshacl/rdfutil/load.py#L222-L227
Should we implement "`str` means path" now, or just adopt the `load_from_graph` usage now from these lines: https://github.com/RDFLib/pySHACL/blob/v0.23.0/pyshacl/validate.py#L424-L428
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Welp, I found an issue pushing us towards expanding `input_file` from `str`. `case_validate` is written to take multiple input files as data graphs (as well as multiple input files as ontology graphs). I believe this behavior should be preserved, because otherwise a user that needs to read two data graphs at once needs to do some intermediary graph compilation before calling `case_validate`.
So, I think the first argument needs to become at least either `Union[str, List[str]]` or `Union[str, Graph]`. The current code path from the CLI entry point I think favors `Union[str, List[str]]`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this is addressed in `15f00c9`.