Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(validate-data): validate MovingImageRepresentation (DEV-4333) #1268

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1d78bb7
Update file_value_shacl.py
Nora-Olivia-Ammann Nov 12, 2024
16e0afa
add file value ttl
Nora-Olivia-Ammann Nov 12, 2024
9cc95ed
Update file_value_cardinalities.ttl
Nora-Olivia-Ammann Nov 12, 2024
190e715
rename test file
Nora-Olivia-Ammann Nov 12, 2024
93052cf
Update file_value_violation.xml
Nora-Olivia-Ammann Nov 12, 2024
3a03d5e
add sparql
Nora-Olivia-Ammann Nov 12, 2024
76c56fe
serialise file value
Nora-Olivia-Ammann Nov 12, 2024
13e11ab
Update file_value_shacl.py
Nora-Olivia-Ammann Nov 12, 2024
493ae84
Update validate_data.py
Nora-Olivia-Ammann Nov 12, 2024
170fbd2
change file value turtle
Nora-Olivia-Ammann Nov 12, 2024
d6fb91e
update ttl
Nora-Olivia-Ammann Nov 12, 2024
04d97dc
Update file_value_cardinalities.ttl
Nora-Olivia-Ammann Nov 12, 2024
cd24fd4
remove generic
Nora-Olivia-Ammann Nov 12, 2024
1742690
Update file_value_shacl.py
Nora-Olivia-Ammann Nov 12, 2024
0e991f3
Update validation_result.py
Nora-Olivia-Ammann Nov 12, 2024
37d7104
Update test_file_value_shacl.py
Nora-Olivia-Ammann Nov 13, 2024
f79b1cb
fix test
Nora-Olivia-Ammann Nov 13, 2024
460f699
fix file value
Nora-Olivia-Ammann Nov 13, 2024
0311ac6
Update test_make_data_rdf.py
Nora-Olivia-Ammann Nov 13, 2024
036a2e1
rename
Nora-Olivia-Ammann Nov 13, 2024
cb08c97
Update test_validate_data.py
Nora-Olivia-Ammann Nov 13, 2024
b398cdb
Merge branch 'main' into wip/dev-4333-validate-data-add-video-filevalue
Nora-Olivia-Ammann Nov 13, 2024
709c087
Update test_file_value_shacl.py
Nora-Olivia-Ammann Nov 13, 2024
23e9191
Merge branch 'wip/dev-4333-validate-data-add-video-filevalue' of http…
Nora-Olivia-Ammann Nov 13, 2024
0c38f43
fix test
Nora-Olivia-Ammann Nov 13, 2024
d1164cc
Update validate_data.py
Nora-Olivia-Ammann Nov 13, 2024
845aea5
remove generic file value
Nora-Olivia-Ammann Nov 13, 2024
9ea71b9
Merge branch 'main' into wip/dev-4333-validate-data-add-video-filevalue
Nora-Olivia-Ammann Nov 14, 2024
231fdc8
Merge branch 'main' into wip/dev-4333-validate-data-add-video-filevalue
Nora-Olivia-Ammann Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions src/dsp_tools/commands/validate_data/make_data_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
from dsp_tools.commands.validate_data.models.data_rdf import DataRDF
from dsp_tools.commands.validate_data.models.data_rdf import DateValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import DecimalValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import GenericFileValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import GeonameValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import IntValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import LinkValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import ListValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import MovingImageFileValueRDF
from dsp_tools.commands.validate_data.models.data_rdf import RDFTriples
from dsp_tools.commands.validate_data.models.data_rdf import ResourceRDF
from dsp_tools.commands.validate_data.models.data_rdf import RichtextRDF
Expand All @@ -65,7 +65,9 @@ def make_data_rdf(data_deserialised: DataDeserialised) -> DataRDF:
all_triples: list[RDFTriples] = []
for r in data_deserialised.resources:
all_triples.extend(_transform_one_resource(r))
file_values: list[RDFTriples] = [_transform_file_value(x) for x in data_deserialised.file_values]
file_values: list[RDFTriples] = [
transformed for x in data_deserialised.file_values if (transformed := _transform_file_value(x))
]
all_triples.extend(file_values)
return DataRDF(all_triples)

Expand Down Expand Up @@ -205,19 +207,25 @@ def _transform_uri_value(val: ValueDeserialised, res_iri: URIRef) -> ValueRDF:
return UriValueRDF(URIRef(val.prop_name), content, res_iri)


def _transform_file_value(val: AbstractFileValueDeserialised) -> AbstractFileValueRDF:
res_iri = DATA[val.res_id]
def _transform_file_value(val: AbstractFileValueDeserialised) -> AbstractFileValueRDF | None:
if isinstance(val, IIIFUriDeserialised):
return GenericFileValueRDF(res_iri=res_iri, value=Literal(str(val.value)))
return None
return _map_into_correct_file_value(val)


def _map_into_correct_file_value(val: AbstractFileValueDeserialised) -> AbstractFileValueRDF | None:
res_iri = DATA[val.res_id]
file_literal = Literal(val.value)
file_extension = _get_file_extension(val.value)
return GenericFileValueRDF(res_iri=res_iri, value=Literal(file_extension))
match file_extension:
case "mp4":
return MovingImageFileValueRDF(res_iri=res_iri, value=file_literal)
case _:
return None


def _get_file_extension(value: str | None) -> str:
file_extension = "No file path was given."
if value:
if "." not in value:
file_extension = f"This file is missing a valid extension, actual value: {value}"
else:
file_extension = value.split(".")[-1].lower()
file_extension = ""
if value and "." in value:
file_extension = value.split(".")[-1].lower()
return file_extension
10 changes: 4 additions & 6 deletions src/dsp_tools/commands/validate_data/models/data_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,11 @@ def make_graph(self) -> Graph:


@dataclass
class GenericFileValueRDF(AbstractFileValueRDF):
"""This class is a placeholder for all types of file values that are not yet implemented."""

class MovingImageFileValueRDF(AbstractFileValueRDF):
def make_graph(self) -> Graph:
g = Graph()
val_iri = DATA[str(uuid4())]
g.add((val_iri, RDF.type, API_SHAPES.GenericFileValue))
g.add((val_iri, API_SHAPES.fileValueHasValue, self.value))
g.add((self.res_iri, API_SHAPES.hasGenericFileValue, val_iri))
g.add((val_iri, RDF.type, KNORA_API.MovingImageFileValue))
g.add((val_iri, KNORA_API.fileValueHasFilename, self.value))
g.add((self.res_iri, KNORA_API.hasMovingImageFileValue, val_iri))
return g
4 changes: 2 additions & 2 deletions src/dsp_tools/commands/validate_data/models/input_problems.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,12 @@ def sort_value(self) -> str:


@dataclass
class MissingFileValueProblem(InputProblem):
class FileValueProblem(InputProblem):
expected: str

@property
def problem(self) -> str:
return "Required file value is missing"
return self.expected

def get_msg(self) -> str:
return f"{self.problem}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
from dsp_tools.commands.validate_data.models.input_problems import AllProblems
from dsp_tools.commands.validate_data.models.input_problems import ContentRegexProblem
from dsp_tools.commands.validate_data.models.input_problems import DuplicateValueProblem
from dsp_tools.commands.validate_data.models.input_problems import FileValueProblem
from dsp_tools.commands.validate_data.models.input_problems import GenericProblem
from dsp_tools.commands.validate_data.models.input_problems import InputProblem
from dsp_tools.commands.validate_data.models.input_problems import LinkedResourceDoesNotExistProblem
from dsp_tools.commands.validate_data.models.input_problems import LinkTargetTypeMismatchProblem
from dsp_tools.commands.validate_data.models.input_problems import MaxCardinalityProblem
from dsp_tools.commands.validate_data.models.input_problems import MinCardinalityProblem
from dsp_tools.commands.validate_data.models.input_problems import MissingFileValueProblem
from dsp_tools.commands.validate_data.models.input_problems import NonExistentCardinalityProblem
from dsp_tools.commands.validate_data.models.input_problems import UnexpectedResults
from dsp_tools.commands.validate_data.models.input_problems import ValueTypeProblem
Expand Down Expand Up @@ -358,9 +358,9 @@ def _reformat_one_validation_result(validation_result: ValidationResult) -> Inpu

def _reformat_min_cardinality_validation_result(validation_result: ResultMinCardinalityViolation) -> InputProblem:
iris = _reformat_main_iris(validation_result)
file_value_properties = ["shapes:hasGenericFileValue"]
file_value_properties = ["hasMovingImageFileValue"]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in the previous PR, I want to change how this identification is done; I will likely move it up to the query section. However, that change touches a lot of other code that has nothing to do with this PR, so I would like to do it separately.

if iris.prop_name in file_value_properties:
return MissingFileValueProblem(
return FileValueProblem(
res_id=iris.res_id,
res_type=iris.res_type,
prop_name="bitstream / iiif-uri",
Expand Down
32 changes: 12 additions & 20 deletions src/dsp_tools/commands/validate_data/sparql/file_value_shacl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@ def construct_file_value_cardinality(onto: Graph) -> Graph:
Returns:
Graph with file cardinalities
"""
val_prop_mapper = {"MovingImageRepresentation": "hasMovingImageFileValue"}

def as_class_type_and_shacl_shape(cls_name: str) -> tuple[str, str]:
return f"knora-api:{cls_name}", f"api-shapes:{val_prop_mapper[cls_name]}_PropShape"

g = Graph()
g += _construct_generic_file_value_cardinality(onto)
for t in val_prop_mapper.keys():
representation_type, shacl_shape = as_class_type_and_shacl_shape(t)
g += _construct_one_representation_shape(onto, representation_type, shacl_shape)
return g


def _construct_generic_file_value_cardinality(onto: Graph) -> Graph:
def _construct_one_representation_shape(onto: Graph, representation_type: str, shacl_shape: str) -> Graph:
query_s = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sh: <http://www.w3.org/ns/shacl#>
Expand All @@ -30,27 +37,12 @@ def _construct_generic_file_value_cardinality(onto: Graph) -> Graph:
PREFIX dash: <http://datashapes.org/dash#>

CONSTRUCT {
?class sh:property [
a sh:PropertyShape ;
sh:path api-shapes:hasGenericFileValue ;
sh:minCount 1 ;
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message "A file is required for this resource" ;
] .
?class sh:property %(shacl_shape)s .
} WHERE {
?class a owl:Class ;
rdfs:subClassOf ?superClass .
VALUES ?superClass {
knora-api:ArchiveRepresentation
knora-api:AudioRepresentation
knora-api:DocumentRepresentation
knora-api:MovingImageRepresentation
knora-api:StillImageRepresentation
knora-api:TextRepresentation
}
rdfs:subClassOf %(representation_type)s .
}
"""
""" % {"representation_type": representation_type, "shacl_shape": shacl_shape} # noqa: UP031 (printf-string-formatting)
if results_graph := onto.query(query_s).graph:
return results_graph
return Graph()
7 changes: 5 additions & 2 deletions src/dsp_tools/commands/validate_data/validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def _inform_about_experimental_feature() -> None:
"The following information of your data is being validated:",
"Cardinalities",
"If the value type used matches the ontology",
"Content of the values",
]
cprint(LIST_SEPARATOR.join(what_is_validated), color="magenta", attrs=["bold"])

Expand Down Expand Up @@ -117,7 +118,6 @@ def _get_all_onto_classes(ontos: Graph) -> tuple[set[str], set[str]]:
is_value_iri = URIRef(KNORA_API + "isValueClass")
value_classes = set(ontos.subjects(is_value_iri, Literal(True)))
value_cls = {reformat_onto_iri(x) for x in value_classes}
value_cls.add("shapes:GenericFileValue")
return res_cls, value_cls


Expand All @@ -144,12 +144,15 @@ def _create_graphs(onto_client: OntologyClient, list_client: ListClient, data_rd
shapes = construct_shapes_graphs(onto_for_construction, all_lists)
api_shapes = Graph()
api_shapes.parse("src/dsp_tools/resources/validate_data/api-shapes.ttl")
file_shapes = Graph()
file_shapes.parse("src/dsp_tools/resources/validate_data/file_value_cardinalities.ttl")
content_shapes = shapes.content + api_shapes
card_shapes = shapes.cardinality + file_shapes
data = data_rdf.make_graph()
return RDFGraphs(
data=data,
ontos=ontologies,
cardinality_shapes=shapes.cardinality,
cardinality_shapes=card_shapes,
content_shapes=content_shapes,
knora_api=knora_api,
)
Expand Down
49 changes: 0 additions & 49 deletions src/dsp_tools/resources/validate_data/api-shapes.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -254,55 +254,6 @@ api-shapes:UriValue_ClassShape
#########################################


#########################################
# RESOURCES WITH FILES
#########################################

###########################
# ArchiveRepresentation
###########################

# knora-api:ArchiveFileValue -> knora-api:hasArchiveFileValue


###########################
# AudioRepresentation
###########################

# knora-api:AudioFileValue -> knora-api:hasAudioFileValue


###########################
# DocumentRepresentation
###########################

# knora-api:DocumentFileValue -> knora-api:hasDocumentFileValue


###########################
# MovingImageRepresentation
###########################

# knora-api:MovingImageFileValue -> knora-api:hasMovingImageFileValue


###########################
# StillImageRepresentation
###########################

# knora-api:StillImageExternalFileValue -> knora-api:fileValueHasExternalUrl


# knora-api:StillImageFileValue -> knora-api:hasStillImageFileValue


###########################
# TextRepresentation
###########################

# knora-api:TextFileValue -> knora-api:hasTextFileValue


#########################################
# DSP BUILT IN RESOURCES
#########################################
Expand Down
77 changes: 77 additions & 0 deletions src/dsp_tools/resources/validate_data/file_value_cardinalities.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix knora-api: <http://api.knora.org/ontology/knora-api/v2#> .

@prefix api-shapes: <http://api.knora.org/ontology/knora-api/shapes/v2#> .


#########################################
# RESOURCES WITH FILES
#########################################

###########################
# ArchiveRepresentation
###########################

# knora-api:ArchiveFileValue -> knora-api:hasArchiveFileValue


###########################
# AudioRepresentation
###########################

# knora-api:AudioFileValue -> knora-api:hasAudioFileValue


###########################
# DocumentRepresentation
###########################

# knora-api:DocumentFileValue -> knora-api:hasDocumentFileValue


###########################
# MovingImageRepresentation
###########################

# Property shape for the file value of a MovingImageRepresentation.
# A focus node must have exactly one knora-api:hasMovingImageFileValue
# (minCount 1 / maxCount 1), and that value node must also conform to
# api-shapes:MovingImageFileValue_ClassShape (sh:node).
# NOTE(review): presumably this shape is attached to the project's resource
# classes by name via `?class sh:property ...` in file_value_shacl.py -- confirm
# that the IRI built there matches this one exactly.
api-shapes:hasMovingImageFileValue_PropShape
a sh:PropertyShape ;
sh:path knora-api:hasMovingImageFileValue ;
sh:minCount 1 ;
sh:maxCount 1 ;
# The same message text is repeated on the filename pattern constraint of
# api-shapes:MovingImageFileValue_ClassShape.
sh:message "A MovingImageRepresentation requires a file with the extension 'mp4'." ;
sh:node api-shapes:MovingImageFileValue_ClassShape .

# Node shape for the value node of a moving-image file.
# The node must be an instance of knora-api:MovingImageFileValue and must carry
# exactly one knora-api:fileValueHasFilename whose value ends in ".mp4".
api-shapes:MovingImageFileValue_ClassShape
a sh:NodeShape ;
sh:name "Validates the class type" ;
sh:message "MovingImageFileValue" ;
sh:class knora-api:MovingImageFileValue ;
sh:property [
a sh:PropertyShape ;
sh:path knora-api:fileValueHasFilename ;
sh:minCount 1 ;
sh:maxCount 1 ;
# Turtle unescapes "\\." to "\.", so the effective regex is `.+\.mp4$`:
# at least one character followed by a literal ".mp4" at the end.
# NOTE(review): no sh:flags is given, so the match is case-sensitive. If the
# code producing the data lower-cases the extension before deciding the value
# type, a name such as "video.MP4" would be typed as MovingImageFileValue but
# rejected here -- confirm whether that is intended.
sh:pattern ".+\\.mp4$" ;
sh:severity sh:Violation ;
sh:message "A MovingImageRepresentation requires a file with the extension 'mp4'." ;
] ;
sh:severity sh:Violation .

###########################
# StillImageRepresentation
###########################

# knora-api:StillImageExternalFileValue -> knora-api:fileValueHasExternalUrl


# knora-api:StillImageFileValue -> knora-api:hasStillImageFileValue


###########################
# TextRepresentation
###########################

# knora-api:TextFileValue -> knora-api:hasTextFileValue
Loading