Skip to content

Commit

Permalink
Merge pull request #168 from BlueBrain/schemas
Browse files Browse the repository at this point in the history
Add schemas when registering / updating nexus resources
  • Loading branch information
AurelienJaquier authored Sep 12, 2024
2 parents 9c56159 + 9d17a3f commit eaa9e43
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 27 deletions.
3 changes: 3 additions & 0 deletions bluepyemodel/access_point/access_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ def store_emodels_sonata(
):
"""Store hoc file produced by export_sonata"""

def update_emodel_images(self, seed, keep_old_images=False):
    """Update an EModel resource with local emodel plots if access_point is nexus.

    The body of this method consists of this docstring only, so calling it
    here does nothing.
    NOTE(review): presumably the Nexus access point overrides it -- confirm.

    Args:
        seed: seed of the emodel whose resource should be updated.
        keep_old_images (bool): presumably whether the images already attached
            to the resource are kept instead of being replaced -- confirm
            against the overriding implementation.
    """

def optimisation_state(self, seed=None, continue_opt=False):
"""Return the state of the optimisation.
Expand Down
53 changes: 40 additions & 13 deletions bluepyemodel/access_point/forge_access_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,33 @@ def resolve(self, text, scope="ontology", strategy="all", limit=1):

return self.forge.resolve(text, scope=scope, strategy=resolving_strategy, limit=limit)

def add_images_to_resource(self, images, resource, filters_existence=None):
    """Attach images to a resource.

    The image type stored in ``about`` is the part of the path located after
    the last "__" and before the following ".". When the path contains no
    "__", the "type" entry of ``filters_existence`` is used instead.

    Args:
        images (list of str): list of local paths to images. ``None`` or an
            empty list attaches nothing.
        resource (kgforge.core.Resource): resource to attach the images to
        filters_existence (dict): contains resource type, name and metadata,
            can be used to search for existence of resource on nexus.
            Used to get image type if cannot be extracted from image path.

    Returns:
        The resource turned into a Dataset, with the images attached.
    """
    resource = Dataset.from_resource(self.forge, resource, store_metadata=True)
    if filters_existence is None:
        filters_existence = {}
    fallback_type = filters_existence.get("type", None)

    for path in images or []:
        # str.split never raises IndexError, so the presence of the "__"
        # separator has to be tested explicitly to reach the fallback.
        if "__" in path:
            resource_type = path.split("__")[-1].split(".")[0]
        else:
            resource_type = fallback_type
        # Do NOT do this BEFORE turning resource into a Dataset.
        # That would break the storing LazyAction into a string
        resource.add_image(
            path=path,
            content_type=f"application/{path.split('.')[-1]}",
            about=resource_type,
        )
    return resource

def register(
self,
resource_description,
Expand All @@ -347,6 +374,7 @@ def register(
replace=False,
distributions=None,
images=None,
type_=None,
):
"""Register a resource from its dictionary description.
Expand All @@ -359,7 +387,9 @@ def register(
replace (bool): whether to replace resource if found with filters_existence
distributions (list): paths to resource object as json and other distributions
images (list): paths to images to be attached to the resource
type_ (str): type of the resource. Will be used to get the schemas.
"""
# pylint: disable=protected-access

if "type" not in resource_description:
raise AccessPointException("The resource description should contain 'type'.")
Expand Down Expand Up @@ -400,20 +430,16 @@ def register(
resource.add_distribution(path, content_type=f"application/{path.split('.')[-1]}")

if images:
for path in images:
try:
resource_type = path.split("__")[-1].split(".")[0]
except IndexError:
resource_type = filters_existence.get("type", None)
# Do NOT do this BEFORE turning resource into a Dataset.
# That would break the storing LazyAction into a string
resource.add_image(
path=path,
content_type=f"application/{path.split('.')[-1]}",
about=resource_type,
)
resource = self.add_images_to_resource(images, resource, filters_existence)

# validate with Entity schema at creation.
# validation with EModelWorkflow schema is done at a later step,
# when EModelWorkflow resource is complete
if type_ == "EModelWorkflow":
type_ = "Entity"
schema_id = self.forge._model.schema_id(type_)

self.forge.register(resource)
self.forge.register(resource, schema_id=schema_id)

def retrieve(self, id_):
"""Retrieve a resource based on its id"""
Expand Down Expand Up @@ -712,6 +738,7 @@ def object_to_nexus(
replace=replace,
distributions=distributions,
images=nexus_images,
type_=type_,
)

def update_distribution(self, resource, metadata_str, object_):
Expand Down
4 changes: 4 additions & 0 deletions bluepyemodel/access_point/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ def get_model_name_for_final(self, seed):

return f"{self.emodel_metadata.emodel}__{seed}"

def store_or_update_emodel(self, emodel):
    """Calls store_emodel.

    No separate update path is implemented here: the emodel is passed
    unchanged to ``store_emodel``.

    Args:
        emodel: emodel to store; forwarded as is to ``store_emodel``.
    """
    self.store_emodel(emodel)

def store_emodel(self, emodel):
"""Store an emodel obtained from BluePyOpt in the final.json. Note that if a model in the
final.json has the same key (emodel__iteration_tag__seed), it will be overwritten.
Expand Down
84 changes: 80 additions & 4 deletions bluepyemodel/access_point/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,14 @@ def get_nexus_subject(self, species):

return subject

def store_object(self, object_, seed=None, description=None, currents=None):
def store_object(
self, object_, seed=None, description=None, currents=None, is_analysis_suitable=False
):
"""Store a BPEM object on Nexus"""

metadata_dict = self.emodel_metadata_ontology.for_resource()
metadata_dict = self.emodel_metadata_ontology.for_resource(
is_analysis_suitable=is_analysis_suitable
)
if seed is not None:
metadata_dict["seed"] = seed
if description is not None:
Expand Down Expand Up @@ -673,6 +677,7 @@ def check_emodel_workflow_configurations(self, emodel_workflow):

def store_or_update_emodel_workflow(self, emodel_workflow):
"""If emodel workflow is not on nexus, store it. If it is, fetch it and update its state"""
# pylint: disable=protected-access
type_ = "EModelWorkflow"

filters = {"type": type_}
Expand All @@ -692,11 +697,13 @@ def store_or_update_emodel_workflow(self, emodel_workflow):
)
# if present on nexus -> update its state
else:
schema_type = "Entity"
resource = resources[0]
resource.state = emodel_workflow.state
ids_dict = emodel_workflow.get_related_nexus_ids()
if "generates" in ids_dict:
resource.generates = ids_dict["generates"]
schema_type = "EModelWorkflow"
if "hasPart" in ids_dict:
resource.hasPart = ids_dict["hasPart"]

Expand All @@ -705,7 +712,35 @@ def store_or_update_emodel_workflow(self, emodel_workflow):
resource, self.emodel_metadata.as_string(), emodel_workflow
)

self.access_point.forge.update(updated_resource)
schema_id = self.access_point.forge._model.schema_id(schema_type)
self.access_point.forge.update(updated_resource, schema_id=schema_id)

def update_emodel_images(self, seed, keep_old_images=False):
    """Refresh the images of the EModel resource matching ``seed``.

    The resource is searched with the "EModel" type, the metadata filters
    (current and legacy format) and the seed. If nothing is fetched, the
    method returns without doing anything. Otherwise the images listed under
    "nexus_images" in the local emodel are attached and the resource is
    updated using the "EModel" schema.

    Args:
        seed: seed of the emodel; converted to int for the search.
        keep_old_images (bool): when False, the images already attached to
            the resource are dropped before adding the new ones.
    """
    # pylint: disable=protected-access
    resource_type = "EModel"
    ontology = self.emodel_metadata_ontology

    current_filters = ontology.filters_for_resource()
    legacy_filters = ontology.filters_for_resource_legacy()
    seed_value = int(seed)

    matches = self.access_point.fetch_legacy_compatible(
        {"type": resource_type, **current_filters, "seed": seed_value},
        {"type": resource_type, **legacy_filters, "seed": seed_value},
    )
    if matches is None:
        return

    emodel_resource = matches[0]
    if not keep_old_images:
        # start from a clean slate so that only the new plots remain
        emodel_resource.image = []

    image_paths = self.get_emodel(seed=seed).as_dict()["nexus_images"]
    emodel_resource = self.access_point.add_images_to_resource(
        image_paths, emodel_resource, filters_existence=None
    )

    forge = self.access_point.forge
    forge.update(emodel_resource, schema_id=forge._model.schema_id("EModel"))

def get_emodel(self, seed=None):
"""Fetch an emodel"""
Expand All @@ -727,6 +762,35 @@ def get_emodel(self, seed=None):

return emodel

def store_or_update_emodel(self, emodel):
    """Update emodel if already present on nexus. If not, store it.

    The resource is searched with the "EModel" type, the metadata filters
    (current and legacy format) and the emodel seed. When nothing is fetched,
    the emodel is stored with ``store_emodel``. Otherwise the images, the
    distribution and the score of the first fetched resource are refreshed
    and the resource is updated using the "EModel" schema.

    Args:
        emodel: emodel to store or update. Its ``seed``, ``fitness`` and the
            "nexus_images" entry of ``as_dict()`` are read here.
    """
    # pylint: disable=protected-access
    type_ = "EModel"

    filters = {"type": type_}
    filters.update(self.emodel_metadata_ontology.filters_for_resource())
    filters_legacy = {"type": type_}
    filters_legacy.update(self.emodel_metadata_ontology.filters_for_resource_legacy())
    seed = int(emodel.seed)
    filters["seed"] = seed
    filters_legacy["seed"] = seed
    resources = self.access_point.fetch_legacy_compatible(filters, filters_legacy)

    if resources is None:
        self.store_emodel(emodel)
        return

    em_r = resources[0]
    emodel_dict = emodel.as_dict()

    em_r = self.access_point.add_images_to_resource(
        emodel_dict["nexus_images"], em_r, filters_existence=None
    )
    # Keep the resource returned by update_distribution, as done for the
    # EModelWorkflow update: discarding it would send the resource without
    # the refreshed distribution to forge.update.
    em_r = self.access_point.update_distribution(
        em_r, self.emodel_metadata.as_string(), emodel
    )
    em_r.score = emodel.fitness

    schema_id = self.access_point.forge._model.schema_id(type_)
    self.access_point.forge.update(em_r, schema_id=schema_id)

def store_emodel(self, emodel, description=None):
"""Store an EModel on Nexus"""

Expand All @@ -738,7 +802,18 @@ def store_emodel(self, emodel, description=None):
)

emodel.workflow_id = nexus_id
self.store_object(emodel, seed=emodel.seed, description=description)
is_analysis_suitable = (
self.has_fitness_calculator_configuration
and self.has_model_configuration
and self.has_pipeline_settings
and self.has_targets_configuration
)
self.store_object(
emodel,
seed=emodel.seed,
description=description,
is_analysis_suitable=is_analysis_suitable,
)
# wait for the object to be uploaded and fetchable
time.sleep(self.sleep_time)

Expand Down Expand Up @@ -1256,6 +1331,7 @@ def store_morphology(self, morphology_name, morphology_path, mtype=None, reconst
self.access_point.register(
resource_description=payload,
distributions=[morphology_path],
type_="NeuronMorphology",
)

def store_hocs(
Expand Down
31 changes: 27 additions & 4 deletions bluepyemodel/emodel_pipeline/emodel_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,28 @@ def ttype_annotation_dict(self):
"name": "T-type annotation",
}

def annotation_list(self):
"""Returns an annotation list containing mtype, etype and ttype annotations"""
def annotation_list(self, is_analysis_suitable=False):
"""Returns an annotation list containing mtype, etype and ttype annotations.
Args:
is_analysis_suitable (bool): Should be True only when managing metadata for resources
of type EModel, for which all data are complete (has FCC, ETC, EMC, etc.).
"""
annotation_list = []
if is_analysis_suitable:
annotation_list.append(
{
"type": ["QualityAnnotation", "Annotation"],
"hasBody": {
"id": "https://bbp.epfl.ch/ontologies/core/bmo/AnalysisSuitable",
"type": ["AnnotationBody", "DataScope"],
"label": "Analysis Suitable",
},
"motivatedBy": {"id": "quality:Assessment", "type": "Motivation"},
"name": "Data usage scope annotation",
"note": "Analysis can be run on this model.",
}
)
if self.etype:
annotation_list.append(self.etype_annotation_dict())
if self.mtype:
Expand Down Expand Up @@ -182,15 +201,19 @@ def filters_for_resource_legacy(self):
"""Legacy metadata used for filtering, without the annotation list"""
return self.as_dict_for_resource_legacy()

def for_resource(self):
def for_resource(self, is_analysis_suitable=False):
"""Metadata to add to a resource to register.
DO NOT use for filtering. For filtering, use self.filters_for_resource() instead.
Args:
is_analysis_suitable (bool): Should be True only when managing metadata for resources
of type EModel, for which all data are complete (has FCC, ETC, EMC, etc.).
"""

metadata = self.as_dict_for_resource()

metadata["annotation"] = self.annotation_list()
metadata["annotation"] = self.annotation_list(is_analysis_suitable=is_analysis_suitable)

return metadata

Expand Down
9 changes: 9 additions & 0 deletions bluepyemodel/tasks/emodel_creation/optimisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import luigi

from bluepyemodel.access_point.access_point import OptimisationState
from bluepyemodel.access_point.nexus import NexusAccessPoint
from bluepyemodel.efeatures_extraction.efeatures_extraction import extract_save_features_protocols
from bluepyemodel.efeatures_extraction.targets_configurator import TargetsConfigurator
from bluepyemodel.emodel_pipeline.plotting import optimisation
Expand Down Expand Up @@ -1287,6 +1288,9 @@ def run(self):
sinespec_settings=sinespec_settings,
)

if isinstance(self.access_point, NexusAccessPoint):
self.access_point.update_emodel_images(seed=self.seed, keep_old_images=False)

def output(self):
""" """

Expand Down Expand Up @@ -1366,6 +1370,11 @@ def run(self):
only_validated=True,
)

if isinstance(self.access_point, NexusAccessPoint):
seeds = [emodel.seed for emodel in self.access_point.get_emodels()]
for seed in seeds:
self.access_point.update_emodel_images(seed=seed, keep_old_images=False)

def output(self):
""" """

Expand Down
2 changes: 1 addition & 1 deletion bluepyemodel/validation/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,6 @@ def validate(access_point, mapper, preselect_for_validation=False):
)
)

access_point.store_emodel(model)
access_point.store_or_update_emodel(model)

return emodels
11 changes: 6 additions & 5 deletions tests/unit_tests/test_emodelmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,12 @@ def test_ttype_annotation_dict(metadata):

def test_annotation_list(metadata):
"""Test annotation_list method."""
annotation_list = metadata.annotation_list()
assert len(annotation_list) == 3
assert annotation_list[0]["hasBody"]["label"] == "cAC"
assert annotation_list[1]["hasBody"]["label"] == "L5_TPC:B"
assert annotation_list[2]["hasBody"]["label"] == "245_L5 PT CTX"
annotation_list = metadata.annotation_list(is_analysis_suitable=True)
assert len(annotation_list) == 4
assert annotation_list[0]["hasBody"]["label"] == "Analysis Suitable"
assert annotation_list[1]["hasBody"]["label"] == "cAC"
assert annotation_list[2]["hasBody"]["label"] == "L5_TPC:B"
assert annotation_list[3]["hasBody"]["label"] == "245_L5 PT CTX"

# test when some of etype, mtype or ttype are None
metadata_args_2 = metadata_args.copy()
Expand Down

0 comments on commit eaa9e43

Please sign in to comment.