From ff22d59cfd58c9ea3da48699245a81800e4b0ab3 Mon Sep 17 00:00:00 2001 From: Max Novelli Date: Mon, 15 Jul 2024 11:01:41 +0200 Subject: [PATCH] WIP: updated schema configuration, implementation of values extraction --- ...on.json.example => coda.imsc.json.example} | 70 ++++++++++++------- src/background_ingestor.py | 37 +++++----- 2 files changed, 63 insertions(+), 44 deletions(-) rename resources/{coda.configuration.json.example => coda.imsc.json.example} (67%) diff --git a/resources/coda.configuration.json.example b/resources/coda.imsc.json.example similarity index 67% rename from resources/coda.configuration.json.example rename to resources/coda.imsc.json.example index b9995a8..abb5934 100644 --- a/resources/coda.configuration.json.example +++ b/resources/coda.imsc.json.example @@ -5,56 +5,74 @@ "selector": "filename:starts_with:/ess/data/coda", "variables" : { "pid": { - "source": "NXS:/entry/entry_identifier_uuid", - "type": "string" + "source": "NXS", + "path": "/entry/entry_identifier_uuid", + "value_type": "string" }, "proposal_id": { - "source": "NXS:/entry/experiment_identifier", - "type": "string", + "source": "NXS", + "path": "/entry/experiment_identifier", + "value_type": "string" }, "pi_firstname": { - "source": "SC:proposals/:pi_firstname", - "type": "string" + "source": "SC", + "url": "proposals/", + "field" : "pi_firstname", + "value_type": "string" }, "pi_lastname": { - "source": "SC:proposals/:pi_lastname", - "type": "string" + "source": "SC", + "url": "proposals/", + "field": "pi_lastname", + "value_type": "string" }, "pi_email": { - "source": "SC:proposals/:pi_email", - "type": "string" + "source": "SC", + "url": "proposals/", + "field": "pi_email", + "value_type": "string" }, "dataset_name": { - "source": "NXS:/entry/title", - "type": "string" + "source": "NXS", + "path": "/entry/title", + "value_type": "string" }, "instrument_name": { - "source": "NXS:/entry/instrument/name", - "type": "string", + "source": "NXS", + "path": 
"/entry/instrument/name", + "value_type": "string" }, "instrument_id": { - "source": "SC:instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D:id", - "type": "string" + "source": "SC", + "url": "instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D", + "field": "id", + "value_type": "string" }, "start_time": { - "source": "NXS:/entry/start_time", - "type": "date" + "source": "NXS", + "path": "/entry/start_time", + "value_type": "date" }, "end_time": { - "source": "NXS:/entry/end_time", - "type": "date" + "source": "NXS", + "path": "/entry/end_time", + "value_type": "date" }, "run_number": { - "source": "NXS:/entry/entry_identifier", - "type": "integer" + "source": "NXS", + "path": "/entry/entry_identifier", + "value_type": "integer" }, "acquisition_team_members_list": { - "source": "NXS:/entry/user_*/name", - "type": "string[]" + "source": "NXS", + "path" : "/entry/user_*/name", + "value_type": "string[]" } "acquisition_team_members": { - "source": "VALUE:join_with_space:", - "type": "string" + "source": "VALUE", + "operator" : "join_with_space", + "value" : "", + "value_type": "string" } }, "schema": { diff --git a/src/background_ingestor.py b/src/background_ingestor.py index ef286f3..e2736be 100644 --- a/src/background_ingestor.py +++ b/src/background_ingestor.py @@ -66,16 +66,16 @@ def extract_variables_values( # loop on all the variables defined for variable in variables.keys(): - source = variables[variable]["source"].split(":") + source = variables[variable]["source"] value = "" - if source[0] == "NXS": + if source == "NXS": # extract value from nexus file # we need to address path entry/user_*/name - value = h5file[source[1]][...] - elif source[0] == "SC": + value = h5file[variables[variable]["path"]][...] 
+ elif source == "SC": # build url url = replace_variables_values( - config[""]["scicat_url"] + source[1], + config[""]["scicat_url"] + variables[variable]["url"], values ) # retrieve value from SciCat @@ -86,32 +86,33 @@ def extract_variables_values( } ) # extract value - value = response.json()[source[2]] - elif source[0] == "VALUE": + value = response.json()[variables[variable]["field"]] + elif source == "VALUE": # the value is the one indicated # there might be some substitution needed value = replace_variables_values( - source[2], + variables[variable]["value"], values ) - if source[1] == "": - pass - elif source[1] == "join_with_space": - value = ", ".join(value) + if "operator" in variables[variable].keys() and variables[variable]["operator"]: + operator = variables[variable]["operator"] + if operator == "join_with_space": + value = ", ".join(value) else: raise Exception("Invalid variable source configuration") - if variables[variable]["type"] == "string": + value_type = variables[variable]["value_type"] + if value_type == "string": value = str(value) - elif variables[variable]["type"] == "string[]": + elif value_type == "string[]": value = [str(v) for v in value] - elif variables[variable]["type"] == "integer": + elif value_type == "integer": value = int(value) - elif variables[variable]["type"] == "float": + elif value_type == "float": value = float(value) - elif variables[variable]["type"] == "date" and isinstance(value,int): + elif value_type == "date" and isinstance(value,int): value = datetime.datetime.fromtimestamp(value).isoformat() - elif variables[variable]["type"] == "date" and isinstance(value,str): + elif value_type == "date" and isinstance(value,str): value = datetime.datetime.fromisoformat(value).isoformat() values[variable] = value