Skip to content

Commit

Permalink
WIP: updated schema configuration, implementation of values extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
nitrosx committed Jul 15, 2024
1 parent 0ee0acc commit ff22d59
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,74 @@
"selector": "filename:starts_with:/ess/data/coda",
"variables" : {
"pid": {
"source": "NXS:/entry/entry_identifier_uuid",
"type": "string"
"source": "NXS",
"path": "/entry/entry_identifier_uuid",
"value_type": "string"
},
"proposal_id": {
"source": "NXS:/entry/experiment_identifier",
"type": "string",
"source": "NXS",
"path": "/entry/experiment_identifier",
"value_type": "string"
},
"pi_firstname": {
"source": "SC:proposals/<proposal_id>:pi_firstname",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field" : "pi_firstname",
"value_type": "string"
},
"pi_lastname": {
"source": "SC:proposals/<proposal_id>:pi_lastname",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field": "pi_lastname",
"value_type": "string"
},
"pi_email": {
"source": "SC:proposals/<proposal_id>:pi_email",
"type": "string"
"source": "SC",
"url": "proposals/<proposal_id>",
"field": "pi_email",
"value_type": "string"
},
"dataset_name": {
"source": "NXS:/entry/title",
"type": "string"
"source": "NXS",
"path": "/entry/title",
"value_type": "string"
},
"instrument_name": {
"source": "NXS:/entry/instrument/name",
"type": "string",
"source": "NXS",
"path": "/entry/instrument/name",
"value_type": "string"
},
"instrument_id": {
"source": "SC:instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D:id",
"type": "string"
"source": "SC",
"url": "instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D",
"field": "id",
"value_type": "string"
},
"start_time": {
"source": "NXS:/entry/start_time",
"type": "date"
"source": "NXS",
"path": "/entry/start_time",
"value_type": "date"
},
"end_time": {
"source": "NXS:/entry/end_time",
"type": "date"
"source": "NXS",
"path": "/entry/end_time",
"value_type": "date"
},
"run_number": {
"source": "NXS:/entry/entry_identifier",
"type": "integer"
"source": "NXS",
"path": "/entry/entry_identifier",
"value_type": "integer"
},
"acquisition_team_members_list": {
"source": "NXS:/entry/user_*/name",
"type": "string[]"
"source": "NXS",
"path" : "/entry/user_*/name",
"value_type": "string[]"
},
"acquisition_team_members": {
"source": "VALUE:join_with_space:<acquisition_team_members>",
"type": "string"
"source": "VALUE",
"operator" : "join_with_space",
"value" : "<acquisition_team_members>",
"value_type": "string"
}
},
"schema": {
Expand Down
37 changes: 19 additions & 18 deletions src/background_ingestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ def extract_variables_values(

# loop on all the variables defined
for variable in variables.keys():
source = variables[variable]["source"].split(":")
source = variables[variable]["source"]
value = ""
if source[0] == "NXS":
if source == "NXS":
# extract value from nexus file
# we need to address path entry/user_*/name
value = h5file[source[1]][...]
elif source[0] == "SC":
value = h5file[variables[variable]["path"]][...]
elif source == "SC":
# build url
url = replace_variables_values(
config[""]["scicat_url"] + source[1],
config[""]["scicat_url"] + variables[variable]["url"],
values
)
# retrieve value from SciCat
Expand All @@ -86,32 +86,33 @@ def extract_variables_values(
}
)
# extract value
value = response.json()[source[2]]
elif source[0] == "VALUE":
value = response.json()[variables[variable]["field"]]
elif source == "VALUE":
# the value is the one indicated
# there might be some substitution needed
value = replace_variables_values(
source[2],
variables[variable]["value"],
values
)
if source[1] == "":
pass
elif source[1] == "join_with_space":
value = ", ".join(value)
if "operator" in variables[variable].keys() and variables[variable]["operator"]:
operator = variables[variable]["operator"]
if operator == "join_with_space":
value = ", ".join(value)
else:
raise Exception("Invalid variable source configuration")

if variables[variable]["type"] == "string":
value_type = variables[variable]["value_type"]
if value_type == "string":
value = str(value)
elif variables[variable]["type"] == "string[]":
elif value_type == "string[]":
value = [str(v) for v in value]
elif variables[variable]["type"] == "integer":
elif value_type == "integer":
value = int(value)
elif variables[variable]["type"] == "float":
elif value_type == "float":
value = float(value)
elif variables[variable]["type"] == "date" and isinstance(value,int):
elif value_type == "date" and isinstance(value,int):
value = datetime.datetime.fromtimestamp(value).isoformat()
elif variables[variable]["type"] == "date" and isinstance(value,str):
elif value_type == "date" and isinstance(value,str):
value = datetime.datetime.fromisoformat(value).isoformat()

values[variable] = value
Expand Down

0 comments on commit ff22d59

Please sign in to comment.