-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update dtype converter and add tests.
- Loading branch information
1 parent
9cf66f6
commit 2f7a3d6
Showing
3 changed files
with
116 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) | ||
# import scippnexus as snx | ||
import datetime | ||
import json | ||
import logging | ||
import pathlib | ||
from urllib.parse import urljoin | ||
|
||
|
@@ -13,6 +13,7 @@ | |
build_background_ingestor_arg_parser, | ||
build_scicat_background_ingester_config, | ||
) | ||
from scicat_dataset import convert_to_type | ||
from scicat_logging import build_logger | ||
from scicat_metadata import collect_schemas, select_applicable_schema | ||
from system_helpers import exit_at_exceptions | ||
|
@@ -23,24 +24,6 @@ def replace_variables_values(url: str, values: dict) -> str: | |
url = url.replace("{" + key + "}", str(value)) | ||
return url | ||
|
||
def convert_to_type(input_value, value_type: str) : | ||
output_value = None | ||
if value_type == "string": | ||
output_value = str(input_value) | ||
elif value_type == "string[]": | ||
output_value = [str(v) for v in input_value] | ||
elif value_type == "integer": | ||
output_value = int(input_value) | ||
elif value_type == "float": | ||
output_value = float(input_value) | ||
elif value_type == "date" and isinstance(input_value, int): | ||
output_value = datetime.datetime.fromtimestamp(input_value, tz=datetime.UTC).isoformat() | ||
elif value_type == "date" and isinstance(input_value, str): | ||
output_value = datetime.datetime.fromisoformat(input_value).isoformat() | ||
else | ||
raise Exception("Invalid value type") | ||
return output_value | ||
|
||
|
||
def extract_variables_values( | ||
variables: dict, h5file, config: BackgroundIngestorConfig | ||
|
@@ -82,90 +65,40 @@ def extract_variables_values( | |
else: | ||
raise Exception("Invalid variable source configuration") | ||
|
||
values[variable] = convert_to_type(value,variables[variable]["value_type"]) | ||
values[variable] = convert_to_type(value, variables[variable]["value_type"]) | ||
|
||
return values | ||
|
||
|
||
def prepare_scicat_dataset(metadata_schema, values): | ||
""" | ||
Prepare scicat dataset as dictionary ready to be sent over to scicat as a POST request | ||
This is an example: | ||
{ | ||
"pid": "20.500.12269/e3690b21-ee8c-40d6-9409-6b6fdca776d2", | ||
"datasetName": "this is a dataset", | ||
"description": "this is the description of the dataset", | ||
"principalInvestigator": "Massimiliano Novelli", | ||
"creationLocation": "ESS:CODA", | ||
"scientificMetadata": { | ||
"run_number": { | ||
"value": 18856, | ||
"unit": "", | ||
"human_name": "Run Number", | ||
"type": "integer" | ||
}, | ||
"sample_temperature": { | ||
"value": 20.4, | ||
"unit": "C", | ||
"human_name": "Sample Temperature", | ||
"type": "quantity" | ||
}, | ||
"start_time" : { | ||
"value" : "2024-07-16T09:30:12.987Z", | ||
"unit" : "", | ||
"human_name" : "Start Time", | ||
"type" : "date" | ||
} | ||
}, | ||
"owner": "Massimiliano Novelli", | ||
"ownerEmail": "[email protected]", | ||
"sourceFolder": "/ess/data/coda/2024/616254", | ||
"contactEmail": "[email protected]", | ||
"creationTime": "2024-07-16T10:00:00.000Z", | ||
"type": "raw", | ||
"techniques": [ | ||
{ | ||
"pid": "http://purl.org/pan-science/PaNET/PaNET01155", | ||
"names": "absorption and phase contrast nanotomography" | ||
} | ||
], | ||
"instrumentId": "20.500.12269/765b3dc3-f658-410e-b371-04dd1adcd520", | ||
"sampleId": "bd31725a-dbfd-4c32-87db-1c1ebe61e5ca", | ||
"proposalId": "616254", | ||
"ownerGroup": "ess_proposal_616254", | ||
"accessGroups": [ | ||
"scientific information management systems group" | ||
] | ||
} | ||
""" | ||
schema = metadata_schema["schema"] | ||
"""Prepare scicat dataset as dictionary ready to be ``POST``ed.""" | ||
schema: dict = metadata_schema["schema"] | ||
dataset = {} | ||
scientific_metadata = { | ||
'ingestor_metadata_schema_id' : { | ||
'ingestor_metadata_schema_id': { | ||
"value": metadata_schema["id"], | ||
"unit": "", | ||
"human_name": "Ingestor Metadata Schema Id", | ||
"type": "string" | ||
"type": "string", | ||
} | ||
} | ||
for key, field in schema.items(): | ||
for field in schema.values(): | ||
machine_name = field["machine_name"] | ||
field_type = field["type"] | ||
if field["field_type"] == "high_level": | ||
dataset[machine_name] = convert_to_type( | ||
replace_variables_values(field["value"],values), | ||
field_type | ||
replace_variables_values(field["value"], values), field_type | ||
) | ||
elif field["field_type"] == "scientific_metadata": | ||
scientific_metadata[machine_name] = { | ||
"value" : convert_to_type( | ||
replace_variables_values(field["value"],values), | ||
field_type | ||
"value": convert_to_type( | ||
replace_variables_values(field["value"], values), field_type | ||
), | ||
"unit" : "", | ||
"human_name" : field["human_name"] if "human_name" is in field.keys() and field["human_name"] else machine_name, | ||
"type" : field_type | ||
"unit": "", | ||
"human_name": field["human_name"] | ||
if field.get("human_name", None) | ||
else machine_name, | ||
"type": field_type, | ||
} | ||
else: | ||
raise Exception("Metadata schema field type invalid") | ||
|
@@ -175,7 +108,7 @@ def prepare_scicat_dataset(metadata_schema, values): | |
return dataset | ||
|
||
|
||
def create_scicat_dataset(dataset,config): | ||
def create_scicat_dataset(dataset: str, config: dict, logger: logging.Logger) -> dict: | ||
""" | ||
Execute a POST request to scicat to create a dataset | ||
""" | ||
|
@@ -191,19 +124,17 @@ def create_scicat_dataset(dataset,config): | |
|
||
result = response.json() | ||
if response.ok: | ||
|
||
... | ||
else: | ||
err = result.get("error", {}) | ||
raise Exception(f"Error creating new dataset: {err}") | ||
|
||
logger.info( | ||
"Dataset create successfully. Dataset pid: %s", | ||
result['pid'] | ||
) | ||
logger.info("Dataset create successfully. Dataset pid: %s", result['pid']) | ||
return result | ||
|
||
def prepare_files_list(nexus_file,done_writing_message_file,config): ... | ||
def prepare_scicat_origdatablock(files_list,config): ... | ||
|
||
def prepare_files_list(nexus_file, done_writing_message_file, config): ... | ||
def prepare_scicat_origdatablock(files_list, config): ... | ||
def create_scicat_origdatablock( | ||
scicat_dataset_pid, nexus_file=None, done_writing_message_file=None | ||
): ... | ||
|
@@ -254,15 +185,13 @@ def main() -> None: | |
) | ||
|
||
# create files list with b2blake hash of all the files | ||
files_list = prepare_files_list(nexus_file_path,done_writing_message_file,config) | ||
_ = prepare_files_list(nexus_file_path, done_writing_message_file, config) | ||
|
||
# create and populate scicat dataset entry | ||
scicat_dataset = prepare_scicat_dataset( | ||
metadata_schema, variables_values | ||
) | ||
scicat_dataset = prepare_scicat_dataset(metadata_schema, variables_values) | ||
|
||
# create dataset in scicat | ||
scicat_dataset = create_scicat_dataset(scicat_dataset,config) | ||
scicat_dataset = create_scicat_dataset(scicat_dataset, config) | ||
scicat_dataset_pid = scicat_dataset["pid"] | ||
|
||
# create and populate scicat origdatablock entry | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) | ||
import pytest | ||
from scicat_dataset import convert_to_type | ||
|
||
|
||
def test_dtype_string_converter() -> None: | ||
assert convert_to_type("test", "string") == "test" | ||
assert convert_to_type(123, "string") == "123" | ||
assert convert_to_type(123.456, "string") == "123.456" | ||
|
||
|
||
def test_dtype_string_array_converter() -> None: | ||
assert convert_to_type("test", "string[]") == ["t", "e", "s", "t"] | ||
assert convert_to_type([1, 2, 3], "string[]") == ["1", "2", "3"] | ||
assert convert_to_type([1.1, 2.2, 3.3], "string[]") == ["1.1", "2.2", "3.3"] | ||
|
||
|
||
def test_dtype_integer_converter() -> None: | ||
assert convert_to_type("123", "integer") == 123 | ||
assert convert_to_type(123, "integer") == 123 | ||
assert convert_to_type(123.456, "integer") == 123 | ||
|
||
|
||
def test_dtype_float_converter() -> None: | ||
assert convert_to_type("123.456", "float") == 123.456 | ||
assert convert_to_type(123, "float") == 123.0 | ||
assert convert_to_type(123.456, "float") == 123.456 | ||
|
||
|
||
def test_dtype_date_converter() -> None: | ||
import datetime | ||
|
||
test_datetime_isoformat = "1994-06-28T10:20:30+00:00" | ||
test_datetime = datetime.datetime.fromisoformat(test_datetime_isoformat) | ||
assert convert_to_type("1994-06-28T10:20:30Z", "date") == test_datetime_isoformat | ||
assert convert_to_type(test_datetime.timestamp(), "date") == test_datetime_isoformat | ||
assert convert_to_type(object(), "date") is None | ||
|
||
|
||
def test_dtype_converter_invalid_dtype_raises() -> None: | ||
with pytest.raises(ValueError, match="Invalid dtype description."): | ||
convert_to_type("test", "invalid_type") |