From 0383352cab7331e99e5917d310c7235194eb2bef Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:05:41 +0200 Subject: [PATCH 1/7] Add mapping between model types --- src/scitacean/_base_model.py | 44 +++++- src/scitacean/model.py | 144 ++++++++++++++++++ .../model-generation/templates/model.py.jinja | 34 ++++- 3 files changed, 218 insertions(+), 4 deletions(-) diff --git a/src/scitacean/_base_model.py b/src/scitacean/_base_model.py index a57110d6..fed457c8 100644 --- a/src/scitacean/_base_model.py +++ b/src/scitacean/_base_model.py @@ -86,6 +86,31 @@ def _delete_ignored_args(self, args: Dict[str, Any]) -> None: for key in self._masked_fields: args.pop(key, None) + @classmethod + def user_model_type(cls) -> Optional[type]: + """Return the user model type for this model. + + Returns ``None`` if there is no user model, e.g., for ``Dataset`` + where there is a custom class instead of a plain model. + """ + return None + + @classmethod + def upload_model_type(cls) -> Optional[type]: + """Return the upload model type for this model. + + Returns ``None`` if the model cannot be uploaded or this is an upload model. + """ + return None + + @classmethod + def download_model_type(cls) -> Optional[type]: + """Return the download model type for this model. + + Returns ``None`` if this is a download model. + """ + return None + if is_pydantic_v1(): @classmethod @@ -143,7 +168,23 @@ def _upload_model_dict(self) -> Dict[str, Any]: @classmethod def from_download_model(cls, download_model: Any) -> BaseUserModel: - raise NotImplementedError() + raise NotImplementedError("Function does not exist for BaseUserModel") + + @classmethod + def upload_model_type(cls) -> Optional[type]: + """Return the upload model type for this user model. + + Returns ``None`` if the model cannot be uploaded. + """ + return None + + @classmethod + def download_model_type(cls) -> type: + """Return the download model type for this user model.""" + # There is no sensible default value here as there always exists a download + # model. + # All child classes must implement this function. + raise NotImplementedError("Function does not exist for BaseUserModel") def construct( @@ -159,6 +200,7 @@ def construct( ------- If the model is created without validation, no fields will be converted to their proper type but will simply be whatever arguments are passed. + See ``model_construct`` or :class:`pydantic.BaseModel` for more information. A warning will be emitted in this case. diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 54b67de2..f3f5462c 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -288,6 +288,14 @@ class DownloadAttachment(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def user_model_type(cls) -> Optional[type]: + return Attachment + + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadAttachment + class UploadAttachment(BaseModel): caption: str @@ -300,6 +308,14 @@ class UploadAttachment(BaseModel): sampleId: Optional[str] = None thumbnail: Optional[str] = None + @classmethod + def user_model_type(cls) -> Optional[type]: + return Attachment + + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadAttachment + class DownloadOrigDatablock(BaseModel): dataFileList: Optional[List[DownloadDataFile]] = None @@ -319,6 +335,10 @@ class DownloadOrigDatablock(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadOrigDatablock + class UploadOrigDatablock(BaseModel): dataFileList: List[UploadDataFile] @@ -329,6 +349,10 @@ class UploadOrigDatablock(BaseModel): chkAlg: Optional[str] = None instrumentGroup: Optional[str] = None + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadOrigDatablock + class DownloadDatablock(BaseModel): archiveId: Optional[str] = None @@ -351,6 +375,10 @@ class DownloadDatablock(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadDatablock + class UploadDatablock(BaseModel): archiveId: str @@ -360,6 +388,10 @@ class UploadDatablock(BaseModel): version: str chkAlg: Optional[str] = None + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadDatablock + class DownloadLifecycle(BaseModel): archivable: Optional[bool] = None @@ -387,26 +419,62 @@ class DownloadLifecycle(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def user_model_type(cls) -> Optional[type]: + return Lifecycle + class DownloadTechnique(BaseModel): name: Optional[str] = None pid: Optional[str] = None + @classmethod + def user_model_type(cls) -> Optional[type]: + return Technique + + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadTechnique + class UploadTechnique(BaseModel): name: str pid: str + @classmethod + def user_model_type(cls) -> Optional[type]: + return Technique + + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadTechnique + class DownloadRelationship(BaseModel): pid: Optional[PID] = None relationship: Optional[str] = None + @classmethod + def user_model_type(cls) -> Optional[type]: + return Relationship + + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadRelationship + class UploadRelationship(BaseModel): pid: PID relationship: str + @classmethod + def user_model_type(cls) -> Optional[type]: + return Relationship + + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadRelationship + class DownloadHistory(BaseModel): id: Optional[str] = pydantic.Field(alias="_id", default=None) @@ -417,6 +485,10 @@ class DownloadHistory(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def user_model_type(cls) -> Optional[type]: + return History + class DownloadDataFile(BaseModel): path: Optional[str] = None @@ -431,6 +503,10 @@ class DownloadDataFile(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadDataFile + class UploadDataFile(BaseModel): path: str @@ -445,6 +521,10 @@ class UploadDataFile(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadDataFile + class DownloadInstrument(BaseModel): customMetadata: Optional[Dict[str, Any]] = None @@ -452,6 +532,10 @@ class DownloadInstrument(BaseModel): pid: Optional[str] = None uniqueName: Optional[str] = None + @classmethod + def user_model_type(cls) -> Optional[type]: + return Instrument + class DownloadSample(BaseModel): ownerGroup: Optional[str] = None @@ -471,6 +555,14 @@ class DownloadSample(BaseModel): def _validate_datetime(cls, value: Any) -> Any: return validate_datetime(value) + @classmethod + def user_model_type(cls) -> Optional[type]: + return Sample + + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadSample + class UploadSample(BaseModel): ownerGroup: str @@ -482,6 +574,14 @@ class UploadSample(BaseModel): sampleCharacteristics: Optional[Dict[str, Any]] = None sampleId: Optional[str] = None + @classmethod + def user_model_type(cls) -> Optional[type]: + return Sample + + @classmethod + def download_model_type(cls) -> Optional[type]: + return DownloadSample + @dataclass_optional_args(kw_only=True, slots=True) class Attachment(BaseUserModel): @@ -524,6 +624,14 @@ def make_upload_model(self) -> UploadAttachment: """Construct a SciCat upload model from self.""" return UploadAttachment(**self._upload_model_dict()) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadAttachment + + @classmethod + def download_model_type(cls) -> type: + return DownloadAttachment + @dataclass_optional_args(kw_only=True, slots=True) class Lifecycle(BaseUserModel): @@ -603,6 +711,10 @@ def from_download_model(cls, download_model: DownloadLifecycle) -> Lifecycle: """Construct an instance from an associated SciCat download model.""" return cls(**cls._download_model_dict(download_model)) + @classmethod + def download_model_type(cls) -> type: + return DownloadLifecycle + @dataclass_optional_args(kw_only=True, slots=True) class Technique(BaseUserModel): @@ -618,6 +730,14 @@ def make_upload_model(self) -> UploadTechnique: """Construct a SciCat upload model from self.""" return UploadTechnique(**self._upload_model_dict()) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadTechnique + + @classmethod + def download_model_type(cls) -> type: + return DownloadTechnique + @dataclass_optional_args(kw_only=True, slots=True) class Relationship(BaseUserModel): @@ -633,6 +753,14 @@ def make_upload_model(self) -> UploadRelationship: """Construct a SciCat upload model from self.""" return UploadRelationship(**self._upload_model_dict()) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadRelationship + + @classmethod + def download_model_type(cls) -> type: + return DownloadRelationship + @dataclass_optional_args(kw_only=True, slots=True) class History(BaseUserModel): @@ -657,6 +785,10 @@ def from_download_model(cls, download_model: DownloadHistory) -> History: """Construct an instance from an associated SciCat download model.""" return cls(**cls._download_model_dict(download_model)) + @classmethod + def download_model_type(cls) -> type: + return DownloadHistory + @dataclass_optional_args(kw_only=True, slots=True) class Instrument(BaseUserModel): @@ -686,6 +818,10 @@ def from_download_model(cls, download_model: DownloadInstrument) -> Instrument: """Construct an instance from an associated SciCat download model.""" return cls(**cls._download_model_dict(download_model)) + @classmethod + def download_model_type(cls) -> type: + return DownloadInstrument + @dataclass_optional_args(kw_only=True, slots=True) class Sample(BaseUserModel): @@ -727,6 +863,14 @@ def make_upload_model(self) -> UploadSample: """Construct a SciCat upload model from self.""" return UploadSample(**self._upload_model_dict()) + @classmethod + def upload_model_type(cls) -> Optional[type]: + return UploadSample + + @classmethod + def download_model_type(cls) -> type: + return DownloadSample + # Some models contain fields that are other models which are defined # further down in the file. diff --git a/tools/model-generation/templates/model.py.jinja b/tools/model-generation/templates/model.py.jinja index 02a9a77d..958d392c 100644 --- a/tools/model-generation/templates/model.py.jinja +++ b/tools/model-generation/templates/model.py.jinja @@ -135,11 +135,29 @@ class Upload{{ dset_type|capitalize }}Dataset(BaseModel{{ mask_keyword(dset_spec {%- set fields = spec.fields_for(kind) -%} {%- if fields -%} class {{ name }}(BaseModel{{ mask_keyword(spec, kind) }}): -{%- for field in fields -%} +{% for field in fields -%} {{ model_field(field, kind) }} {%- endfor %} {{- validations(fields) }} +{%- if spec.name not in ["DataFile", "Datablock", "OrigDatablock"] %} + @classmethod + def user_model_type(cls) -> Optional[type]: + return {{ spec.name }} +{%- endif %} + +{%- if kind == "download" and spec.upload_name %} + @classmethod + def upload_model_type(cls) -> Optional[type]: + return {{ spec.upload_name }} +{%- endif %} + +{%- if kind == "upload" %} + @classmethod + def download_model_type(cls) -> Optional[type]: + return {{ spec.download_name }} +{%- endif %} + {% endif -%} {% endfor -%} {% endfor -%} @@ -147,8 +165,8 @@ class {{ name }}(BaseModel{{ mask_keyword(spec, kind) }}): {% for spec in specs.values()|rejectattr("name", "in", ["DataFile", "Datablock", "OrigDatablock"]) -%} @dataclass_optional_args(kw_only=True, slots=True) class {{ spec.name }}(BaseUserModel): -{%- set fields = spec.fields_for("user")|sort(attribute="upload", reverse=True) -%} -{%- for field in fields %} +{% set fields = spec.fields_for("user")|sort(attribute="upload", reverse=True) -%} +{% for field in fields %} {% if field.upload %}{{ field.name }}{% else %}_{{ field.name }}{% endif %}: {{ field.full_type_for("user") }}{{ field_default(field) }} {%- endfor %} {% for field in fields|rejectattr("upload") %} @@ -166,6 +184,16 @@ class {{ spec.name }}(BaseUserModel): return {{ spec.upload_name }}(**self._upload_model_dict()) {% endif %} +{%- if spec.upload_name %} + @classmethod + def upload_model_type(cls) -> Optional[type]: + return {{ spec.upload_name }} +{%- endif %} + + @classmethod + def download_model_type(cls) -> type: + return {{ spec.download_name }} + {% endfor -%} # Some models contain fields that are other models which are defined From b0f93743bf41ed0abeeac52deab65e8fc8d2a407 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:06:02 +0200 Subject: [PATCH 2/7] Add intersphinx mapping for pydantic --- docs/conf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 0eef7dba..7ed7b652 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,9 +40,10 @@ intersphinx_mapping = { "fabric": ("https://docs.fabfile.org/en/latest", None), - "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None), - "python": ("https://docs.python.org/3", None), + "hypothesis": ("https://hypothesis.readthedocs.io/en/latest", None), "paramiko": ("https://docs.paramiko.org/en/latest", None), + "pydantic": ("https://docs.pydantic.dev/latest", None), + "python": ("https://docs.python.org/3", None), } # autodocs includes everything, even irrelevant API internals. autosummary From b79346d5df7c85bd2cb0dceb16a5c152398aaee7 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:12:21 +0200 Subject: [PATCH 3/7] Convert all constituent models --- src/scitacean/_dataset_fields.py | 48 ++++- src/scitacean/dataset.py | 13 -- tests/dataset_test.py | 165 +++++++++++++++++- .../templates/dataset_fields.py.jinja | 52 +++++- 4 files changed, 250 insertions(+), 28 deletions(-) diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py index 7da70f36..7d1b4447 100644 --- a/src/scitacean/_dataset_fields.py +++ b/src/scitacean/_dataset_fields.py @@ -12,7 +12,7 @@ from __future__ import annotations from datetime import datetime, timezone -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union import dateutil.parser @@ -21,7 +21,11 @@ from .datablock import OrigDatablock from .filesystem import RemotePath from .model import ( + construct, + BaseModel, + BaseUserModel, DownloadDataset, + DownloadLifecycle, Lifecycle, Relationship, Technique, @@ -1062,12 +1066,44 @@ def _prepare_fields_from_download( init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata - DatasetBase._convert_readonly_fields_in_place(read_only) + _convert_download_fields_in_place(init_args, read_only) return init_args, read_only @staticmethod - def _convert_readonly_fields_in_place(read_only: Dict[str, Any]) -> Dict[str, Any]: - if "_pid" in read_only: - read_only["_pid"] = _parse_pid(read_only["_pid"]) - return read_only + def _convert_readonly_fields_in_place(read_only: Dict[str, Any]) -> None: + if (pid := read_only.get("_pid")) is not None: + read_only["_pid"] = _parse_pid(pid) + + +def _convert_download_fields_in_place( + init_args: Dict[str, Any], read_only: Dict[str, Any] +) -> None: + for mod, key in ((Technique, "techniques"), (Relationship, "relationships")): + init_args[key] = _list_field_from_download(mod, init_args.get(key)) + + DatasetBase._convert_readonly_fields_in_place(read_only) + if (lifecycle := read_only.get("_lifecycle")) is not None: + read_only["_lifecycle"] = Lifecycle.from_download_model( + _as_model(DownloadLifecycle, lifecycle) + ) + + +def _list_field_from_download( + mod: Type[BaseUserModel], value: Optional[List[Any]] +) -> Optional[List[BaseUserModel]]: + if value is None: + return None + return [ + mod.from_download_model(_as_model(mod.download_model_type(), item)) + for item in value + ] + + +# If validation fails, sub models are not converted automatically by Pydantic. +def _as_model( + mod: Type[BaseModel], value: Union[BaseModel, Dict[str, Any]] +) -> BaseModel: + if isinstance(value, dict): + return construct(mod, **value, _strict_validation=False) + return value diff --git a/src/scitacean/dataset.py b/src/scitacean/dataset.py index 3c0e37ad..596599d9 100644 --- a/src/scitacean/dataset.py +++ b/src/scitacean/dataset.py @@ -25,12 +25,9 @@ from .datablock import OrigDatablock from .file import File from .model import ( - BaseUserModel, DatasetType, DownloadDataset, DownloadOrigDatablock, - Relationship, - Technique, UploadDerivedDataset, UploadOrigDatablock, UploadRawDataset, @@ -60,8 +57,6 @@ def from_download_models( A new Dataset instance. """ init_args, read_only = DatasetBase._prepare_fields_from_download(dataset_model) - for mod, key in ((Technique, "techniques"), (Relationship, "relationships")): - init_args[key] = _list_field_from_download(mod, init_args[key]) dset = cls(**init_args) for key, val in read_only.items(): setattr(dset, key, val) @@ -459,11 +454,3 @@ def _list_field_for_upload(value: Optional[List[Any]]) -> Optional[List[Any]]: if value is None: return None return [item.make_upload_model() for item in value] - - -def _list_field_from_download( - mod: Type[BaseUserModel], value: Optional[List[Any]] -) -> Optional[List[Any]]: - if value is None: - return None - return [mod.from_download_model(item) for item in value] diff --git a/tests/dataset_test.py b/tests/dataset_test.py index 8834514e..f93cece4 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -6,16 +6,179 @@ from pathlib import Path import pytest +from dateutil.parser import parse as parse_datetime from hypothesis import assume, given, settings from hypothesis import strategies as st -from scitacean import PID, Dataset, DatasetType, File, model +from scitacean import PID, Dataset, DatasetType, File, RemotePath, model from scitacean.testing import strategies as sst from scitacean.testing.client import process_uploaded_dataset from .common.files import make_file +@pytest.fixture() +def raw_download_model(): + return model.DownloadDataset( + contactEmail="p.stibbons@uu.am", + creationLocation="UnseenUniversity", + creationTime=parse_datetime("1995-08-06T14:14:14Z"), + inputDatasets=None, + investigator=None, + numberOfFilesArchived=None, + owner="pstibbons", + ownerGroup="faculty", + principalInvestigator="Ponder Stibbons", + sourceFolder=RemotePath("/uu/hex"), + type=DatasetType.RAW, + usedSoftware=None, + accessGroups=["uu"], + version="3", + classification="IN=medium,AV=low,CO=low", + comment="Where did this come from?", + createdAt=parse_datetime("1995-08-06T14:14:14Z"), + createdBy="pstibbons", + dataFormat=".thaum", + dataQualityMetrics=24, + description="Some shady data", + endTime=parse_datetime("1995-08-03T00:00:00Z"), + history=None, + instrumentGroup="professors", + instrumentId="0000-aa", + isPublished=True, + jobLogData=None, + jobParameters=None, + keywords=["thaum", "shady"], + license="NoThouchy", + datasetName="Flux44", + numberOfFiles=1, + orcidOfOwner="https://orcid.org/0000-0001-2345-6789", + ownerEmail="m.ridcully@uu.am", + packedSize=0, + pid=PID.parse("123.cc/948.f7.2a"), + proposalId="33.dc", + sampleId="bac.a4", + sharedWith=["librarian"], + size=400, + sourceFolderHost="ftp://uu.am/data", + updatedAt=parse_datetime("1995-08-06T17:30:18Z"), + updatedBy="librarian", + validationStatus="ok", + datasetlifecycle=model.DownloadLifecycle( + isOnCentralDisk=True, + retrievable=False, + ), + relationships=[ + model.DownloadRelationship( + pid=PID.parse("123.cc/020.0a.4e"), + relationship="calibration", + ) + ], + techniques=[ + model.DownloadTechnique( + name="reflorbment", + pid="aa.90.4", + ) + ], + scientificMetadata={ + "confidence": "low", + "price": {"value": "606", "unit": "AM$"}, + }, + ) + + +@pytest.fixture() +def derived_download_model(): + return model.DownloadDataset( + contactEmail="p.stibbons@uu.am", + creationLocation="UnseenUniversity", + creationTime=parse_datetime("1995-08-06T14:14:14Z"), + inputDatasets=[PID.parse("123.cc/948.f7.2a")], + investigator="Ponder Stibbons", + numberOfFilesArchived=None, + owner="pstibbons", + ownerGroup="faculty", + principalInvestigator=None, + sourceFolder=RemotePath("/uu/hex"), + type=DatasetType.DERIVED, + usedSoftware=["scitacean"], + accessGroups=["uu"], + version="3", + classification="IN=medium,AV=low,CO=low", + comment="Why did we actually make this data?", + createdAt=parse_datetime("1995-08-06T14:14:14Z"), + createdBy="pstibbons", + dataFormat=None, + dataQualityMetrics=24, + description="Dubiously analyzed data", + endTime=parse_datetime("1995-08-03T00:00:00Z"), + history=None, + instrumentGroup="professors", + instrumentId=None, + isPublished=True, + jobLogData="process interrupted", + jobParameters={"nodes": 4}, + keywords=["thaum", "dubious"], + license="NoThouchy", + datasetName="Flux peaks", + numberOfFiles=1, + orcidOfOwner="https://orcid.org/0000-0001-2345-6789", + ownerEmail="m.ridcully@uu.am", + packedSize=0, + pid=PID.parse("123.cc/948.f7.2a"), + proposalId=None, + sampleId=None, + sharedWith=["librarian"], + size=400, + sourceFolderHost="ftp://uu.am/data", + updatedAt=parse_datetime("1995-08-06T17:30:18Z"), + updatedBy="librarian", + validationStatus="ok", + datasetlifecycle=model.DownloadLifecycle( + isOnCentralDisk=True, + retrievable=False, + ), + relationships=[ + model.DownloadRelationship( + pid=PID.parse("123.cc/020.0a.4e"), + relationship="calibration", + ) + ], + techniques=[ + model.DownloadTechnique( + name="reflorbment", + pid="aa.90.4", + ) + ], + scientificMetadata={ + "confidence": "low", + "price": {"value": "606", "unit": "AM$"}, + }, + ) + + +@pytest.fixture(params=["raw_download_model", "derived_download_model"]) +def dataset_download_model(request): + return request.getfixturevalue(request.param) + + +def test_from_download_models_initializes_fields(dataset_download_model): + def get_model_field(name): + val = getattr(dataset_download_model, name) + if name == "relationships": + return [model.Relationship.from_download_model(v) for v in val] + if name == "techniques": + return [model.Technique.from_download_model(v) for v in val] + if name == "datasetlifecycle": + return model.Lifecycle.from_download_model(val) + return val + + dset = Dataset.from_download_models(dataset_download_model, []) + for field in dset.fields(): + if field.used_by(dataset_download_model.type): + assert getattr(dset, field.name) == get_model_field(field.scicat_name) + + @pytest.mark.parametrize("typ", (DatasetType.RAW, DatasetType.DERIVED)) def test_new_dataset_has_no_files(typ): dset = Dataset(type=typ) diff --git a/tools/model-generation/templates/dataset_fields.py.jinja b/tools/model-generation/templates/dataset_fields.py.jinja index 479ab192..96bda517 100644 --- a/tools/model-generation/templates/dataset_fields.py.jinja +++ b/tools/model-generation/templates/dataset_fields.py.jinja @@ -33,7 +33,7 @@ type(None) from __future__ import annotations from datetime import datetime, timezone -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union import dateutil.parser @@ -42,7 +42,11 @@ from ._internal.dataclass_wrapper import dataclass_optional_args from .datablock import OrigDatablock from .filesystem import RemotePath from .model import ( + construct, + BaseModel, + BaseUserModel, DownloadDataset, + DownloadLifecycle, Lifecycle, Relationship, Technique, @@ -201,16 +205,48 @@ class DatasetBase: init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata - DatasetBase._convert_readonly_fields_in_place(read_only) + _convert_download_fields_in_place(init_args, read_only) return init_args, read_only @staticmethod - def _convert_readonly_fields_in_place(read_only: Dict[str, Any]) -> Dict[str, Any]: -{% for field in spec.user_dset_fields(manual=False)|sort(attribute="name")|selectattr("conversion") %} + def _convert_readonly_fields_in_place(read_only: Dict[str, Any]) -> None: + {% for field in spec.user_dset_fields(manual=False)|sort(attribute="name")|selectattr("conversion") %} {%- if not field.upload %} - if "_{{ field.name }}" in read_only: - read_only["_{{ field.name }}"] = {{ field.conversion.func }}(read_only["_{{ field.name }}"]) + if ({{ field.name }} := read_only.get("_{{ field.name }}")) is not None: + read_only["_{{ field.name }}"] = {{ field.conversion.func }}({{ field.name }}) {%- endif -%} -{%- endfor %} - return read_only + {%- endfor %} + + +def _convert_download_fields_in_place( + init_args: Dict[str, Any], read_only: Dict[str, Any] +) -> None: + for mod, key in ((Technique, "techniques"), (Relationship, "relationships")): + init_args[key] = _list_field_from_download(mod, init_args.get(key)) + + DatasetBase._convert_readonly_fields_in_place(read_only) + if (lifecycle := read_only.get("_lifecycle")) is not None: + read_only["_lifecycle"] = Lifecycle.from_download_model( + _as_model(DownloadLifecycle, lifecycle) + ) + + +def _list_field_from_download( + mod: Type[BaseUserModel], value: Optional[List[Any]] +) -> Optional[List[BaseUserModel]]: + if value is None: + return None + return [ + mod.from_download_model(_as_model(mod.download_model_type(), item)) + for item in value + ] + + +# If validation fails, sub models are not converted automatically by Pydantic. +def _as_model( + mod: Type[BaseModel], value: Union[BaseModel, Dict[str, Any]] +) -> BaseModel: + if isinstance(value, dict): + return construct(mod, **value, _strict_validation=False) + return value From a72cbabbcca6c4b21927467ce610ac2db5fbd3ab Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:18:15 +0200 Subject: [PATCH 4/7] Allow untyped calls in tests --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 13a69ed0..f497e637 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,7 @@ warn_unreachable = true [[tool.mypy.overrides]] module = "tests.*" disallow_untyped_defs = false +disallow_untyped_calls = false [tool.ruff] line-length = 88 From 8ff2622655ac93ed27f6aa525872b636d5e75bde Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:20:06 +0200 Subject: [PATCH 5/7] Fix type hint --- src/scitacean/testing/ssh/fixtures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scitacean/testing/ssh/fixtures.py b/src/scitacean/testing/ssh/fixtures.py index 37660873..3e1c200f 100644 --- a/src/scitacean/testing/ssh/fixtures.py +++ b/src/scitacean/testing/ssh/fixtures.py @@ -5,7 +5,7 @@ import logging from pathlib import Path -from typing import Callable, Optional +from typing import Callable, Generator, Optional import fabric import fabric.config @@ -93,7 +93,7 @@ def ssh_fileserver( ssh_base_dir: Optional[Path], ssh_data_dir: Optional[Path], ssh_connect_with_username_password, -) -> bool: +) -> Generator[bool, None, None]: """Fixture to declare that a test needs a local SSH server. If SSH tests are enabled, this fixture configures and starts an SSH server From 89b961c68ff3c5a0158aab0584ec08d7bc4ce5a1 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:31:41 +0200 Subject: [PATCH 6/7] Test that wrong fields are not initialized --- tests/dataset_test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/dataset_test.py b/tests/dataset_test.py index f93cece4..fa197b6d 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -91,7 +91,7 @@ def raw_download_model(): def derived_download_model(): return model.DownloadDataset( contactEmail="p.stibbons@uu.am", - creationLocation="UnseenUniversity", + creationLocation=None, creationTime=parse_datetime("1995-08-06T14:14:14Z"), inputDatasets=[PID.parse("123.cc/948.f7.2a")], investigator="Ponder Stibbons", @@ -111,7 +111,7 @@ def derived_download_model(): dataFormat=None, dataQualityMetrics=24, description="Dubiously analyzed data", - endTime=parse_datetime("1995-08-03T00:00:00Z"), + endTime=None, history=None, instrumentGroup="professors", instrumentId=None, @@ -179,6 +179,13 @@ def get_model_field(name): assert getattr(dset, field.name) == get_model_field(field.scicat_name) +def test_from_download_models_does_not_initialize_wrong_fields(dataset_download_model): + dset = Dataset.from_download_models(dataset_download_model, []) + for field in dset.fields(): + if not field.used_by(dataset_download_model.type): + assert getattr(dset, field.name) is None + + @pytest.mark.parametrize("typ", (DatasetType.RAW, DatasetType.DERIVED)) def test_new_dataset_has_no_files(typ): dset = Dataset(type=typ) From bd5b8197a0b572e2db252549c76e13780d82146f Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Wed, 16 Aug 2023 16:32:25 +0200 Subject: [PATCH 7/7] Add release note --- docs/release-notes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/release-notes.rst b/docs/release-notes.rst index 2179e946..2c3b7403 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -50,6 +50,8 @@ Breaking changes Bugfixes ~~~~~~~~ +* Fields of derived datasets are no longer initialized when downloading raw datasets and vice versa. + Documentation ~~~~~~~~~~~~~