From 5cfed34df73ebb37e62f81f966e1004852ec1e36 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Wed, 13 Apr 2022 14:06:24 +0100 Subject: [PATCH 01/14] Added raw and derived upsert functions --- pyscicat/client.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/pyscicat/client.py b/pyscicat/client.py index 9901055..6d30521 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -226,6 +226,81 @@ def upload_derived_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: + """Upsert a raw dataset + + Parameters + ---------- + dataset : Dataset + Dataset to load + + filter_fields + Filters to locate where to upsert dataset + + Returns + ------- + str + pid (or unique identifier) of the dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + query_results = self.get_datasets(filter_fields) + if query_results.json(): + filter_fields = json.dumps(filter_fields) + raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) + if not resp.ok: + err = resp.json()["error"] + raise ScicatCommError(f"Error upserting raw dataset {err}") + new_pid = resp.json().get("pid") + logger.info(f"dataset updated {new_pid}") + return new_pid + else: + logger.info(f"dataset does not exist, could not upsert") + raise ScicatCommError(f"Dataset does not exist, could not upsert.") + + def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: + """Upsert a derived dataset + + Parameters + ---------- + dataset : Dataset + Dataset to upsert + + filter_fields + Filters to locate where to upsert dataset + + Returns + ------- + str + pid (or unique identifier) of the dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + + query_results = self.get_datasets(filter_fields) + if query_results.json(): + filter_fields = json.dumps(filter_fields) + derived_dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + resp = self._send_to_scicat( + derived_dataset_url, dataset.dict(exclude_none=True) + ) + if not resp.ok: + err = resp.json()["error"] + raise ScicatCommError(f"Error upserting derived dataset {err}") + new_pid = resp.json().get("pid") + logger.info(f"dataset updated {new_pid}") + return new_pid + else: + logger.info(f"dataset does not exist, could not upsert") + raise ScicatCommError(f"Dataset does not exist, could not upsert.") + def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): """Upload a Datablock From 96155e15440d9e8e5b9428d2f0c53c97013e6583 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Thu, 21 Apr 2022 10:34:40 +0100 Subject: [PATCH 02/14] Added upsert_dataset. Fixed bug in upsert funcs --- pyscicat/client.py | 47 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 6d30521..4c9c6b4 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -226,6 +226,49 @@ def upload_derived_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid + def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: + """Upsert a dataset + + Parameters + ---------- + dataset : Dataset + Dataset to load + + filter_fields + Filters to locate where to upsert dataset + + Returns + ------- + str + pid (or unique identifier) of the dataset + + Raises + ------ + ScicatCommError + Raises if a non-20x message is returned + """ + query_results = self.get_datasets(filter_fields) + if query_results: + filter_fields = json.dumps(filter_fields) + if isinstance(dataset, RawDataset): + dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + elif isinstance(dataset, DerivedDataset): + dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + else: + logging.error( + "Dataset type not recognized, not Raw or Derived type" + ) + resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) + if not resp.ok: + err = resp.json()["error"] + raise ScicatCommError(f"Error upserting dataset {err}") + new_pid = resp.json().get("pid") + logger.info(f"dataset updated {new_pid}") + return new_pid + else: + logger.info(f"dataset does not exist, could not upsert") + raise ScicatCommError(f"Dataset does not exist, could not upsert.") + def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a raw dataset @@ -248,7 +291,7 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: Raises if a non-20x message is returned """ query_results = self.get_datasets(filter_fields) - if query_results.json(): + if query_results: filter_fields = json.dumps(filter_fields) raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?{{"where":{filter_fields}}}' resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) @@ -285,7 +328,7 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """ query_results = self.get_datasets(filter_fields) - if query_results.json(): + if query_results: filter_fields = json.dumps(filter_fields) derived_dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?{{"where":{filter_fields}}}' resp = self._send_to_scicat( From d7b23306b44ae54907725cd1454fac2a53af8ad9 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Thu, 21 Apr 2022 13:46:49 +0100 Subject: [PATCH 03/14] Removed unneeded f in log/error statements causing bug --- pyscicat/client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 4c9c6b4..00ba139 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -266,8 +266,8 @@ def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info(f"dataset updated {new_pid}") return new_pid else: - logger.info(f"dataset does not exist, could not upsert") - raise ScicatCommError(f"Dataset does not exist, could not upsert.") + logger.info("dataset does not exist, could not upsert") + raise ScicatCommError("Dataset does not exist, could not upsert.") def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a raw dataset @@ -302,8 +302,8 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info(f"dataset updated {new_pid}") return new_pid else: - logger.info(f"dataset does not exist, could not upsert") - raise ScicatCommError(f"Dataset does not exist, could not upsert.") + logger.info("dataset does not exist, could not upsert") + raise ScicatCommError("Dataset does not exist, could not upsert.") def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a derived dataset @@ -341,8 +341,8 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info(f"dataset updated {new_pid}") return new_pid else: - logger.info(f"dataset does not exist, could not upsert") - raise ScicatCommError(f"Dataset does not exist, could not upsert.") + logger.info("dataset does not exist, could not upsert") + raise ScicatCommError("Dataset does not exist, could not upsert.") def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): """Upload a Datablock From 094ce4d29d133550152aa86a9bc0936d2167f931 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Tue, 26 Apr 2022 13:15:22 +0100 Subject: [PATCH 04/14] Added initial upsert test --- pyscicat/tests/test_client.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index 5c470e6..887dff6 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -15,6 +15,7 @@ Datablock, DataFile, Dataset, + RawDataset, Ownable, ) @@ -28,6 +29,10 @@ def add_mock_requests(mock_request): ) mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"}) mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"}) + mock_request.get(local_url + "Datasets?filter=%7B%22where%22%3A+%7B%22sampleId%22%3A+%22gargleblaster%22%7D%7D", + json = {"response": "random"}) + mock_request.post(local_url + "/RawDatasets/upsertWithWhere?where=%22%3A+%7B%22sampleId%22%3A+%22gargleblaster%22%7D%7D", + json={"pid": "42"}) mock_request.post( local_url + "RawDatasets/42/origdatablocks", json={"response": "random"}, @@ -59,7 +64,7 @@ def test_scicate_ingest(): assert size is not None # RawDataset - dataset = Dataset( + dataset = RawDataset( path="/foo/bar", size=42, owner="slartibartfast", @@ -78,6 +83,28 @@ def test_scicate_ingest(): ) dataset_id = scicat.upload_raw_dataset(dataset) + # new dataset + dataset = Dataset( + path="/foo/bar", + size=42, + owner="slartibartfast", + contactEmail="slartibartfast@magrathea.org", + creationLocation="magrathea", + creationTime=str(datetime.now()), + type="raw", + instrumentId="earth", + proposalId="deepthought", + dataFormat="planet", + principalInvestigator="A. Mouse", + sourceFolder="/foo/bar", + scientificMetadata={"a": "newfield"}, + sampleId="gargleblaster", + **ownable.dict() + ) + + dataset_id = scicat.upsert_raw_dataset(dataset, {"sampleId": "gargleblaster"}) + assert dataset_id.pid == "42" + # Datablock with DataFiles data_file = DataFile(path="/foo/bar", size=42) data_block = Datablock( From 5f0205cad9213995425816b929857ddaf61f7faf Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Wed, 27 Apr 2022 11:01:50 +0100 Subject: [PATCH 05/14] Fixed urls for upsert func and test --- pyscicat/client.py | 8 ++++---- pyscicat/tests/test_client.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 00ba139..145b86c 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -251,9 +251,9 @@ def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: if query_results: filter_fields = json.dumps(filter_fields) if isinstance(dataset, RawDataset): - dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' elif isinstance(dataset, DerivedDataset): - dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' else: logging.error( "Dataset type not recognized, not Raw or Derived type" @@ -293,7 +293,7 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: query_results = self.get_datasets(filter_fields) if query_results: filter_fields = json.dumps(filter_fields) - raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) if not resp.ok: err = resp.json()["error"] @@ -330,7 +330,7 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: query_results = self.get_datasets(filter_fields) if query_results: filter_fields = json.dumps(filter_fields) - derived_dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?{{"where":{filter_fields}}}' + derived_dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' resp = self._send_to_scicat( derived_dataset_url, dataset.dict(exclude_none=True) ) diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index 887dff6..2080f05 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -29,9 +29,9 @@ def add_mock_requests(mock_request): ) mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"}) mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"}) - mock_request.get(local_url + "Datasets?filter=%7B%22where%22%3A+%7B%22sampleId%22%3A+%22gargleblaster%22%7D%7D", + mock_request.get(local_url + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", json = {"response": "random"}) - mock_request.post(local_url + "/RawDatasets/upsertWithWhere?where=%22%3A+%7B%22sampleId%22%3A+%22gargleblaster%22%7D%7D", + mock_request.post(local_url + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", json={"pid": "42"}) mock_request.post( local_url + "RawDatasets/42/origdatablocks", @@ -103,7 +103,7 @@ def test_scicate_ingest(): ) dataset_id = scicat.upsert_raw_dataset(dataset, {"sampleId": "gargleblaster"}) - assert dataset_id.pid == "42" + assert dataset_id == "42" # Datablock with DataFiles data_file = DataFile(path="/foo/bar", size=42) From 9d31fe304dcf308f03144b25a57c44f3048412dd Mon Sep 17 00:00:00 2001 From: Dylan McReynolds <40469975+dylanmcreynolds@users.noreply.github.com> Date: Wed, 27 Apr 2022 11:24:21 -0700 Subject: [PATCH 06/14] remove pre-commit and re-add flake8 --- .github/workflows/linting.yml | 14 -------------- .github/workflows/testing.yml | 8 ++++++++ 2 files changed, 8 insertions(+), 14 deletions(-) delete mode 100644 .github/workflows/linting.yml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml deleted file mode 100644 index 392e281..0000000 --- a/.github/workflows/linting.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pre-commit - -on: - pull_request: - push: - branches: [main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.3 \ No newline at end of file diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 5b95152..f3c75db 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -7,6 +7,7 @@ on: - cron: '00 4 * * *' # daily at 4AM jobs: + build: runs-on: ubuntu-latest @@ -33,6 +34,13 @@ jobs: set -vxeuo pipefail python -m pip install -r requirements-dev.txt python -m pip list + + - name: Lint with flake8 + shell: bash -l {0} + run: | + set -vxeuo pipefail + python -m flake8 + - name: Test with pytest shell: bash -l {0} run: | From 46737e1f8b490bd8f39fbe9e746021eebed68872 Mon Sep 17 00:00:00 2001 From: Dylan McReynolds <40469975+dylanmcreynolds@users.noreply.github.com> Date: Wed, 27 Apr 2022 11:27:17 -0700 Subject: [PATCH 07/14] More remove pre-commit files --- .pre-commit-config.yaml | 15 --------------- requirements-dev.txt | 2 -- 2 files changed, 17 deletions(-) delete mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 0baa145..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,15 +0,0 @@ -default_language_version: - python: python3 -repos: - - repo: https://github.com/ambv/black - rev: 21.12b0 - hooks: - - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.0.0 - hooks: - - id: flake8 - - repo: https://github.com/kynan/nbstripout - rev: 0.5.0 - hooks: - - id: nbstripout diff --git a/requirements-dev.txt b/requirements-dev.txt index 2759b55..7c4c01d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,9 +6,7 @@ flake8 pytest sphinx twine -pre-commit black -nbstripout requests_mock # These are dependencies of various sphinx extensions for documentation. ipython From e4cdf4d3bae9abd2a4398813d965fd595792c1cd Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Thu, 28 Apr 2022 11:05:24 +0100 Subject: [PATCH 08/14] Black formatting fixes --- pyscicat/client.py | 12 ++++++------ pyscicat/tests/test_client.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 145b86c..ea93b36 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -255,9 +255,7 @@ def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: elif isinstance(dataset, DerivedDataset): dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' else: - logging.error( - "Dataset type not recognized, not Raw or Derived type" - ) + logging.error("Dataset type not recognized, not Raw or Derived type") resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) if not resp.ok: err = resp.json()["error"] @@ -294,7 +292,9 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: if query_results: filter_fields = json.dumps(filter_fields) raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) + resp = self._send_to_scicat( + raw_dataset_url, dataset.dict(exclude_none=True) + ) if not resp.ok: err = resp.json()["error"] raise ScicatCommError(f"Error upserting raw dataset {err}") @@ -330,9 +330,9 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: query_results = self.get_datasets(filter_fields) if query_results: filter_fields = json.dumps(filter_fields) - derived_dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' + dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' resp = self._send_to_scicat( - derived_dataset_url, dataset.dict(exclude_none=True) + dataset_url, dataset.dict(exclude_none=True) ) if not resp.ok: err = resp.json()["error"] diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index 2080f05..fb2d256 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -29,10 +29,16 @@ def add_mock_requests(mock_request): ) mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"}) mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"}) - mock_request.get(local_url + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", - json = {"response": "random"}) - mock_request.post(local_url + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", - json={"pid": "42"}) + mock_request.get( + local_url + + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", + json={"response": "random"}, + ) + mock_request.post( + local_url + + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", + json={"pid": "42"}, + ) mock_request.post( local_url + "RawDatasets/42/origdatablocks", json={"response": "random"}, From 88ce96e3972b9bfb8d91dca3aba2d1f377a37768 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Thu, 28 Apr 2022 11:15:27 +0100 Subject: [PATCH 09/14] Raise ValueError in upsert_dataset --- pyscicat/client.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index ea93b36..0639160 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -247,15 +247,15 @@ def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: ScicatCommError Raises if a non-20x message is returned """ + filters = json.dumps(filter_fields) + if isinstance(dataset, RawDataset): + dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filters}}}' + elif isinstance(dataset, DerivedDataset): + dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filters}}}' + else: + raise ValueError("Dataset type not recognised, not Raw or Derived type") query_results = self.get_datasets(filter_fields) if query_results: - filter_fields = json.dumps(filter_fields) - if isinstance(dataset, RawDataset): - dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - elif isinstance(dataset, DerivedDataset): - dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - else: - logging.error("Dataset type not recognized, not Raw or Derived type") resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) if not resp.ok: err = resp.json()["error"] @@ -331,9 +331,7 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: if query_results: filter_fields = json.dumps(filter_fields) dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - resp = self._send_to_scicat( - dataset_url, dataset.dict(exclude_none=True) - ) + resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) if not resp.ok: err = resp.json()["error"] raise ScicatCommError(f"Error upserting derived dataset {err}") From 6852dc5839ac9a1e483d19b01d5338c72e4e9d28 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Fri, 29 Apr 2022 11:14:37 +0100 Subject: [PATCH 10/14] Added additional upsert test case --- pyscicat/tests/test_client.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index fb2d256..4d97625 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -34,11 +34,21 @@ def add_mock_requests(mock_request): + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", json={"response": "random"}, ) + mock_request.get( + local_url + + "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D", + json={"response": "random"}, + ) mock_request.post( local_url + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D", json={"pid": "42"}, ) + mock_request.post( + local_url + + "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D", + json={"pid": "54"}, + ) mock_request.post( local_url + "RawDatasets/42/origdatablocks", json={"response": "random"}, @@ -70,7 +80,7 @@ def test_scicate_ingest(): assert size is not None # RawDataset - dataset = RawDataset( + dataset = Dataset( path="/foo/bar", size=42, owner="slartibartfast", @@ -90,7 +100,7 @@ def test_scicate_ingest(): dataset_id = scicat.upload_raw_dataset(dataset) # new dataset - dataset = Dataset( + dataset = RawDataset( path="/foo/bar", size=42, owner="slartibartfast", @@ -108,9 +118,14 @@ def test_scicate_ingest(): **ownable.dict() ) + # Update existing record dataset_id = scicat.upsert_raw_dataset(dataset, {"sampleId": "gargleblaster"}) assert dataset_id == "42" + # Upsert non-existing record + dataset_id_2 = scicat.upsert_raw_dataset(dataset, {"sampleId": "wowza"}) + assert dataset_id_2 == "54" + # Datablock with DataFiles data_file = DataFile(path="/foo/bar", size=42) data_block = Datablock( From 29eeb9fffb0f2ef9fe6ebd534ca9d3efc5159c0f Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Fri, 29 Apr 2022 11:15:17 +0100 Subject: [PATCH 11/14] Removed generic upsert, use raw/derived instead --- pyscicat/client.py | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 0639160..266ab39 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -226,47 +226,6 @@ def upload_derived_dataset(self, dataset: Dataset) -> str: logger.info(f"new dataset created {new_pid}") return new_pid - def upsert_dataset(self, dataset: Dataset, filter_fields) -> str: - """Upsert a dataset - - Parameters - ---------- - dataset : Dataset - Dataset to load - - filter_fields - Filters to locate where to upsert dataset - - Returns - ------- - str - pid (or unique identifier) of the dataset - - Raises - ------ - ScicatCommError - Raises if a non-20x message is returned - """ - filters = json.dumps(filter_fields) - if isinstance(dataset, RawDataset): - dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filters}}}' - elif isinstance(dataset, DerivedDataset): - dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filters}}}' - else: - raise ValueError("Dataset type not recognised, not Raw or Derived type") - query_results = self.get_datasets(filter_fields) - if query_results: - resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error upserting dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"dataset updated {new_pid}") - return new_pid - else: - logger.info("dataset does not exist, could not upsert") - raise ScicatCommError("Dataset does not exist, could not upsert.") - def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a raw dataset From 66caeaa9e4152e9384b8c8e6adae95269c55c9b3 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Fri, 29 Apr 2022 11:31:01 +0100 Subject: [PATCH 12/14] Upsert inserts if dataset doesnt exist yet --- pyscicat/client.py | 53 ++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index 266ab39..81aa357 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -248,21 +248,20 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: Raises if a non-20x message is returned """ query_results = self.get_datasets(filter_fields) - if query_results: - filter_fields = json.dumps(filter_fields) - raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - resp = self._send_to_scicat( - raw_dataset_url, dataset.dict(exclude_none=True) - ) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error upserting raw dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"dataset updated {new_pid}") - return new_pid - else: - logger.info("dataset does not exist, could not upsert") - raise ScicatCommError("Dataset does not exist, could not upsert.") + if not query_results: + logger.info("Dataset does not exist already, will be inserted") + filter_fields = json.dumps(filter_fields) + raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' + resp = self._send_to_scicat( + raw_dataset_url, dataset.dict(exclude_none=True) + ) + if not resp.ok: + err = resp.json()["error"] + raise ScicatCommError(f"Error upserting raw dataset {err}") + new_pid = resp.json().get("pid") + logger.info(f"dataset upserted {new_pid}") + return new_pid + def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a derived dataset @@ -287,19 +286,17 @@ def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """ query_results = self.get_datasets(filter_fields) - if query_results: - filter_fields = json.dumps(filter_fields) - dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) - if not resp.ok: - err = resp.json()["error"] - raise ScicatCommError(f"Error upserting derived dataset {err}") - new_pid = resp.json().get("pid") - logger.info(f"dataset updated {new_pid}") - return new_pid - else: - logger.info("dataset does not exist, could not upsert") - raise ScicatCommError("Dataset does not exist, could not upsert.") + if not query_results: + logger.info("Dataset does not exist already, will be inserted") + filter_fields = json.dumps(filter_fields) + dataset_url = f'{self._base_url}/DerivedDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' + resp = self._send_to_scicat(dataset_url, dataset.dict(exclude_none=True)) + if not resp.ok: + err = resp.json()["error"] + raise ScicatCommError(f"Error upserting derived dataset {err}") + new_pid = resp.json().get("pid") + logger.info(f"dataset upserted {new_pid}") + return new_pid def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"): """Upload a Datablock From 7b0465fc6ac20e936d9c88b0d4a4120f2ca3120b Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Mon, 9 May 2022 10:40:03 +0100 Subject: [PATCH 13/14] Black format fix --- pyscicat/client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyscicat/client.py b/pyscicat/client.py index b89b369..d90aea4 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -299,16 +299,13 @@ def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str: logger.info("Dataset does not exist already, will be inserted") filter_fields = json.dumps(filter_fields) raw_dataset_url = f'{self._base_url}/RawDatasets/upsertWithWhere?where={{"where":{filter_fields}}}' - resp = self._send_to_scicat( - raw_dataset_url, dataset.dict(exclude_none=True) - ) + resp = self._send_to_scicat(raw_dataset_url, dataset.dict(exclude_none=True)) if not resp.ok: err = resp.json()["error"] raise ScicatCommError(f"Error upserting raw dataset {err}") new_pid = resp.json().get("pid") logger.info(f"dataset upserted {new_pid}") return new_pid - def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str: """Upsert a derived dataset From 1c1f1783f12100a433e74a38f90ad6f419964861 Mon Sep 17 00:00:00 2001 From: Abigail Alexander Date: Mon, 9 May 2022 15:37:59 +0100 Subject: [PATCH 14/14] Fixed indentation causing merge conflict --- .github/workflows/testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index f3c75db..fd2ce7e 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -38,8 +38,8 @@ jobs: - name: Lint with flake8 shell: bash -l {0} run: | - set -vxeuo pipefail - python -m flake8 + set -vxeuo pipefail + python -m flake8 - name: Test with pytest shell: bash -l {0}