Skip to content

Commit

Permalink
Merge pull request #12 from rosalindfranklininstitute/upsert-dataset
Browse files Browse the repository at this point in the history
Upsert dataset
  • Loading branch information
dylanmcreynolds authored Jun 1, 2022
2 parents 75d8f5b + a1c6554 commit c1e5369
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 1 deletion.
1 change: 1 addition & 0 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- cron: '00 4 * * *' # daily at 4AM

jobs:

build:

runs-on: ubuntu-latest
Expand Down
69 changes: 69 additions & 0 deletions pyscicat/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,75 @@ def upload_derived_dataset(self, dataset: Dataset) -> str:
logger.info(f"new dataset created {new_pid}")
return new_pid

def upsert_raw_dataset(self, dataset: Dataset, filter_fields) -> str:
    """Insert or update a raw dataset located by ``filter_fields``.

    Parameters
    ----------
    dataset : Dataset
        Dataset to upsert
    filter_fields
        Filters to locate where to upsert dataset

    Returns
    -------
    str
        pid (or unique identifier) of the dataset

    Raises
    ------
    ScicatCommError
        Raises if a non-20x message is returned
    """
    # Query first only so we can log whether this upsert will insert a new
    # record; the upsertWithWhere call below handles both cases itself.
    if not self.get_datasets(filter_fields):
        logger.info("Dataset does not exist already, will be inserted")
    encoded_filter = json.dumps(filter_fields)
    url = (
        self._base_url
        + '/RawDatasets/upsertWithWhere?where={"where":'
        + encoded_filter
        + "}"
    )
    response = self._send_to_scicat(url, dataset.dict(exclude_none=True))
    if not response.ok:
        err = response.json()["error"]
        raise ScicatCommError(f"Error upserting raw dataset {err}")
    pid = response.json().get("pid")
    logger.info(f"dataset upserted {pid}")
    return pid

def upsert_derived_dataset(self, dataset: Dataset, filter_fields) -> str:
    """Insert or update a derived dataset located by ``filter_fields``.

    Parameters
    ----------
    dataset : Dataset
        Dataset to upsert
    filter_fields
        Filters to locate where to upsert dataset

    Returns
    -------
    str
        pid (or unique identifier) of the dataset

    Raises
    ------
    ScicatCommError
        Raises if a non-20x message is returned
    """
    # Query first only so we can log whether this upsert will insert a new
    # record; the upsertWithWhere call below handles both cases itself.
    if not self.get_datasets(filter_fields):
        logger.info("Dataset does not exist already, will be inserted")
    encoded_filter = json.dumps(filter_fields)
    url = (
        self._base_url
        + '/DerivedDatasets/upsertWithWhere?where={"where":'
        + encoded_filter
        + "}"
    )
    response = self._send_to_scicat(url, dataset.dict(exclude_none=True))
    if not response.ok:
        err = response.json()["error"]
        raise ScicatCommError(f"Error upserting derived dataset {err}")
    pid = response.json().get("pid")
    logger.info(f"dataset upserted {pid}")
    return pid

def upload_datablock(self, datablock: Datablock, datasetType: str = "RawDatasets"):
"""Upload a Datablock
Expand Down
48 changes: 48 additions & 0 deletions pyscicat/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Datablock,
DataFile,
Dataset,
RawDataset,
Ownable,
)

Expand All @@ -28,6 +29,26 @@ def add_mock_requests(mock_request):
)
mock_request.post(local_url + "Samples", json={"sampleId": "dataset_id"})
mock_request.post(local_url + "RawDatasets/replaceOrCreate", json={"pid": "42"})
mock_request.get(
local_url
+ "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D",
json={"response": "random"},
)
mock_request.get(
local_url
+ "/Datasets/?filter=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D",
json={"response": "random"},
)
mock_request.post(
local_url
+ "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22gargleblaster%22%7D%7D",
json={"pid": "42"},
)
mock_request.post(
local_url
+ "/RawDatasets/upsertWithWhere?where=%7B%22where%22:%7B%22sampleId%22:%20%22wowza%22%7D%7D",
json={"pid": "54"},
)
mock_request.post(
local_url + "RawDatasets/42/origdatablocks",
json={"response": "random"},
Expand Down Expand Up @@ -80,6 +101,33 @@ def test_scicate_ingest():
)
dataset_id = scicat.upload_raw_dataset(dataset)

# new dataset
dataset = RawDataset(
path="/foo/bar",
size=42,
owner="slartibartfast",
contactEmail="[email protected]",
creationLocation="magrathea",
creationTime=str(datetime.now()),
type="raw",
instrumentId="earth",
proposalId="deepthought",
dataFormat="planet",
principalInvestigator="A. Mouse",
sourceFolder="/foo/bar",
scientificMetadata={"a": "newfield"},
sampleId="gargleblaster",
**ownable.dict()
)

# Update existing record
dataset_id = scicat.upsert_raw_dataset(dataset, {"sampleId": "gargleblaster"})
assert dataset_id == "42"

# Upsert non-existing record
dataset_id_2 = scicat.upsert_raw_dataset(dataset, {"sampleId": "wowza"})
assert dataset_id_2 == "54"

# Datablock with DataFiles
data_file = DataFile(path="/foo/bar", size=42)
data_block = Datablock(
Expand Down
1 change: 0 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ pytest
sphinx
twine
black
nbstripout
requests_mock
# These are dependencies of various sphinx extensions for documentation.
ipython
Expand Down

0 comments on commit c1e5369

Please sign in to comment.