Skip to content

Commit

Permalink
feature(service): Adds checks for OSTI service
Browse files Browse the repository at this point in the history
+ changes service.osti.submit to a private method
+ Adds checking to service.osti._submit for status and correct doi prefix, raises service exception
  if either of the checks fail.
+ Extends OSTIRecord dataclase to collect site_url and populates it from the osti response for a later check
+ Refactors service.osti.publish to check the site_url returned in the OSTIRecord, raises a ServiceAccountExcepton
  on failure.
+ Fixes logic error in service.osti.to_osti_xml where xml is prepared for publication.  There was a possibility of creating
  a duplicate record or submitting for publication to osti before the dataset is approved.  The first case can happen
  if a doi is created manually during draft and then the user clicks submit.  The second case happens if the
  data is synchronized manually before approval.  The solution is to check for the publication date and dataset.status == APPROVED before
  marking the dataset for publication. Additionally, set_reserved should only be set if dataset is None(dummy data) or there
  is no doi yet. Also, adds check if there is already a doi before
  adding set_reserved.
+ Updates existing tests for new logic.

Close #405
  • Loading branch information
vchendrix committed Sep 11, 2023
1 parent 7c416e4 commit 0e626d1
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 33 deletions.
5 changes: 3 additions & 2 deletions archive_api/fixtures/test_archive_api.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,10 @@
"fields": {
"ngt_id": 2,
"description": "Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.",
"status": 1,
"status": 2,
"status_comment": "",
"name": "Data Set 3",
"doi": "",
"doi": "https://doi.org/10.15486/ngt/8343947",
"start_date": "2016-10-29",
"end_date": null,
"qaqc_status": null,
Expand All @@ -241,6 +241,7 @@
"access_level": 0,
"additional_access_information": "",
"submission_date": "2016-10-29T19:15:35.013361Z",
"publication_date": "2016-10-29T19:15:35.013361Z",
"contact": 2,
"sites": [
1
Expand Down
55 changes: 41 additions & 14 deletions archive_api/service/osti.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@
@dataclass
class OSTIRecord:
status: str
site_url: Optional[str]
doi: Optional[str]
doi_status: Optional[str]
status_message: Optional[str]


def submit(osti_xml) -> OSTIRecord:
def _submit(osti_xml) -> OSTIRecord:
"""
Submit OSTI xml record
:param dataset_id:
Expand All @@ -50,10 +51,20 @@ def submit(osti_xml) -> OSTIRecord:
doi_status = doi is not None and doi.get("status") or None
record_status = root.find("./record/status")
record_status_message = root.find("./record/status_message")
return OSTIRecord(status=record_status.text,
site_url = root.find("./record/site_url")
osti_record = OSTIRecord(status=record_status.text,
site_url=site_url is not None and site_url.text or None,
doi=doi is not None and f"https://doi.org/{doi.text}" or None,
doi_status=doi_status,
status_message=record_status_message is not None and record_status_message.text or None)
if osti_record.doi and not osti_record.doi.startswith("https://doi.org/10.15486/ngt/"):
raise ServiceAccountException(
f"Error in publish data; There was a problem minting the DOI - "
f"The doi, {osti_record.doi}, is not a valid doi", 0)
if not osti_record or osti_record.status != "SUCCESS":
raise ServiceAccountException(
f"Error in doi record - {osti_record.status_message}", 0)
return osti_record
else:
raise ServiceAccountException(f"HTTP Status: {r.status_code} HTTP Response: {r.content}",
doi_service.service)
Expand All @@ -73,7 +84,7 @@ def mint(dataset_id) -> Optional[OSTIRecord]:

dataset = DataSet.objects.get(pk=dataset_id)
if not dataset.doi:
osti_record = submit(str(to_osti_xml()))
osti_record = _submit(str(to_osti_xml()))
if osti_record:
if osti_record.status == "SUCCESS":
dataset.doi = osti_record.doi
Expand All @@ -89,18 +100,24 @@ def publish(dataset_id) -> OSTIRecord:
:param dataset_id: the database identifier of the dataset to mint a doi for
:return: OSTIRecord
:raise: Except if no `ServiceAccount` defintion exists or any unexpected errors occur
:rtype: archive_api.service.common.ServiceAccountException
:raises: archive_api.service.common.ServiceAccountException
"""

dataset = DataSet.objects.get(pk=dataset_id)

if not dataset.doi:
osti_record = mint(dataset_id)
if not osti_record or osti_record.status != "SUCCESS":
else:
osti_record = _submit(str(to_osti_xml(dataset_id)))

dataset.refresh_from_db()
# Only check the site url on publish
if osti_record and dataset.doi and dataset.status >= dataset.STATUS_APPROVED:
if osti_record.site_url != _get_site_url(dataset):
raise ServiceAccountException(
f"Cannot publish; There was a problem minting the DOI - {osti_record.status_message}", 0)
f"Error in publish data; There was a problem minting the DOI - "
f"The site url, {osti_record.site_url}, does not match our record, {_get_site_url(dataset)}.", 0)
return osti_record

return submit(str(to_osti_xml(dataset_id)))


def to_osti_xml(dataset_id=None):
Expand Down Expand Up @@ -130,11 +147,12 @@ def to_osti_xml(dataset_id=None):
osti_id = dataset.doi.split("/")[-1]
_set_value(record, 'osti_id', osti_id)

if dataset and dataset.submission_date:
_set_value(record, 'site_url', f'https://{settings.SERVICE_HOSTNAME}/dois/{dataset.data_set_id()}')
_set_value(record, 'publication_date', dataset.submission_date.strftime("%Y"))

else:
# Should this dataset be published or reserved
if dataset and dataset.publication_date and dataset.status == dataset.STATUS_APPROVED:
_set_value(record, 'site_url', _get_site_url(dataset))
_set_value(record, 'publication_date', dataset.publication_date.strftime("%Y"))
elif dataset and not dataset.doi or dataset is None:
# reserve a doi before publication or for a dummy dataset
_set_value(record, 'set_reserved', "")

# DataSet Type: Dataset Type refers to the main content of the
Expand Down Expand Up @@ -190,3 +208,12 @@ def _creators(record, dataset):
_set_value(creator_detail, 'last_name', a.author.last_name)
_set_value(creator_detail, 'private_email', a.author.email)
_set_value(creator_detail, 'affiliation_name', a.author.institution_affiliation)

def _get_site_url(dataset):
"""
Build the site url from the dataset
:param dataset: The NGT Archive dataset to build creators for
:return: string representing the site url
"""
return f'https://{settings.SERVICE_HOSTNAME}/dois/{dataset.data_set_id()}'
2 changes: 1 addition & 1 deletion archive_api/tests/osti_response_publish.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<keywords></keywords>
<accession_num></accession_num>
<description>Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.</description>
<site_url>https://vchendrix/dois/NGT0001</site_url>
<site_url>https://ngt-data.lbl.gov/dois/NGT0001</site_url>
<date_first_submitted>2022-03-24</date_first_submitted>
<date_last_submitted>2022-03-24</date_last_submitted>
<doi status="PENDING">10.15486/ngt/1525114</doi>
Expand Down
47 changes: 47 additions & 0 deletions archive_api/tests/osti_response_publish_error_doi.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<records>
<record status="Pending" released="N">
<site_input_code>NGEE-TRPC</site_input_code>
<osti_id>1525114</osti_id>
<dataset_type>SM</dataset_type>
<title>Data Set 2</title>
<status>SUCCESS</status>
<authors>
<author>
<first_name>Luke</first_name>
<last_name>Cage</last_name>
<private_email>[email protected]</private_email>
<affiliation_name>POWER</affiliation_name>
</author>
</authors>
<related_resource></related_resource>
<product_nos>NGT0001</product_nos>
<product_type>DA</product_type>
<contract_nos>LBNL NGEE-Tropics UC, Berkeley NGEE-Tropics</contract_nos>
<other_identifying_numbers></other_identifying_numbers>
<originating_research_org>Next-Generation Ecosystem Experiments Tropics; LBNL</originating_research_org>
<availability></availability>
<collaboration_names></collaboration_names>
<publication_date>2016</publication_date>
<language>English</language>
<country>US</country>
<sponsor_org>A few funding organizations</sponsor_org>
<subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code>
<keywords></keywords>
<accession_num></accession_num>
<description>Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.</description>
<site_url>https://ngt-data.lbl.gov/dois/NGT0001</site_url>
<date_first_submitted>2022-03-24</date_first_submitted>
<date_last_submitted>2022-03-24</date_last_submitted>
<doi status="PENDING">10.15486/fob/1525114</doi>
<doi_infix>ngt</doi_infix>
<file_extension></file_extension>
<software_needed></software_needed>
<dataset_size></dataset_size>
<contact_name>NGEE Tropics Archive Team, Support Organization</contact_name>
<contact_org>Lawrence Berkeley National Lab</contact_org>
<contact_email>ngeet-team@testserver</contact_email>
<contact_phone>(510) 495-2905</contact_phone>
<othnondoe_contract_nos></othnondoe_contract_nos>
</record>
</records>
47 changes: 47 additions & 0 deletions archive_api/tests/osti_response_publish_error_site_url.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<records>
<record status="Pending" released="N">
<site_input_code>NGEE-TRPC</site_input_code>
<osti_id>1525114</osti_id>
<dataset_type>SM</dataset_type>
<title>Data Set 2</title>
<status>SUCCESS</status>
<authors>
<author>
<first_name>Luke</first_name>
<last_name>Cage</last_name>
<private_email>[email protected]</private_email>
<affiliation_name>POWER</affiliation_name>
</author>
</authors>
<related_resource></related_resource>
<product_nos>NGT0001</product_nos>
<product_type>DA</product_type>
<contract_nos>LBNL NGEE-Tropics UC, Berkeley NGEE-Tropics</contract_nos>
<other_identifying_numbers></other_identifying_numbers>
<originating_research_org>Next-Generation Ecosystem Experiments Tropics; LBNL</originating_research_org>
<availability></availability>
<collaboration_names></collaboration_names>
<publication_date>2016</publication_date>
<language>English</language>
<country>US</country>
<sponsor_org>A few funding organizations</sponsor_org>
<subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code>
<keywords></keywords>
<accession_num></accession_num>
<description>Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.</description>
<site_url>https://vchendrix/dois/NGT0001</site_url>
<date_first_submitted>2022-03-24</date_first_submitted>
<date_last_submitted>2022-03-24</date_last_submitted>
<doi status="PENDING">10.15486/ngt/1525114</doi>
<doi_infix>ngt</doi_infix>
<file_extension></file_extension>
<software_needed></software_needed>
<dataset_size></dataset_size>
<contact_name>NGEE Tropics Archive Team, Support Organization</contact_name>
<contact_org>Lawrence Berkeley National Lab</contact_org>
<contact_email>ngeet-team@testserver</contact_email>
<contact_phone>(510) 495-2905</contact_phone>
<othnondoe_contract_nos></othnondoe_contract_nos>
</record>
</records>
62 changes: 49 additions & 13 deletions archive_api/tests/test_osti.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from archive_api.models import ServiceAccount


OSTI_XML = '<records><record><title>Data Set 2</title><contract_nos>LBNL NGEE-Tropics &amp; UC, Berkeley NGEE-Tropics</contract_nos><non-doe_contract_nos>LBNL NGEE-Tropics &amp; UC, Berkeley NGEE-Tropics</non-doe_contract_nos><originating_research_org>LBNL</originating_research_org><description>Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.</description><sponsor_org>A few funding organizations</sponsor_org><related_resource /><product_nos>NGT0001</product_nos><osti_id>892375dkfnsi</osti_id><site_url>https://ngt-data.lbl.gov/dois/NGT0001</site_url><publication_date>2016</publication_date><dataset_type>SM</dataset_type><contact_name>NGEE Tropics Archive Team, Support Organization</contact_name><contact_email>NGEE Tropics Archive Test &lt;ngeet-team@testserver&gt;</contact_email><contact_org>Lawrence Berkeley National Lab</contact_org><site_code>NGEE-TRPC</site_code><doi_infix>ngt</doi_infix><subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code><language>English</language><country>US</country><creatorsblock><creators_detail><first_name>Luke</first_name><last_name>Cage</last_name><private_email>[email protected]</private_email><affiliation_name>POWER</affiliation_name></creators_detail></creatorsblock></record></records>'
OSTI_XML = '<records><record><title>Data Set 3</title><contract_nos>None</contract_nos><non-doe_contract_nos /><originating_research_org /><description>Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.</description><sponsor_org>A few funding organizations for my self</sponsor_org><related_resource /><product_nos>NGT0002</product_nos><osti_id>8343947</osti_id><site_url>https://ngt-data.lbl.gov/dois/NGT0002</site_url><publication_date>2016</publication_date><dataset_type>SM</dataset_type><contact_name>NGEE Tropics Archive Team, Support Organization</contact_name><contact_email>NGEE Tropics Archive Test &lt;ngeet-team@testserver&gt;</contact_email><contact_org>Lawrence Berkeley National Lab</contact_org><site_code>NGEE-TRPC</site_code><doi_infix>ngt</doi_infix><subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code><language>English</language><country>US</country><creatorsblock><creators_detail><first_name>Luke</first_name><last_name>Cage</last_name><private_email>[email protected]</private_email><affiliation_name>POWER</affiliation_name></creators_detail></creatorsblock></record></records>'
OSTI_XML_DUMMY = '<records><record><title /><contract_nos>None</contract_nos><non-doe_contract_nos /><originating_research_org /><description /><sponsor_org /><related_resource /><product_nos /><set_reserved /><dataset_type>SM</dataset_type><contact_name>NGEE Tropics Archive Team, Support Organization</contact_name><contact_email>NGEE Tropics Archive Test &lt;ngeet-team@testserver&gt;</contact_email><contact_org>Lawrence Berkeley National Lab</contact_org><site_code>NGEE-TRPC</site_code><doi_infix>ngt</doi_infix><subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code><language>English</language><country>US</country></record></records>'
BASEPATH = os.path.dirname(__file__)

Expand All @@ -25,10 +25,11 @@ def django_load_data(django_db_setup, django_db_blocker):
@pytest.mark.django_db
@pytest.mark.parametrize("dataset_id,expected_osti_xml",
[(None, OSTI_XML_DUMMY),
(2, OSTI_XML)], ids=["OSTI dummy", "OSTI publish"])
(3, OSTI_XML)], ids=["OSTI-dummy", "OSTI-publish"])
def test_to_osti(django_load_data, dataset_id, expected_osti_xml):
"""Test the generation of OSTI dummy xml"""
osti_xml = to_osti_xml(dataset_id)
print(osti_xml)
assert osti_xml == expected_osti_xml


Expand Down Expand Up @@ -63,28 +64,29 @@ def mock_post(*args, **kwargs):
def test_osti_error(django_load_data, monkeypatch, status_code):
"""Test publish"""

osti_response = open(os.path.join(BASEPATH, "osti_response_error.xml")).read()

def mock_post(*args, **kwargs):
assert "auth" in kwargs
assert kwargs["auth"] == ("myuseraccount", 'foobar')

return type('Dummy', (object,), {
"content": open(os.path.join(BASEPATH, "osti_response_error.xml")).read(),
"content": osti_response,
"status_code": status_code,
"url": "/testurl"})

monkeypatch.setattr(requests, 'post', mock_post)

with pytest.raises(ServiceAccountException) as exec_info:
publish(2)

if status_code == 200:
osti_record = publish(2)
assert osti_record
assert osti_record.status == "FAILURE"
assert osti_record.doi_status is None
assert osti_record.doi is None
assert osti_record.status_message == ('Title is required.; DOE Contract/Award Number(s) is required.; Author(s) is '
'required.; Publication Date is required.; Sponsoring Organization is '
'required.; Missing required URL.')
assert str(exec_info.value) == ('Service Account OSTI Elink: Error in doi record - Title is required.; DOE '
'Contract/Award Number(s) is required.; Author(s) is required.; Publication '
'Date is required.; Sponsoring Organization is required.; Missing required '
'URL.')
else:
pytest.raises(ServiceAccountException, publish, 2)
assert str(exec_info.value) == 'Service Account OSTI Elink: HTTP Status: 500 HTTP Response: '+osti_response


@pytest.mark.django_db
Expand Down Expand Up @@ -116,4 +118,38 @@ def mock_post(*args, **kwargs):
@pytest.mark.django_db
def test_osti_mint_not_needed(django_load_data):
"""Test osti mint returns None. The dataset already has a DOI """
assert mint(2) is None
assert mint(2) is None



@pytest.mark.django_db
@pytest.mark.parametrize("osti_xml, error_message",
[("osti_response_publish_error_site_url.xml",
('Service Account OSTI Elink: Error in publish data; There was a problem '
'minting the DOI - The site url, https://vchendrix/dois/NGT0001, does not '
'match our record, https://ngt-data.lbl.gov/dois/NGT0002.')
),
("osti_response_publish_error_doi.xml",
('Service Account OSTI Elink: Error in publish data; There was a problem '
'minting the DOI - The doi, https://doi.org/10.15486/fob/1525114, is not a '
'valid doi'))],
ids=['site_url', 'doi'])
def test_osti_publish_errors(django_load_data, monkeypatch, osti_xml, error_message):
"""Test publish errors"""

def mock_post(*args, **kwargs):
assert "auth" in kwargs
assert kwargs["auth"] == ("myuseraccount", 'foobar')

return type('Dummy', (object,), {
"content": open(os.path.join(BASEPATH, osti_xml)).read(),
"status_code": 200,
"url": "/testurl"})

monkeypatch.setattr(requests, 'post', mock_post)

with pytest.raises(ServiceAccountException) as exec_info:
publish(3)


assert str(exec_info.value) == error_message
Loading

0 comments on commit 0e626d1

Please sign in to comment.