From 0e626d1d1d9f9eb317d74f11cade3b9e5239ad7c Mon Sep 17 00:00:00 2001 From: Val Hendrix Date: Mon, 28 Aug 2023 09:36:21 -0700 Subject: [PATCH] feature(service): Adds checks for OSTI service + changes service.osti.submit to a private method + Adds checking to service.osti._submit for status and correct doi prefix, raises service exception if either of the checks fail. + Extends OSTIRecord dataclase to collect site_url and populates it from the osti response for a later check + Refactors service.osti.publish to check the site_url returned in the OSTIRecord, raises a ServiceAccountExcepton on failure. + Fixes logic error in service.osti.to_osti_xml where xml is prepared for publication. There was a possibility of creating a duplicate record or submitting for publication to osti before the dataset is approved. The first case can happen if a doi is created manually during draft and then the user clicks submit. The second case happens if the data is synchronized manually before approval. The solution is to check for the publication date and dataset.status == APPROVED before marking the dataset for publication. Additionally, set_reserved should only be set if dataset is None(dummy data) or there is no doi yet. Also, adds check if there is already a doi before adding set_reserved. + Updates existing tests for new logic. Close #405 --- archive_api/fixtures/test_archive_api.json | 5 +- archive_api/service/osti.py | 55 +++++++++++----- archive_api/tests/osti_response_publish.xml | 2 +- .../tests/osti_response_publish_error_doi.xml | 47 ++++++++++++++ .../osti_response_publish_error_site_url.xml | 47 ++++++++++++++ archive_api/tests/test_osti.py | 62 +++++++++++++++---- archive_api/tests/test_views.py | 7 ++- 7 files changed, 192 insertions(+), 33 deletions(-) create mode 100644 archive_api/tests/osti_response_publish_error_doi.xml create mode 100644 archive_api/tests/osti_response_publish_error_site_url.xml diff --git a/archive_api/fixtures/test_archive_api.json b/archive_api/fixtures/test_archive_api.json index f6a25112..c0524049 100644 --- a/archive_api/fixtures/test_archive_api.json +++ b/archive_api/fixtures/test_archive_api.json @@ -224,10 +224,10 @@ "fields": { "ngt_id": 2, "description": "Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.", - "status": 1, + "status": 2, "status_comment": "", "name": "Data Set 3", - "doi": "", + "doi": "https://doi.org/10.15486/ngt/8343947", "start_date": "2016-10-29", "end_date": null, "qaqc_status": null, @@ -241,6 +241,7 @@ "access_level": 0, "additional_access_information": "", "submission_date": "2016-10-29T19:15:35.013361Z", + "publication_date": "2016-10-29T19:15:35.013361Z", "contact": 2, "sites": [ 1 diff --git a/archive_api/service/osti.py b/archive_api/service/osti.py index cfb89de8..c16db0c6 100644 --- a/archive_api/service/osti.py +++ b/archive_api/service/osti.py @@ -23,12 +23,13 @@ @dataclass class OSTIRecord: status: str + site_url: Optional[str] doi: Optional[str] doi_status: Optional[str] status_message: Optional[str] -def submit(osti_xml) -> OSTIRecord: +def _submit(osti_xml) -> OSTIRecord: """ Submit OSTI xml record :param dataset_id: @@ -50,10 +51,20 @@ def submit(osti_xml) -> OSTIRecord: doi_status = doi is not None and doi.get("status") or None record_status = root.find("./record/status") record_status_message = root.find("./record/status_message") - return OSTIRecord(status=record_status.text, + site_url = root.find("./record/site_url") + osti_record = OSTIRecord(status=record_status.text, + site_url=site_url is not None and site_url.text or None, doi=doi is not None and f"https://doi.org/{doi.text}" or None, doi_status=doi_status, status_message=record_status_message is not None and record_status_message.text or None) + if osti_record.doi and not osti_record.doi.startswith("https://doi.org/10.15486/ngt/"): + raise ServiceAccountException( + f"Error in publish data; There was a problem minting the DOI - " + f"The doi, {osti_record.doi}, is not a valid doi", 0) + if not osti_record or osti_record.status != "SUCCESS": + raise ServiceAccountException( + f"Error in doi record - {osti_record.status_message}", 0) + return osti_record else: raise ServiceAccountException(f"HTTP Status: {r.status_code} HTTP Response: {r.content}", doi_service.service) @@ -73,7 +84,7 @@ def mint(dataset_id) -> Optional[OSTIRecord]: dataset = DataSet.objects.get(pk=dataset_id) if not dataset.doi: - osti_record = submit(str(to_osti_xml())) + osti_record = _submit(str(to_osti_xml())) if osti_record: if osti_record.status == "SUCCESS": dataset.doi = osti_record.doi @@ -89,18 +100,24 @@ def publish(dataset_id) -> OSTIRecord: :param dataset_id: the database identifier of the dataset to mint a doi for :return: OSTIRecord - :raise: Except if no `ServiceAccount` defintion exists or any unexpected errors occur - :rtype: archive_api.service.common.ServiceAccountException + :raises: archive_api.service.common.ServiceAccountException """ - dataset = DataSet.objects.get(pk=dataset_id) + if not dataset.doi: osti_record = mint(dataset_id) - if not osti_record or osti_record.status != "SUCCESS": + else: + osti_record = _submit(str(to_osti_xml(dataset_id))) + + dataset.refresh_from_db() + # Only check the site url on publish + if osti_record and dataset.doi and dataset.status >= dataset.STATUS_APPROVED: + if osti_record.site_url != _get_site_url(dataset): raise ServiceAccountException( - f"Cannot publish; There was a problem minting the DOI - {osti_record.status_message}", 0) + f"Error in publish data; There was a problem minting the DOI - " + f"The site url, {osti_record.site_url}, does not match our record, {_get_site_url(dataset)}.", 0) + return osti_record - return submit(str(to_osti_xml(dataset_id))) def to_osti_xml(dataset_id=None): @@ -130,11 +147,12 @@ def to_osti_xml(dataset_id=None): osti_id = dataset.doi.split("/")[-1] _set_value(record, 'osti_id', osti_id) - if dataset and dataset.submission_date: - _set_value(record, 'site_url', f'https://{settings.SERVICE_HOSTNAME}/dois/{dataset.data_set_id()}') - _set_value(record, 'publication_date', dataset.submission_date.strftime("%Y")) - - else: + # Should this dataset be published or reserved + if dataset and dataset.publication_date and dataset.status == dataset.STATUS_APPROVED: + _set_value(record, 'site_url', _get_site_url(dataset)) + _set_value(record, 'publication_date', dataset.publication_date.strftime("%Y")) + elif dataset and not dataset.doi or dataset is None: + # reserve a doi before publication or for a dummy dataset _set_value(record, 'set_reserved', "") # DataSet Type: Dataset Type refers to the main content of the @@ -190,3 +208,12 @@ def _creators(record, dataset): _set_value(creator_detail, 'last_name', a.author.last_name) _set_value(creator_detail, 'private_email', a.author.email) _set_value(creator_detail, 'affiliation_name', a.author.institution_affiliation) + +def _get_site_url(dataset): + """ + Build the site url from the dataset + + :param dataset: The NGT Archive dataset to build creators for + :return: string representing the site url + """ + return f'https://{settings.SERVICE_HOSTNAME}/dois/{dataset.data_set_id()}' \ No newline at end of file diff --git a/archive_api/tests/osti_response_publish.xml b/archive_api/tests/osti_response_publish.xml index 42e0ec8c..062128f4 100644 --- a/archive_api/tests/osti_response_publish.xml +++ b/archive_api/tests/osti_response_publish.xml @@ -30,7 +30,7 @@ Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam. - https://vchendrix/dois/NGT0001 + https://ngt-data.lbl.gov/dois/NGT0001 2022-03-24 2022-03-24 10.15486/ngt/1525114 diff --git a/archive_api/tests/osti_response_publish_error_doi.xml b/archive_api/tests/osti_response_publish_error_doi.xml new file mode 100644 index 00000000..034d1ae3 --- /dev/null +++ b/archive_api/tests/osti_response_publish_error_doi.xml @@ -0,0 +1,47 @@ + + + + NGEE-TRPC + 1525114 + SM + Data Set 2 + SUCCESS + + + Luke + Cage + lcage@foobar.baz + POWER + + + + NGT0001 + DA + LBNL NGEE-Tropics UC, Berkeley NGEE-Tropics + + Next-Generation Ecosystem Experiments Tropics; LBNL + + + 2016 + English + US + A few funding organizations + 54 ENVIRONMENTAL SCIENCES + + + Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam. + https://ngt-data.lbl.gov/dois/NGT0001 + 2022-03-24 + 2022-03-24 + 10.15486/fob/1525114 + ngt + + + + NGEE Tropics Archive Team, Support Organization + Lawrence Berkeley National Lab + ngeet-team@testserver + (510) 495-2905 + + + \ No newline at end of file diff --git a/archive_api/tests/osti_response_publish_error_site_url.xml b/archive_api/tests/osti_response_publish_error_site_url.xml new file mode 100644 index 00000000..42e0ec8c --- /dev/null +++ b/archive_api/tests/osti_response_publish_error_site_url.xml @@ -0,0 +1,47 @@ + + + + NGEE-TRPC + 1525114 + SM + Data Set 2 + SUCCESS + + + Luke + Cage + lcage@foobar.baz + POWER + + + + NGT0001 + DA + LBNL NGEE-Tropics UC, Berkeley NGEE-Tropics + + Next-Generation Ecosystem Experiments Tropics; LBNL + + + 2016 + English + US + A few funding organizations + 54 ENVIRONMENTAL SCIENCES + + + Qui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam. + https://vchendrix/dois/NGT0001 + 2022-03-24 + 2022-03-24 + 10.15486/ngt/1525114 + ngt + + + + NGEE Tropics Archive Team, Support Organization + Lawrence Berkeley National Lab + ngeet-team@testserver + (510) 495-2905 + + + \ No newline at end of file diff --git a/archive_api/tests/test_osti.py b/archive_api/tests/test_osti.py index 20aba39c..d4ee0fec 100644 --- a/archive_api/tests/test_osti.py +++ b/archive_api/tests/test_osti.py @@ -9,7 +9,7 @@ from archive_api.models import ServiceAccount -OSTI_XML = 'Data Set 2LBNL NGEE-Tropics & UC, Berkeley NGEE-TropicsLBNL NGEE-Tropics & UC, Berkeley NGEE-TropicsLBNLQui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.A few funding organizationsNGT0001892375dkfnsihttps://ngt-data.lbl.gov/dois/NGT00012016SMNGEE Tropics Archive Team, Support OrganizationNGEE Tropics Archive Test <ngeet-team@testserver>Lawrence Berkeley National LabNGEE-TRPCngt54 ENVIRONMENTAL SCIENCESEnglishUSLukeCagelcage@foobar.bazPOWER' +OSTI_XML = 'Data Set 3NoneQui illud verear persequeris te. Vis probo nihil verear an, zril tamquam philosophia eos te, quo ne fugit movet contentiones. Quas mucius detraxit vis an, vero omnesque petentium sit ea. Id ius inimicus comprehensam.A few funding organizations for my selfNGT00028343947https://ngt-data.lbl.gov/dois/NGT00022016SMNGEE Tropics Archive Team, Support OrganizationNGEE Tropics Archive Test <ngeet-team@testserver>Lawrence Berkeley National LabNGEE-TRPCngt54 ENVIRONMENTAL SCIENCESEnglishUSLukeCagelcage@foobar.bazPOWER' OSTI_XML_DUMMY = '<contract_nos>None</contract_nos><non-doe_contract_nos /><originating_research_org /><description /><sponsor_org /><related_resource /><product_nos /><set_reserved /><dataset_type>SM</dataset_type><contact_name>NGEE Tropics Archive Team, Support Organization</contact_name><contact_email>NGEE Tropics Archive Test <ngeet-team@testserver></contact_email><contact_org>Lawrence Berkeley National Lab</contact_org><site_code>NGEE-TRPC</site_code><doi_infix>ngt</doi_infix><subject_categories_code>54 ENVIRONMENTAL SCIENCES</subject_categories_code><language>English</language><country>US</country></record></records>' BASEPATH = os.path.dirname(__file__) @@ -25,10 +25,11 @@ def django_load_data(django_db_setup, django_db_blocker): @pytest.mark.django_db @pytest.mark.parametrize("dataset_id,expected_osti_xml", [(None, OSTI_XML_DUMMY), - (2, OSTI_XML)], ids=["OSTI dummy", "OSTI publish"]) + (3, OSTI_XML)], ids=["OSTI-dummy", "OSTI-publish"]) def test_to_osti(django_load_data, dataset_id, expected_osti_xml): """Test the generation of OSTI dummy xml""" osti_xml = to_osti_xml(dataset_id) + print(osti_xml) assert osti_xml == expected_osti_xml @@ -63,28 +64,29 @@ def mock_post(*args, **kwargs): def test_osti_error(django_load_data, monkeypatch, status_code): """Test publish""" + osti_response = open(os.path.join(BASEPATH, "osti_response_error.xml")).read() + def mock_post(*args, **kwargs): assert "auth" in kwargs assert kwargs["auth"] == ("myuseraccount", 'foobar') return type('Dummy', (object,), { - "content": open(os.path.join(BASEPATH, "osti_response_error.xml")).read(), + "content": osti_response, "status_code": status_code, "url": "/testurl"}) monkeypatch.setattr(requests, 'post', mock_post) + with pytest.raises(ServiceAccountException) as exec_info: + publish(2) + if status_code == 200: - osti_record = publish(2) - assert osti_record - assert osti_record.status == "FAILURE" - assert osti_record.doi_status is None - assert osti_record.doi is None - assert osti_record.status_message == ('Title is required.; DOE Contract/Award Number(s) is required.; Author(s) is ' - 'required.; Publication Date is required.; Sponsoring Organization is ' - 'required.; Missing required URL.') + assert str(exec_info.value) == ('Service Account OSTI Elink: Error in doi record - Title is required.; DOE ' + 'Contract/Award Number(s) is required.; Author(s) is required.; Publication ' + 'Date is required.; Sponsoring Organization is required.; Missing required ' + 'URL.') else: - pytest.raises(ServiceAccountException, publish, 2) + assert str(exec_info.value) == 'Service Account OSTI Elink: HTTP Status: 500 HTTP Response: '+osti_response @pytest.mark.django_db @@ -116,4 +118,38 @@ def mock_post(*args, **kwargs): @pytest.mark.django_db def test_osti_mint_not_needed(django_load_data): """Test osti mint returns None. The dataset already has a DOI """ - assert mint(2) is None \ No newline at end of file + assert mint(2) is None + + + +@pytest.mark.django_db +@pytest.mark.parametrize("osti_xml, error_message", + [("osti_response_publish_error_site_url.xml", + ('Service Account OSTI Elink: Error in publish data; There was a problem ' + 'minting the DOI - The site url, https://vchendrix/dois/NGT0001, does not ' + 'match our record, https://ngt-data.lbl.gov/dois/NGT0002.') + ), + ("osti_response_publish_error_doi.xml", + ('Service Account OSTI Elink: Error in publish data; There was a problem ' + 'minting the DOI - The doi, https://doi.org/10.15486/fob/1525114, is not a ' + 'valid doi'))], + ids=['site_url', 'doi']) +def test_osti_publish_errors(django_load_data, monkeypatch, osti_xml, error_message): + """Test publish errors""" + + def mock_post(*args, **kwargs): + assert "auth" in kwargs + assert kwargs["auth"] == ("myuseraccount", 'foobar') + + return type('Dummy', (object,), { + "content": open(os.path.join(BASEPATH, osti_xml)).read(), + "status_code": 200, + "url": "/testurl"}) + + monkeypatch.setattr(requests, 'post', mock_post) + + with pytest.raises(ServiceAccountException) as exec_info: + publish(3) + + + assert str(exec_info.value) == error_message diff --git a/archive_api/tests/test_views.py b/archive_api/tests/test_views.py index 72d10b57..2fd3a02f 100644 --- a/archive_api/tests/test_views.py +++ b/archive_api/tests/test_views.py @@ -77,7 +77,7 @@ <td></td> <td> <h5 class="title">Approved</h5></td> - <td style="text-align: right">0</td> + <td style="text-align: right">1</td> <td></td> </tr> @@ -169,11 +169,11 @@ def test_download(self): # Using assertInHTML because it is more forgiving. self.assertInHTML("""NGT ID,Access Level,Title,Approval Date,Contact,Authors,DOI,Downloads,Citation -NGT0002,Private,Data Set 3,,"Cage, Luke - POWER",Cage L,,0,Citation information not available currently. Contact dataset author(s) for citation or acknowledgement text. +NGT0002,Private,Data Set 3,2016-10-29 19:15:35.013361+00:00,"Cage, Luke - POWER",Cage L,https://doi.org/10.15486/ngt/8343947,0,Cage L (2016): Data Set 3. 0.0. NGEE Tropics Data Collection. (dataset). https://doi.org/10.15486/ngt/8343947 NGT0000,Public,Data Set 1,,"Cage, Luke - POWER",,,0,Citation information not available currently. Contact dataset author(s) for citation or acknowledgement text. NGT0001,Private,Data Set 2,,"Cage, Luke - POWER",Cage L,https://dx.doi.org/10.1111/892375dkfnsi,0,Cage L (2016): Data Set 2. 0.0. NGEE Tropics Data Collection. (dataset). https://dx.doi.org/10.1111/892375dkfnsi NGT0003,Private,Data Set 4,,"Cage, Luke - POWER",,,0,Citation information not available currently. Contact dataset author(s) for citation or acknowledgement text. -""", response.content.decode()) + """, response.content.decode()) def test_unauthenticated(self): """Test unauthenticated user""" @@ -192,6 +192,7 @@ def test_admin(self): """Test that the metrics page is deplayed""" client = self.login("admin") response = self.check_all_login_view(client) + print(response.content.decode()) self.assertInHTML(HTML_METRICS_ADMIN, response.content.decode()) def test_staff_access(self):