From 7318430796b6b663542b1b9602f45bf6d2110639 Mon Sep 17 00:00:00 2001
From: Ramakrishna Sakhamuru
Date: Wed, 9 Oct 2024 20:16:16 +0530
Subject: [PATCH] reject articles when uploaded before oa start date

---
 .../unit/test_tasks_ingestDOAJarticles.py     | 75 ++++++++++++++++++-
 portality/bll/exceptions.py                   |  6 ++
 portality/bll/services/article.py             | 18 ++++-
 .../tasks/helpers/articles_upload_helper.py   |  3 +
 portality/ui/messages.py                      |  1 +
 5 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/doajtest/unit/test_tasks_ingestDOAJarticles.py b/doajtest/unit/test_tasks_ingestDOAJarticles.py
index 4421d97ef9..77f5914b1b 100644
--- a/doajtest/unit/test_tasks_ingestDOAJarticles.py
+++ b/doajtest/unit/test_tasks_ingestDOAJarticles.py
@@ -23,7 +23,7 @@
 from portality.crosswalks import article_doaj_xml
 from portality.tasks import ingestarticles
 from portality.ui.messages import Messages
-
+from portality.lib import dates
 
 class TestIngestArticlesDoajXML(DoajTestCase):
 
@@ -1048,3 +1048,76 @@ def test_61_journal_not_indoaj(self):
 
         assert file_upload.status == "failed"
         assert file_upload.error == Messages.EXCEPTION_ADDING_ARTICLE_TO_WITHDRAWN_JOURNAL
+
+    def test_62_article_before_oa_start(self):
+        journal = article_upload_tester.create_simple_journal("testowner", pissn="1234-5678", eissn="9876-5432")
+        journal.bibjson().oa_start = dates.now().year + 2
+        helpers.save_all_block_last([journal,
+                                     article_upload_tester.create_simple_publisher("testowner")
+                                     ])
+
+        # make the file handle for the upload
+        handle1 = DoajXmlArticleFixtureFactory.upload_2_issns_correct()
+
+        f1 = FileMockFactory(stream=handle1)
+
+        job1 = ingestarticles.IngestArticlesBackgroundTask.prepare("testowner", schema="doaj", upload_file=f1)
+        id1 = job1.params.get("ingest_articles__file_upload_id")
+        self.cleanup_ids.append(id1)
+
+        # because file upload gets created and saved by prepare
+        time.sleep(1)
+
+        task1 = ingestarticles.IngestArticlesBackgroundTask(job1)
+
+        task1.run()
+
+        # because file upload needs to be re-saved
+        time.sleep(1)
+
+        fu1 = models.FileUpload.pull(id1)
+
+        assert fu1.status == "failed", "received status: {}".format(fu1.status)
+        assert job1.outcome_status == "fail"
+
+        assert any('Articles before OA start date: Imaginaires autochtones contemporains. Introduction' in entry['message'] for entry in
+                   job1.audit), "No message found with 'Articles before OA start date'"
+
+        # check that article not created
+        assert models.Article.count_by_issns(["1234-5678", "9876-5432"]) == 0
+
+    def test_63_article_after_oa_start(self):
+        journal = article_upload_tester.create_simple_journal("testowner", pissn="1234-5678", eissn="9876-5432")
+        journal.bibjson().oa_start = dates.now().year - 2
+        helpers.save_all_block_last([journal,
+                                     article_upload_tester.create_simple_publisher("testowner")
+                                     ])
+
+        # make the file handle for the upload
+        handle1 = DoajXmlArticleFixtureFactory.upload_2_issns_correct()
+
+        f1 = FileMockFactory(stream=handle1)
+
+        job1 = ingestarticles.IngestArticlesBackgroundTask.prepare("testowner", schema="doaj", upload_file=f1)
+        id1 = job1.params.get("ingest_articles__file_upload_id")
+        self.cleanup_ids.append(id1)
+
+        # because file upload gets created and saved by prepare
+        time.sleep(1)
+
+        task1 = ingestarticles.IngestArticlesBackgroundTask(job1)
+
+        task1.run()
+
+        # because file upload needs to be re-saved
+        time.sleep(1)
+
+        fu1 = models.FileUpload.pull(id1)
+
+        assert fu1.status == "processed", "received status: {}".format(fu1.status)
+
+        assert not any('Articles before OA start date: Imaginaires autochtones contemporains. Introduction' in entry['message'] for entry in
+                       job1.audit), "Unexpected 'Articles before OA start date' message found"
+
+        # check that the article was created
+        assert models.Article.count_by_issns(["1234-5678", "9876-5432"]) == 1
diff --git a/portality/bll/exceptions.py b/portality/bll/exceptions.py
index ca1d0bd8e6..d90edd8bf1 100644
--- a/portality/bll/exceptions.py
+++ b/portality/bll/exceptions.py
@@ -73,6 +73,12 @@ def __str__(self):
         super(ArticleNotAcceptable, self).__str__()
         return self.message
 
+class ArticleBeforeOAStartDate(ArticleNotAcceptable):
+    """
+    Exception to raise when an article is uploaded before the OA start date of its Journal
+    """
+    pass
+
 class ArticleMergeConflict(Exception):
     """
     Exception to raise when it's not clear which article to merge an update with
diff --git a/portality/bll/services/article.py b/portality/bll/services/article.py
index b5e829cd24..7bcbb89524 100644
--- a/portality/bll/services/article.py
+++ b/portality/bll/services/article.py
@@ -55,6 +55,7 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
         all_shared = set()
         all_unowned = set()
         all_unmatched = set()
+        all_before_oa_start_date = set()
 
         # Hold on to the exception so we can raise it later
         e_not_acceptable = None
@@ -70,6 +71,9 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
                                              dry_run=True)
             except (exceptions.ArticleMergeConflict, exceptions.ConfigurationException):
                 raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT)
+            except exceptions.ArticleBeforeOAStartDate as e:
+                e_not_acceptable = e
+                result = {'fail': 1, 'before_oa_start_date': article.bibjson().title}
             except exceptions.ArticleNotAcceptable as e:
                 # The ArticleNotAcceptable exception is a superset of reasons we can't match a journal to this article
                 e_not_acceptable = e
@@ -82,9 +86,12 @@ def batch_create_articles(self, articles, account, duplicate_check=True, merge_d
             all_shared.update(result.get("shared", set()))
             all_unowned.update(result.get("unowned", set()))
             all_unmatched.update(result.get("unmatched", set()))
+            article_before_oa_start = result.get("before_oa_start_date")
+            if isinstance(article_before_oa_start, str) and article_before_oa_start:
+                all_before_oa_start_date.add(article_before_oa_start)
 
         report = {"success": success, "fail": fail, "update": update, "new": new, "shared": all_shared,
-                  "unowned": all_unowned, "unmatched": all_unmatched}
+                  "unowned": all_unowned, "unmatched": all_unmatched, "before_oa_start_date": all_before_oa_start_date}
 
         # if there were no failures in the batch, then we can do the save
         if fail == 0:
@@ -235,8 +242,15 @@ def create_article(self, article, account, duplicate_check=True, merge_duplicate
             except (exceptions.DuplicateArticleException, exceptions.ArticleMergeConflict, exceptions.ConfigurationException) as e:
                 raise e
 
+        # Reject the article if it is uploaded before the Journal's OA start year (skipped when no journal is resolved)
+        journal = article.get_journal()
+        oa_start_date = journal.has_oa_start_date() if journal is not None else None
+        if oa_start_date and dates.now().year < oa_start_date:
+            raise exceptions.ArticleBeforeOAStartDate(message=Messages.EXCEPTION_ARTICLE_BEFORE_OA_START_DATE.
+                                                      format(title=article.bibjson().title))
+
         if add_journal_info:
-            article.add_journal_metadata()
+            article.add_journal_metadata(j=journal)
 
         # finally, save the new article
         if not dry_run:
diff --git a/portality/tasks/helpers/articles_upload_helper.py b/portality/tasks/helpers/articles_upload_helper.py
index 114e4d7a9a..4f1ac4e72a 100644
--- a/portality/tasks/helpers/articles_upload_helper.py
+++ b/portality/tasks/helpers/articles_upload_helper.py
@@ -84,6 +84,7 @@ def upload_process(articles_upload: BaseArticlesUpload,
     shared = result.get("shared", [])
     unowned = result.get("unowned", [])
     unmatched = result.get("unmatched", [])
+    before_oa_start_date = result.get("before_oa_start_date", [])
 
     if success == 0 and fail > 0 and not ingest_exception:
         articles_upload.failed("All articles in file failed to import")
@@ -99,6 +100,8 @@ def upload_process(articles_upload: BaseArticlesUpload,
     job.add_audit_message("Shared ISSNs: " + ", ".join(list(shared)))
     job.add_audit_message("Unowned ISSNs: " + ", ".join(list(unowned)))
     job.add_audit_message("Unmatched ISSNs: " + ", ".join(list(unmatched)))
+    if before_oa_start_date:
+        job.add_audit_message("Articles before OA start date: " + ", ".join(list(before_oa_start_date)))
 
     if new:
         ids = [a.id for a in articles]
diff --git a/portality/ui/messages.py b/portality/ui/messages.py
index 5702b45aa4..c974419d9b 100644
--- a/portality/ui/messages.py
+++ b/portality/ui/messages.py
@@ -68,6 +68,7 @@ class Messages(object):
     EXCEPTION_IDENTICAL_PISSN_AND_EISSN = "The Print and Online ISSNs supplied are identical. If you supply two ISSNs, they must be different."
     EXCEPTION_NO_ISSNS = "Neither the Print ISSN nor Online ISSN have been supplied. DOAJ requires at least one ISSN."
     EXCEPTION_INVALID_BIBJSON = "Invalid article bibjson: "  # + Dataobj exception message
+    EXCEPTION_ARTICLE_BEFORE_OA_START_DATE = "Article '{title}' cannot be uploaded before OA start date of the Journal"
     EXCEPTION_IDENTIFIER_CHANGE_CLASH = "DOI or Fulltext URL has been changed to match another article that already exists in DOAJ"
     EXCEPTION_IDENTIFIER_CHANGE = "Either the DOI or Fulltext URL has been changed. This operation is not permitted; please contact an administrator for help."