diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8ee9b3549c..a8027abeaa 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,12 +1,10 @@ -# Title <- provide a title for the PR - -*Please don't delete any sections when completing this PR template; instead enter **N/A** for checkboxes or sections which are not applicable, unless otherwise stated below* +* Issue: [enter link to issue here] -See # <- enter link to issue on main board +--- -Describe the scope/purpose of the PR here in as much detail as you like +# Title <- provide a title for the PR -## Categorisation +*briefly describe the PR here* This PR... - [ ] has scripts to run @@ -18,121 +16,52 @@ This PR... - [ ] affects the publisher area - [ ] affects the monitoring -## Basic PR Checklist - -Instructions for developers: -* For each checklist item, if it is N/A to your PR check the N/A box -* For each item that you have done and confirmed for yourself, check Developer box (including if you have checked the N/A box) - -Instructions for reviewers: -* For each checklist item that has been confirmed by the Developer, check the Reviewer box if you agree -* For multiple reviewers, feel free to add your own checkbox with your github username next to it if that helps with review tracking - -### Code Style - -- No deprecated methods are used - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- No magic strings/numbers - all strings are in `constants` or `messages` files - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- ES queries are wrapped in a Query object rather than inlined in the code - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Where possible our common library functions have been used (e.g. dates manipulated via `dates`) - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Cleaned up commented out code, etc - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Urls are constructed with `url_for` not hard-coded - - [ ] N/A - - [ ] Developer - - [ ] Reviewer -### Testing - -- Unit tests have been added/modified - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Functional tests have been added/modified - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Code has been run manually in development, and functional tests followed locally - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Have CSS/style changes been implemented? If they are of a global scope (e.g. on base HTML elements) have the downstream impacts of the change in other areas of the system been considered? - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -### Documentation - -- FeatureMap annotations have been added - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Documentation updates - if needed - have been identified and prepared for inclusion into main documentation (e.g. 
added and highlighted/commented as appropriate to this PR) - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Core model documentation has been added to if needed: https://docs.google.com/spreadsheets/d/1lun2S9vwGbyfy3WjIjgXBm05D-3wWDZ4bp8xiIYfImM/edit - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Events and consumers documentation has been added if needed: https://docs.google.com/spreadsheets/d/1oIeG5vg-blm2MZCE-7YhwulUlSz6TOUeY8jAftdP9JE/edit - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- The docs for this branch have been generated and pushed to the doc site (see docs/README.md for details) - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - - -### Release Readiness - -- If needed, migration has been created and tested locally - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- Release sheet has been created, and completed as far as is possible https://docs.google.com/spreadsheets/d/1Bqx23J1MwXzjrmAygbqlU3YHxN1Wf7zkkRv14eTVLZQ/edit - - [ ] N/A - - [ ] Developer - - [ ] Reviewer - -- There has been a recent merge up from `develop` (or other base branch). List the dates of the merges up from develop below - - [date of merge up] - +## Developer Checklist + +*Developers should review and confirm each of these items before requesting review* + +* [ ] Code meets acceptance criteria from issue +* [ ] Unit tests are written and all pass +* [ ] User Test Scripts (if required) are written and have been run through +* [ ] Project's coding standards are met + - No deprecated methods are used + - No magic strings/numbers - all strings are in `constants` or `messages` files + - ES queries are wrapped in a Query object rather than inlined in the code + - Where possible our common library functions have been used (e.g. dates manipulated via `dates`) + - Cleaned up commented out code, etc + - Urls are constructed with `url_for` not hard-coded +* [ ] Code documentation and related non-code documentation has all been updated + - Core model documentation has been added to if needed: https://docs.google.com/spreadsheets/d/1lun2S9vwGbyfy3WjIjgXBm05D-3wWDZ4bp8xiIYfImM/edit + - Events and consumers documentation has been added if needed: https://docs.google.com/spreadsheets/d/1oIeG5vg-blm2MZCE-7YhwulUlSz6TOUeY8jAftdP9JE/edit +* [ ] Migration has been created and tested +* [ ] There is a recent merge from `develop` + +## Reviewer Checklist + +*Reviewers should review and confirm each of these items before approval* +*If there are multiple reviewers, this section should be duplicated for each reviewer* + +* [ ] Code meets acceptance criteria from issue +* [ ] Unit tests are written and all pass +* [ ] User Test Scripts (if required) are written and have been run through +* [ ] Project's coding standards are met + - No deprecated methods are used + - No magic strings/numbers - all strings are in `constants` or `messages` files + - ES queries are wrapped in a Query object rather than inlined in the code + - Where possible our common library functions have been used (e.g. 
dates manipulated via `dates`) + - Cleaned up commented out code, etc + - Urls are constructed with `url_for` not hard-coded +* [ ] Code documentation and related non-code documentation has all been updated + - Core model documentation has been added to if needed: https://docs.google.com/spreadsheets/d/1lun2S9vwGbyfy3WjIjgXBm05D-3wWDZ4bp8xiIYfImM/edit + - Events and consumers documentation has been added if needed: https://docs.google.com/spreadsheets/d/1oIeG5vg-blm2MZCE-7YhwulUlSz6TOUeY8jAftdP9JE/edit +* [ ] Migration has been created and tested +* [ ] There is a recent merge from `develop` ## Testing -List the Functional Tests that must be run to confirm this feature - -1. ... -2. ... - +*List user test scripts that need to be run* +*List any non-unit test scripts that need to be run by reviewers* ## Deployment @@ -161,5 +90,3 @@ What new infrastructure does this PR require (e.g. new services that need to run ### Continuous Integration What CI changes are required for this - - diff --git a/cms/assets/img/sponsors/Logo1.png b/cms/assets/img/sponsors/Logo1.png new file mode 100644 index 0000000000..0aa4ceebaf Binary files /dev/null and b/cms/assets/img/sponsors/Logo1.png differ diff --git a/cms/assets/img/sponsors/igf.png b/cms/assets/img/sponsors/igf.png new file mode 100644 index 0000000000..6a61f6e5c3 Binary files /dev/null and b/cms/assets/img/sponsors/igf.png differ diff --git a/cms/assets/img/sponsors/scup-logo.png b/cms/assets/img/sponsors/scup-logo.png new file mode 100644 index 0000000000..dae5c6dd20 Binary files /dev/null and b/cms/assets/img/sponsors/scup-logo.png differ diff --git a/cms/assets/img/volunteers/Popova-1.jpeg b/cms/assets/img/volunteers/Popova-1.jpeg new file mode 120000 index 0000000000..238ea46773 --- /dev/null +++ b/cms/assets/img/volunteers/Popova-1.jpeg @@ -0,0 +1 @@ +../ambassadors/Popova-1.jpeg \ No newline at end of file diff --git a/cms/assets/img/volunteers/ina-smith.png b/cms/assets/img/volunteers/ina-smith.png new file mode 120000 index 0000000000..878b93f265 --- /dev/null +++ b/cms/assets/img/volunteers/ina-smith.png @@ -0,0 +1 @@ +../ambassadors/ina-smith.png \ No newline at end of file diff --git a/cms/assets/img/volunteers/max.png b/cms/assets/img/volunteers/max.png new file mode 120000 index 0000000000..7cad0ebe9d --- /dev/null +++ b/cms/assets/img/volunteers/max.png @@ -0,0 +1 @@ +../ambassadors/max.png \ No newline at end of file diff --git a/cms/data/publisher-supporters.yml b/cms/data/publisher-supporters.yml index 562e07450e..2011fa06d1 100644 --- a/cms/data/publisher-supporters.yml +++ b/cms/data/publisher-supporters.yml @@ -1,21 +1 @@ # ~~PublisherSupporters:Data~~ -- name: Faculty of Communication, Universitas Tarumanagara - url: https://fikom.untar.ac.id/ - -- name: Gruppo Italiano Frattura - url: http://www.gruppofrattura.it/sito/en/ - -- name: INCAS - National Institute for Aerospace Research “Elie Carafoli”, INCAS Bucuresti - url: https://www.gruppofrattura.it/sito/en/ - -- name: Italian Society of Victimology - url: https://www.vittimologia.it/rivista - -- name: Scandinavian University Press (Universitetsforlaget AS) - url: https://www.universitetsforlaget.no/ - -- name: Scientia Agropecuaria - url: https://revistas.unitru.edu.pe/index.php/scientiaagrop - -- name: Tsinghua University Press - url: https://www.tsinghua.edu.cn/en/ diff --git a/cms/data/sponsors.yml b/cms/data/sponsors.yml index 7ae985bfb5..6d71697ca5 100644 --- a/cms/data/sponsors.yml +++ b/cms/data/sponsors.yml @@ -73,6 +73,10 @@ url: https://www.frontiersin.org/ logo: 
frontiers.svg +- name: Gruppo Italiano Frattura + url: https://www.gruppofrattura.eu/ + logo: igf.png + - name: Iași University of Life Sciences url: https://iuls.ro/en/ logo: Lasi.png @@ -85,6 +89,10 @@ url: https://www.theiet.org/ logo: iet.svg +- name: INCAS - National Institute for Aerospace Research “Elie Carafoli” + url: https://www.incas.ro/ + logo: Logo1.png + - name: Institute of Physics url: https://www.iop.org/ logo: iop.jpg @@ -137,6 +145,10 @@ url: https://www.sagepublications.com/ logo: sage.svg +- name: Scandinavian University Press + url: https://www.universitetsforlaget.no/en/ + logo: scup-logo.png + - name: SciFree url: https://scifree.se/ logo: scifree.svg diff --git a/cms/data/volunteers.yml b/cms/data/volunteers.yml index 1126b62034..17529e2b69 100644 --- a/cms/data/volunteers.yml +++ b/cms/data/volunteers.yml @@ -364,6 +364,14 @@ ass_ed: language: Indonesian, English photo: "handoko.jpg" +- name: Ina Smith + area: Scholarly Publishing, Instructional Design + year_since: + city: Pretoria + country: South Africa + language: English, Afrikaans + photo: "ina-smith.png" + - name: Iryna Kuchma area: Humanities, Social Sciences year_since: @@ -545,7 +553,15 @@ ass_ed: country: Poland language: Polish, English photo: "Martyna.JPG" - + +- name: Maxim Mitrofanov + area: International Relations, Political Science + year_since: + city: Moscow + country: Russian Federation + language: Russian, English + photo: "max.png" + - name: Melih Sever area: Social Sciences year_since: @@ -576,6 +592,14 @@ ass_ed: language: Indonesian, English photo: "MuhamadTaufik.jpg" +- name: Natalia Popova + area: Sociology + year_since: + city: Ekaterinburg + country: Russian Federation + language: Russian, English + photo: "Popova-1.jpeg" + - name: Nataliia Kaliuzhna area: Library and Information Science year_since: @@ -584,7 +608,7 @@ ass_ed: language: Ukrainian, Russian, English, Polish featured: true photo: "Nataliia.jpg" - + - name: Natia Gabedava area: Humanities, Education year_since: @@ -707,13 +731,6 @@ ass_ed: city: Tabriz country: Iran language: Persian, Azari, Turkish, English - -- name: Sara Ricetto - area: Scholarly Publishing - year_since: - city: Milan - country: Italy - language: Italian, English, German - name: Shiying Li area: Forensic Science diff --git a/cms/pages/support/publisher-supporters.md b/cms/pages/support/publisher-supporters.md index d661055486..5d0cf13f95 100644 --- a/cms/pages/support/publisher-supporters.md +++ b/cms/pages/support/publisher-supporters.md @@ -66,5 +66,3 @@ Please [contact us](/contact/) to discuss further. ## Supporting publishers, aggregators, and other organizations
{% include '/data/sponsors.html' %}
- -## Other publisher supporters diff --git a/doajtest/unit/test_cookie_consent_own_domain.py b/doajtest/unit/test_cookie_consent_own_domain.py new file mode 100644 index 0000000000..d93458426c --- /dev/null +++ b/doajtest/unit/test_cookie_consent_own_domain.py @@ -0,0 +1,44 @@ +from doajtest.helpers import DoajTestCase +from urllib.parse import quote_plus, urlparse + + +class TestCookieConsent(DoajTestCase): + + def test_01_cookie_consent_permitted_domains(self): + """ Ensure we only redirect to our own domain via cookie consent """ + + with self.app_test.test_client() as t_client: + # Ensure only relative redirects are permitted + empty_redirect = t_client.get('/cookie_consent') + assert empty_redirect.status_code == 200 + + permitted_redirect = t_client.get('/cookie_consent?continue=%2Farticle%2Fuuid') + assert permitted_redirect.status_code == 302 + assert permitted_redirect.location == '/article/uuid' + + permitted_redirect_params = t_client.get('/cookie_consent?continue=' + quote_plus('/apply?errors=numerous')) + assert permitted_redirect_params.status_code == 302 + assert permitted_redirect_params.location == '/apply?errors=numerous' + + def test_02_cookie_consent_invalid_domains(self): + """ Any redirect to another domain is rejected via cookie consent """ + + with self.app_test.test_client() as t_client: + invalid_redirect = t_client.get( + '/cookie_consent?continue=https%3A%2F%2Fa_nasty_phishing_site.com%2Femailform%3Fdeeds%3Devil') + assert invalid_redirect.status_code == 400 + + # The best we can do - a redirect that looks like a path should try to serve from our domain, fail with 404 + invalid_redirect_no_scheme = t_client.get( + '/cookie_consent?continue=a_nasty_phishing_site.com%2Femailform%3Fdeeds%3Devil') + assert invalid_redirect_no_scheme.status_code == 302 + assert not invalid_redirect_no_scheme.location.startswith('http') + assert urlparse(invalid_redirect_no_scheme.location).path == 'a_nasty_phishing_site.com/emailform' + assert urlparse(invalid_redirect_no_scheme.location).netloc == '' + + invalid_redirect_ip = t_client.get( + '/cookie_consent?continue=1.2.3.4%2Femailform%3Fdeeds%3Devil') + assert invalid_redirect_ip.status_code == 302 + assert not invalid_redirect_ip.location.startswith('http') + assert urlparse(invalid_redirect_ip.location).path == '1.2.3.4/emailform' + assert urlparse(invalid_redirect_ip.location).netloc == '' diff --git a/doajtest/unit/test_task_datalog_journal_added_update.py b/doajtest/unit/test_task_datalog_journal_added_update.py index 46d5bff5c1..c887e8232a 100644 --- a/doajtest/unit/test_task_datalog_journal_added_update.py +++ b/doajtest/unit/test_task_datalog_journal_added_update.py @@ -22,19 +22,16 @@ DatalogJournalAdded(title='titlec', issn='1234-3000', date_added='2021-01-01', - has_seal=True, has_continuations=True, ), DatalogJournalAdded(title='titleb', issn='1234-2000', date_added='2021-01-01', - has_seal=True, has_continuations=True, ), DatalogJournalAdded(title='titlea', issn='1234-1000', date_added='2020-01-01', - has_seal=True, has_continuations=True, ), ] @@ -94,20 +91,18 @@ def test_find_new_xlsx_rows(self): ] def test_to_display_data(self): - assert ['titleg', '1234-7000', '01-January-2222', 'Seal', 'Yes', ] == to_display_data( + assert ['titleg', '1234-7000', '01-January-2222', 'Yes', ] == to_display_data( DatalogJournalAdded(title='titleg', issn='1234-7000', date_added='2222-01-01', - has_seal=True, has_continuations=True, ), ) - assert ['titlexxx', '1234-9999', '02-January-2222', '', ''] == to_display_data( + assert 
['titlexxx', '1234-9999', '02-January-2222', ''] == to_display_data( DatalogJournalAdded(title='titlexxx', issn='1234-9999', date_added='2222-01-02', - has_seal=False, has_continuations=False, ), ) diff --git a/portality/app.py b/portality/app.py index eb606c42cf..2b6e5115a2 100644 --- a/portality/app.py +++ b/portality/app.py @@ -67,16 +67,13 @@ if 'api3' in app.config['FEATURES']: from portality.view.api_v3 import blueprint as api_v3 app.register_blueprint(api_v3, name='api_v3', url_prefix='/api/v3') # ~~-> APIv3:Blueprint~~ - # Remove this when we move to API v4 if app.config.get("CURRENT_API_MAJOR_VERSION") == "3": app.register_blueprint(api_v3, name='api', url_prefix='/api') if 'api4' in app.config['FEATURES']: from portality.view.api_v4 import blueprint as api_v4 app.register_blueprint(api_v4, name='api_v4', url_prefix='/api/v4') # ~~-> APIv4:Blueprint~~ - # uncomment this when we want API v4 to become the current API if app.config.get("CURRENT_API_MAJOR_VERSION", "4") == "4": - app.register_blueprint(api_v3, name='api', url_prefix='/api') - # app.register_blueprint(api_v4, name='api', url_prefix='/api') # ~~-> APIv4:Blueprint~~ + app.register_blueprint(api_v4, name='api', url_prefix='/api') app.register_blueprint(status, name='status', url_prefix='/status') # ~~-> Status:Blueprint~~ app.register_blueprint(status, name='_status', url_prefix='/_status') diff --git a/portality/migrate/3829_remove_seal_column/README.md b/portality/migrate/3829_remove_seal_column/README.md new file mode 100644 index 0000000000..13b52aa38f --- /dev/null +++ b/portality/migrate/3829_remove_seal_column/README.md @@ -0,0 +1,7 @@ +# 2024-03-21; Issue 3829 - Remove seal column + +## Execution + +Run the migration with + + python portality/upgrade.py -u portality/migrate/3829_remove_seal_column/migrate.json \ No newline at end of file diff --git a/portality/migrate/3829_remove_seal_column/__init__.py b/portality/migrate/3829_remove_seal_column/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/migrate/3829_remove_seal_column/migrate.json b/portality/migrate/3829_remove_seal_column/migrate.json new file mode 100644 index 0000000000..57bf924be0 --- /dev/null +++ b/portality/migrate/3829_remove_seal_column/migrate.json @@ -0,0 +1,14 @@ +{ + "batch": 10000, + "types": [ + { + "type": "datalog_journal_added", + "init_with_model": false, + "action": "index", + "keepalive": "20m", + "functions" : [ + "portality.migrate.3829_remove_seal_column.operations.remove_has_seal" + ] + } + ] +} \ No newline at end of file diff --git a/portality/migrate/3829_remove_seal_column/operations.py b/portality/migrate/3829_remove_seal_column/operations.py new file mode 100644 index 0000000000..78f1e8685a --- /dev/null +++ b/portality/migrate/3829_remove_seal_column/operations.py @@ -0,0 +1,5 @@ +def remove_has_seal(obj): + if 'has_seal' in obj: + print(f'update record {obj}') + del obj['has_seal'] + return obj diff --git a/portality/models/datalog_journal_added.py b/portality/models/datalog_journal_added.py index dfea539840..51bb22d424 100644 --- a/portality/models/datalog_journal_added.py +++ b/portality/models/datalog_journal_added.py @@ -14,7 +14,6 @@ class DatalogJournalAdded(SeamlessMixin, DomainObject): "title": {"coerce": "unicode"}, "issn": {"coerce": "unicode"}, "date_added": {"coerce": "utcdatetime-datalog"}, - "has_seal": {"coerce": "bool"}, "has_continuations": {"coerce": "bool"}, "journal_id": {"coerce": "unicode"}, "created_date": {"coerce": "utcdatetime"}, @@ -63,14 +62,6 @@ def 
date_added(self, val): def date_added_str(self): return self.date_added.strftime(self.DATE_FMT) - @property - def has_seal(self): - return self.__seamless__.get_single("has_seal") - - @has_seal.setter - def has_seal(self, val): - self.__seamless__.set_single('has_seal', val) - @property def has_continuations(self): return self.__seamless__.get_single("has_continuations") diff --git a/portality/scripts/3918_received_app_by_country_and_year/__init__.py b/portality/scripts/3918_received_app_by_country_and_year/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/scripts/3918_received_app_by_country_and_year/script.py b/portality/scripts/3918_received_app_by_country_and_year/script.py new file mode 100644 index 0000000000..29d43ccd2a --- /dev/null +++ b/portality/scripts/3918_received_app_by_country_and_year/script.py @@ -0,0 +1,84 @@ +from portality import models +from portality.bll import exceptions +import csv +from datetime import datetime + +QUERY = { + "track_total_hits": True, + "size": 0, + "query": { + "bool": { + "must": [ + { + "term": { + "admin.application_type.exact": "new_application" + } + } + ], + "filter": [ + { + "range": { + "created_date": { + "gte": "2019-01-01T00:00:00Z", + "lte": "2023-12-31T23:59:59Z" + } + } + } + ] + } + }, + "aggs": { + "applications_by_country": { + "aggs": { + "applications_by_year": { + "date_histogram": { + "field": "created_date", + "calendar_interval": "year", + "format": "yyyy", + "min_doc_count": 0 + } + } + }, + "terms": { + "field": "index.country.exact", + "size": 1000 + } + } + } +} + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--out", help="output file", required=True) + args = parser.parse_args() + + # Initialize CSV writers for both reports + with open(args.out, "w", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + writer.writerow(["Country", "Year", "Count"]) + + res = models.Application.send_query(QUERY) + country_buckets = res["aggregations"]["applications_by_country"]["buckets"] + + def get_country(country_bucket): + return country_bucket["key"] + + def get_years_data(country_bucket): + return country_bucket["applications_by_year"]["buckets"] + + def get_year(year_bucket): + return year_bucket["key_as_string"] + + def get_count(year_bucket): + return year_bucket["doc_count"] + + + for country_bucket in country_buckets: + years_buckets = get_years_data(country_bucket) + for years_bucket in years_buckets: + writer.writerow([get_country(country_bucket), get_year(years_bucket), get_count(years_bucket)]) + + print("Reports generated successfully.") diff --git a/portality/scripts/githubpri/github_prioritisation.py b/portality/scripts/githubpri/github_prioritisation.py index 644a5f2650..88529c88c6 100644 --- a/portality/scripts/githubpri/github_prioritisation.py +++ b/portality/scripts/githubpri/github_prioritisation.py @@ -1,24 +1,28 @@ +""" +main script of team priorities sheet generation +""" + import logging import os import sys +from collections import OrderedDict import pandas as pd from gspread.utils import ValueInputOption from portality.lib import gsheet -from portality.scripts.githubpri import pri_data_serv, gdrive_sheet_serv -from portality.scripts.githubpri.gdrive_sheet_serv import create_or_load_worksheet -from collections import OrderedDict +from portality.scripts.githubpri import pridata, pri_gsheets, github_utils +from portality.scripts.githubpri.pri_gsheets import create_or_load_worksheet log = 
logging.getLogger(__name__) def to_ordered_df_by_user_pri_map(user_pri_map): user_pri_map = user_pri_map.copy() - claimable_df = user_pri_map.pop(pri_data_serv.DEFAULT_USER, None) + claimable_df = user_pri_map.pop(pridata.DEFAULT_USER, None) user_pri_map = OrderedDict(sorted(user_pri_map.items(), key=lambda x: x[0].lower())) if claimable_df is not None: - user_pri_map[pri_data_serv.DEFAULT_USER] = claimable_df + user_pri_map[pridata.DEFAULT_USER] = claimable_df return pd.concat(user_pri_map, axis=1) @@ -28,8 +32,8 @@ def priorities(priorities_file, gdrive_filename=None, github_username=None, github_password_key=None, ): - sender = pri_data_serv.GithubReqSender(username=github_username, password_key=github_password_key) - user_pri_map = pri_data_serv.create_priorities_excel_data(priorities_file, sender) + sender = github_utils.GithubReqSender(token_password=github_password_key, username=github_username) + user_pri_map = pridata.create_priorities_excel_data(priorities_file, sender) if outfile is not None: to_ordered_df_by_user_pri_map(user_pri_map).to_csv(outfile) @@ -66,7 +70,7 @@ def priorities(priorities_file, cell.value = f'=HYPERLINK("{link}", "{title}")' worksheet.update_cells(cells, ValueInputOption.user_entered) - gdrive_sheet_serv.apply_prilist_styles(worksheet, display_df) + pri_gsheets.apply_prilist_styles(worksheet, display_df) print(f'[End] update google sheet [{gdrive_filename}]') @@ -80,13 +84,27 @@ def main(): description = """ Generate a excel for DOAJ github issue prioritisation queue for each user +Environment variables +--------------------- you need github and google drive api key to run this script: -* `DOAJ_PRILIST_KEY_PATH` is json file path for google drive api - the `DOAJ_PRILIST_KEY_PATH` json file generated by `console.cloud.google.com` and should be enabled for - * google drive api - * google sheet api -* `DOAJ_GITHUB_KEY` is github api key, this key is optional, if not provided, you can input github password instead by -p option - """ + +DOAJ_PRILIST_KEY_PATH + `DOAJ_PRILIST_KEY_PATH` is the path of the json file for the google drive api, which is generated by + `console.cloud.google.com` and should be enabled for + * google drive api + * google sheet api + + +DOAJ_GITHUB_KEY + `DOAJ_GITHUB_KEY` is the github api key; this key is optional, and if not provided, you can input a github + password instead via the -p option + + +Example +--------------------- +github_prioritisation -g 'test-githubpri' -r '/tmp/githubpri-rule.csv' + +""" parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument("-u", "--username", diff --git a/portality/scripts/githubpri/github_utils.py b/portality/scripts/githubpri/github_utils.py new file mode 100644 index 0000000000..3a0add3e66 --- /dev/null +++ b/portality/scripts/githubpri/github_utils.py @@ -0,0 +1,286 @@ +""" +functions to interact with "Github" for githubpri +""" +from __future__ import annotations + +import functools +import logging +import os +import warnings +from typing import Union, Iterable, TypedDict + +import requests +from requests import Response +from requests.auth import HTTPBasicAuth + +URL_API = "https://api.github.com" + +AuthLike = Union[dict, tuple, HTTPBasicAuth, None] + +log = logging.getLogger(__name__) + + +class GithubReqSender: + def __init__(self, token_password, username=None): + """ + + Parameters + ---------- + token_password + password for the username, or a github api key + username + """ + if token_password is None: + raise ValueError("api_key or password must be 
provided") + self.username_password = (username, token_password) + + self.url_json_cache = {} + + def get(self, url, **req_kwargs) -> Response: + warnings.warn("use send instead of get", DeprecationWarning) + return send_request(url, auth=self.username_password, **req_kwargs) + + def send(self, url, method='get', **req_kwargs) -> Response: + return send_request(url, method=method, auth=self.username_password, **req_kwargs) + + def query_graphql(self, query: str) -> dict: + return self.send("https://api.github.com/graphql", method='post', json={'query': query}).json() + + @functools.lru_cache(maxsize=102400) + def send_cached_json(self, url): + if url in self.url_json_cache: + return self.url_json_cache[url] + + result = self.send(url).json() + self.url_json_cache[url] = result + return result + + def yield_all(self, url, params=None, n_per_page=100) -> Iterable[dict]: + return yields_all(url, auth=self.username_password, params=params, n_per_page=n_per_page) + + def __hash__(self): + return hash(self.username_password) + + def __eq__(self, other): + if not isinstance(other, GithubReqSender): + return False + return self.__hash__() == other.__hash__() + + +def send_request(url, method='get', + auth: AuthLike = None, + **req_kwargs) -> Response: + final_req_kwargs = {} + auth = create_auth(auth) + if auth is not None: + final_req_kwargs = {'auth': auth} + final_req_kwargs.update(req_kwargs) + resp = requests.request(method, url, **final_req_kwargs) + if resp.status_code >= 400: + raise ConnectionError(f'Something wrong in api response: {resp.status_code} {resp.text}') + return resp + + +def create_auth(auth: AuthLike) -> HTTPBasicAuth | None: + """ + + Parameters + ---------- + auth + accept HTTPBasicAuth, Tuple[username, password], Dict or None + + Returns + ------- + HTTPBasicAuth + + """ + + if auth is not None: + if isinstance(auth, tuple): + auth = HTTPBasicAuth(*auth) + if isinstance(auth, dict): + auth = HTTPBasicAuth(auth['username'], auth['password']) + return auth + + +def get_projects(full_name, auth: AuthLike) -> list[dict]: + """ + + Parameters + ---------- + full_name + owner/repo_name -- e.g. 
'DOAJ/doajPM' + auth + + Returns + ------- + + """ + url = f'{URL_API}/repos/{full_name}/projects' + resp = send_request(url, auth=auth) + project_list = resp.json() + return project_list + + +def get_project(full_name, project_name, auth: AuthLike) -> dict | None: + project_list = get_projects(full_name, auth) + names = [p for p in project_list if p.get("name") == project_name] + if len(names) == 0: + return None + if len(names) > 1: + log.warning(f"Multiple projects found: {project_name}") + return names[0] + + +def yields_all(url, auth: AuthLike, params=None, n_per_page=100) -> Iterable[dict]: + final_params = {"per_page": n_per_page, "page": 1} + if params is not None: + final_params.update(params) + + while True: + items = send_request(url, params=final_params, auth=auth).json() + yield from items + if len(items) < n_per_page: + break + + final_params["page"] += 1 + + +@functools.lru_cache(maxsize=102400) +def get_column_issues(columns_url, col, sender: GithubReqSender): + print(f"Fetching column issues {col}") + col_data = sender.send_cached_json(columns_url) + column_records = [c for c in col_data if c.get("name") == col] + if len(column_records) == 0: + log.warning(f"Column not found: {col}") + return [] + if len(column_records) > 1: + log.warning(f"Multiple columns found: {col}") + + issues = [] + for card_data in sender.yield_all(column_records[0].get("cards_url")): + issue_data = sender.send(card_data.get("content_url")).json() + issues.append(issue_data) + + print("Column issues {x}".format(x=[i.get("number") for i in issues])) + return issues + + +class Issue(TypedDict): + number: int + title: str + status: str + url: str + assignees: list[str] + label_names: list[str] + + +def find_all_issues(owner, repo, project_number, sender: GithubReqSender) -> Iterable[Issue]: + query_template = """ + { + repository(owner: "%s", name: "%s") { + projectV2(number: %s) { + url + title + items(first: 100, after: AFTER_CURSOR) { + pageInfo { + endCursor + hasNextPage + } + nodes { + content { + ... on Issue { + id + number + title + state + url + stateReason + labels (first:100) { + nodes { + name + } + } + assignees(first: 100) { + nodes { + name + login + } + } + } + } + fieldValues(first: 100) { + nodes { + ... on ProjectV2ItemFieldSingleSelectValue { + name + field { + ... 
on ProjectV2SingleSelectField { + name + } + } + } + } + } + } + } + } + } + } + """ % (owner, repo, project_number) + + # Function to fetch all items with pagination + def fetch_all_items(): + # all_projects = [] + after_cursor = None + while True: + # Replace AFTER_CURSOR placeholder in the query template + query = query_template.replace("AFTER_CURSOR", f'"{after_cursor}"' if after_cursor else "null") + + data = sender.query_graphql(query) + + # Process the data + project = data['data']['repository']['projectV2'] + items = project['items']['nodes'] + yield from items + # print(f'items: {len(items)}') + # all_projects.extend(items) + + # Check if there are more pages + page_info = project['items']['pageInfo'] + if page_info['hasNextPage']: + after_cursor = page_info['endCursor'] + else: + break + + def _to_issue(item): + content = item['content'] + return Issue( + number=content['number'], + title=content['title'], + url=content['url'], + assignees=[a['login'] for a in content['assignees']['nodes']], + status=next((f['name'] for f in item['fieldValues']['nodes'] + if f and f['field']['name'] == 'Status'), None), + label_names=[l['name'] for l in content['labels']['nodes']], + ) + + # Fetch all items + all_items = fetch_all_items() + all_items = (i for i in all_items if i['content']) + return map(_to_issue, all_items) + # + # # Filter to include only issues + # issues = [item['content'] for item in all_items if 'id' in item['content']] + # for issue in issues: + # print(f"Issue ID: {issue['id']}, Title: {issue['title']}, State: {issue['state']}, URL: {issue['url']}, " + # f"State Reason: {issue['stateReason']}") + + +def main(): + sender = GithubReqSender(os.environ.get('DOAJ_GITHUB_KEY')) + for i in find_all_issues(sender): + print(i) + + +if __name__ == '__main__': + main() diff --git a/portality/scripts/githubpri/pri_data_serv.py b/portality/scripts/githubpri/pri_data_serv.py deleted file mode 100644 index b0480aed90..0000000000 --- a/portality/scripts/githubpri/pri_data_serv.py +++ /dev/null @@ -1,199 +0,0 @@ -import csv -import json -import os -from collections import defaultdict -from typing import TypedDict, List, Dict - -import pandas as pd -import requests -from requests.auth import HTTPBasicAuth - -REPO = "https://api.github.com/repos/DOAJ/doajPM/" -PROJECTS = REPO + "projects" -PROJECT_NAME = "DOAJ Kanban" -DEFAULT_COLUMNS = ["Review", "In progress", "To Do"] -HEADERS = {"Accept": "application/vnd.github+json"} - -DEFAULT_USER = 'Claimable' - - -class GithubReqSender: - def __init__(self, username=None, password_key=None): - """ - :param password_key: - password of username or github api key - """ - self.username = username - self.password_key = password_key - if self.password_key is None: - raise ValueError("api_key or password must be provided") - - def create_github_request_kwargs(self) -> dict: - req_kwargs = {'headers': dict(HEADERS)} - req_kwargs['auth'] = HTTPBasicAuth(self.username, self.password_key) - return req_kwargs - - def get(self, url, **req_kwargs): - final_req_kwargs = self.create_github_request_kwargs() - final_req_kwargs.update(req_kwargs) - return requests.get(url, **final_req_kwargs) - - -class Rule(TypedDict): - id: str - labels: List[str] - columns: List[str] - - -class PriIssue(TypedDict): - rule_id: str - title: str - issue_url: str - - -class GithubIssue(TypedDict): - api_url: str - issue_number: str - status: str # e.g. 
'To Do', 'In progress', 'Review' - title: str - - -def load_rules(rules_file) -> List[Rule]: - if not os.path.exists(rules_file): - raise FileNotFoundError(f"Rules file [{rules_file}] not found") - with open(rules_file, "r") as f: - reader = csv.DictReader(f) - rules = [] - for row in reader: - rules.append({ - "id": row["id"], - "labels": [l.strip() for l in row["labels"].split(",") if l.strip() != ""], - "columns": [c.strip() for c in row["columns"].split(",") if c.strip() != ""] - }) - return rules - - -def create_priorities_excel_data(priorities_file, sender: GithubReqSender) -> Dict[str, pd.DataFrame]: - """ - - ENV VARIABLE `DOAJ_GITHUB_KEY` will be used if username and password are not provided - - :param priorities_file: - :param username: - :param password: - :return: - """ - resp = sender.get(PROJECTS) - if resp.status_code >= 400: - raise ConnectionError(f'Error fetching github projects: {resp.status_code} {resp.text}') - project_list = resp.json() - project = [p for p in project_list if p.get("name") == PROJECT_NAME][0] - user_priorities = defaultdict(list) - for priority in load_rules(priorities_file): - print("Applying rule {x}".format(x=json.dumps(priority))) - issues_by_user = _issues_by_user(project, priority, sender) - print("Unfiltered matches for rule {x}".format(x=issues_by_user)) - for user, issues in issues_by_user.items(): - issues: List[GithubIssue] - pri_issues = [PriIssue(rule_id=priority.get("id", 1), - title='[{}] {}'.format(github_issue['issue_number'], github_issue['title']), - issue_url=_ui_url(github_issue['api_url']), - status=github_issue['status'], - ) - for github_issue in issues] - pri_issues = [i for i in pri_issues if - i['issue_url'] not in {u['issue_url'] for u in user_priorities[user]}] - print("Novel issues for rule for user {x} {y}".format(x=user, y=pri_issues)) - user_priorities[user] += pri_issues - - df_list = {} - for user, pri_issues in user_priorities.items(): - df_list[user] = pd.DataFrame(pri_issues) - - return df_list - - -def _issues_by_user(project, priority, sender) -> Dict[str, List[GithubIssue]]: - cols = priority.get("columns", []) - if len(cols) == 0: - cols = DEFAULT_COLUMNS - - user_issues = defaultdict(list) - for status_col in cols: - column_issues = _get_column_issues(project, status_col, sender) - labels = priority.get("labels", []) - if len(labels) == 0: - _split_by_user(user_issues, column_issues, status_col) - continue - - labelled_issues = _filter_issues_by_label(column_issues, labels) - _split_by_user(user_issues, labelled_issues, status_col) - - return user_issues - - -COLUMN_CACHE = {} - - -def _get_column_issues(project, col, sender: GithubReqSender): - if col in COLUMN_CACHE: - return COLUMN_CACHE[col] - - print("Fetching column issues {x}".format(x=col)) - cols_url = project.get("columns_url") - resp = sender.get(cols_url) - col_data = resp.json() - - column_record = [c for c in col_data if c.get("name") == col][0] - cards_url = column_record.get("cards_url") - - params = {"per_page": 100, "page": 1} - issues = [] - - while True: - resp = sender.get(cards_url, params=params) - cards_data = resp.json() - if len(cards_data) == 0: - break - params["page"] += 1 - - for card_data in cards_data: - content_url = card_data.get("content_url") - resp = sender.get(content_url) - issue_data = resp.json() - issues.append(issue_data) - - COLUMN_CACHE[col] = issues - print("Column issues {x}".format(x=[i.get("url") for i in issues])) - return issues - - -def _filter_issues_by_label(issues, labels): - filtered = [] - for issue in 
issues: - issue_labels = issue.get("labels", []) - label_names = [l.get("name") for l in issue_labels] - found = 0 - for label in labels: - if label in label_names: - found += 1 - if found == len(labels): - filtered.append(issue) - return filtered - - -def _split_by_user(registry: defaultdict, issues: dict, status: str): - for issue in issues: - assignees = issue.get("assignees") - assignees = [a.get("login") for a in assignees] if assignees else [DEFAULT_USER] - github_issue = GithubIssue(api_url=issue.get("url"), - issue_number=issue.get("number"), - status=status, - title=issue.get("title"), - ) - for assignee in assignees: - registry[assignee].append(github_issue) - - -def _ui_url(api_url): - return "https://github.com/" + api_url[len("https://api.github.com/repos/"):] diff --git a/portality/scripts/githubpri/gdrive_sheet_serv.py b/portality/scripts/githubpri/pri_gsheets.py similarity index 97% rename from portality/scripts/githubpri/gdrive_sheet_serv.py rename to portality/scripts/githubpri/pri_gsheets.py index 49a9277c47..c8d584305d 100644 --- a/portality/scripts/githubpri/gdrive_sheet_serv.py +++ b/portality/scripts/githubpri/pri_gsheets.py @@ -1,3 +1,7 @@ +""" +functions to interact with "Google Drive Sheets" for githubpri +""" + import datetime import gspread @@ -26,7 +30,6 @@ def apply_prilist_styles(worksheet, display_df): latest_username = username gs_col_idx = col_idx + 1 - cells = worksheet.range(3, gs_col_idx, len(titles) + 3, gs_col_idx) gspfmt.format_cell_range(worksheet, range_idx_to_a1(1, gs_col_idx, n_row + 2, gs_col_idx), cell_format=gspfmt.CellFormat( diff --git a/portality/scripts/githubpri/pridata.py b/portality/scripts/githubpri/pridata.py new file mode 100644 index 0000000000..447c87a39d --- /dev/null +++ b/portality/scripts/githubpri/pridata.py @@ -0,0 +1,145 @@ +""" +functions and logic of priority data +core logic of githubpri +extract data from Github and convert to priority order format +""" + +from __future__ import annotations + +import csv +import json +import logging +import os +from collections import defaultdict +from typing import TypedDict, Iterable + +import pandas as pd + +from portality.scripts.githubpri import github_utils +from portality.scripts.githubpri.github_utils import GithubReqSender, Issue + +PROJECT_NAME = "DOAJ Kanban" +DEFAULT_COLUMNS = ["Review", "In progress", "To Do"] + +DEFAULT_USER = 'Claimable' + +log = logging.getLogger(__name__) + + +class Rule(TypedDict): + id: str + labels: list[str] + columns: list[str] + + +class PriIssue(TypedDict): + rule_id: str + title: str + issue_url: str + status: str + + +class GithubIssue(TypedDict): + url: str + issue_number: str + status: str # e.g. 
'To Do', 'In progress', 'Review' + title: str + assignees: list[str] + + +def load_rules(rules_file) -> list[Rule]: + if not os.path.exists(rules_file): + raise FileNotFoundError(f"Rules file [{rules_file}] not found") + with open(rules_file) as f: + reader = csv.DictReader(f) + rules = [] + for row in reader: + rules.append({ + "id": row["id"], + "labels": [l.strip() for l in row["labels"].split(",") if l.strip() != ""], + "columns": [c.strip() for c in row["columns"].split(",") if c.strip() != ""] + }) + return rules + + +def create_priorities_excel_data(priorities_file, sender: GithubReqSender) -> dict[str, pd.DataFrame]: + """ + ENV VARIABLE `DOAJ_GITHUB_KEY` will be used if username and password are not provided + + Parameters + ---------- + priorities_file + sender + + Returns + ------- + dict mapping 'username' to 'priority dataframe' + """ + github_owner = 'DOAJ' + github_repo = 'doajPM' + project_number = 8 + + print(f'Find issues from {github_owner}/{github_repo} project[{project_number}]') + print(f'Project url: http://github.com/orgs/{github_owner}/projects/{project_number}') + + all_issues = github_utils.find_all_issues(owner=github_owner, repo=github_repo, project_number=project_number, + sender=sender) + all_issues = list(all_issues) + print(f'Number of issues found: [{len(all_issues)}]') + + user_priorities = defaultdict(list) + for priority in load_rules(priorities_file): + print(f"Applying rule [{json.dumps(priority)}]") + issues_by_user = _issues_by_user(all_issues, priority) + print("Unfiltered matches for rule: [{}]".format( + sorted(i.get("issue_number") for user, issues in issues_by_user.items() for i in issues) + )) + + for user, issues in issues_by_user.items(): + issues: list[GithubIssue] + pri_issues = [PriIssue(rule_id=priority.get("id", 1), + title='[{}] {}'.format(github_issue['issue_number'], github_issue['title']), + issue_url=github_issue['url'], + status=github_issue['status'], + ) + for github_issue in issues] + pri_issues = [i for i in pri_issues if + i['issue_url'] not in {u['issue_url'] for u in user_priorities[user]}] + for i in pri_issues: + print(' * [{}]{}'.format(user, i.get('title'))) + user_priorities[user] += pri_issues + + df_list = {} + for user, pri_issues in user_priorities.items(): + df_list[user] = pd.DataFrame(pri_issues) + + return df_list + + +def _issues_by_user(issues: Iterable[Issue], priority) -> dict[str, list[GithubIssue]]: + cols = priority.get("columns", []) or DEFAULT_COLUMNS + + user_issues = defaultdict(list) + for status_col in cols: + status_issues = (issue for issue in issues if issue.get("status") == status_col) + labels = priority.get("labels", []) + if labels: + status_issues = (issue for issue in status_issues + if set(issue.get('label_names', [])).issuperset(set(labels))) + + status_issues = map(to_github_issue, status_issues) + for issue in status_issues: + for assignee in issue['assignees']: + user_issues[assignee].append(issue) + + return user_issues + + +def to_github_issue(issue: Issue): + github_issue = GithubIssue(url=issue.get("url"), + issue_number=issue.get("number"), + status=issue['status'], + title=issue.get("title"), + assignees=issue['assignees'] or [DEFAULT_USER] + ) + return github_issue diff --git a/portality/scripts/journal_urls.py b/portality/scripts/journal_urls.py index 3f1e8229aa..0d8b5c3514 100644 --- a/portality/scripts/journal_urls.py +++ b/portality/scripts/journal_urls.py @@ -51,7 +51,9 @@ def get_csv_file_name(): def extra_columns(j): """Add extra columns""" - return [('Journal ID', 
j.id)] + account = j.owner_account + return [('Journal ID', j.id), ('Account Name', account.name if account else ''), + ('Account Email', account.email if account else '')] def generate_journals_csv(csv_file): diff --git a/portality/scripts/link_checker_report.py b/portality/scripts/link_checker_report.py index 5bc1c922c5..ce1dec09af 100644 --- a/portality/scripts/link_checker_report.py +++ b/portality/scripts/link_checker_report.py @@ -103,8 +103,8 @@ def fetch_matching_rows(journal_url_index, report_values): # df_result_selected_columns = df_result[columns].copy() # create a copy to avoid SettingWithCopyWarning df_result_selected_columns = pd.DataFrame( data=[list(journal_data)], - columns=['Journal title', 'Added on Date', 'Last updated Date', "Journal ID", "Publisher", - "Country of publisher" ] + columns=['Journal title', 'Added on Date', 'Last updated Date', "Journal ID", 'Account Name', + 'Account Email', "Publisher", "Country of publisher" ] ) jid = df_result_selected_columns["Journal ID"].values[0] @@ -141,7 +141,7 @@ def _index_journals(df): # FIXME: assumes each URL only appears once if isinstance(cell, str) and cell.startswith("http"): # make an index of the URL to the journal title, added date, updated date and journal id - jidx[cell] = (row[0], row[50], row[51], row[54], row[9], row[10]) + jidx[cell] = (row[0], row[50], row[51], row[54], row[55], row[56], row[9], row[10]) return jidx @@ -192,8 +192,8 @@ def generate_report(csv_files, journal_csv_file): journal_url_index = _index_journals(journal_df) log("Indexed journal urls") - master_df = pd.DataFrame(columns=['Journal title', 'Added on Date', 'Last updated Date', "Journal ID", "Publisher", - "Country of publisher",]) + master_df = pd.DataFrame(columns=['Journal title', 'Added on Date', 'Last updated Date', "Journal ID", + 'Account Name', 'Account Email', "Publisher", "Country of publisher",]) for csv_file in csv_files: df = pd.read_csv(csv_file) log("Checking file {x}".format(x=csv_file)) diff --git a/portality/settings.py b/portality/settings.py index 7ec7ac799a..3b13f3e64a 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -9,8 +9,8 @@ # Application Version information # ~~->API:Feature~~ -DOAJ_VERSION = "6.7.3" -API_VERSION = "3.0.1" +DOAJ_VERSION = "6.8.1" +API_VERSION = "4.0.0" ###################################### # Deployment configuration @@ -67,7 +67,7 @@ ELASTIC_SEARCH_TEST_DB_PREFIX = "doajtest-" INITIALISE_INDEX = True # whether or not to try creating the index and required index types on startup -ELASTIC_SEARCH_VERSION = "1.7.5" +ELASTIC_SEARCH_VERSION = "7.10.2" ELASTIC_SEARCH_SNAPSHOT_REPOSITORY = None ELASTIC_SEARCH_SNAPSHOT_TTL = 366 @@ -149,7 +149,7 @@ # ~~->API:Feature~~ BASE_API_URL = "https://doaj.org/api/" API_CURRENT_BLUEPRINT_NAME = "api_v4" # change if upgrading API to new version and creating new view -CURRENT_API_MAJOR_VERSION = "3" +CURRENT_API_MAJOR_VERSION = "4" # URL used for the journal ToC URL in the journal CSV export # NOTE: must be the correct route as configured in view/doaj.py diff --git a/portality/tasks/datalog_journal_added_update.py b/portality/tasks/datalog_journal_added_update.py index cecd3721bd..1d967b4ee3 100644 --- a/portality/tasks/datalog_journal_added_update.py +++ b/portality/tasks/datalog_journal_added_update.py @@ -1,3 +1,14 @@ +""" +Background job that automatically populates a Google sheet that shows accepted journals. 
+ +For how to set up a google sheet API key for testing, please refer to +`Setup google API key for google sheet` in `how-to-setup.md` + +References +* [Origin](https://github.com/DOAJ/doajPM/issues/2810) +* [No longer display Seal](https://github.com/DOAJ/doajPM/issues/3829) +""" + import datetime import itertools import logging @@ -61,7 +72,6 @@ def to_datalog_journal_added(journal: Journal) -> DatalogJournalAdded: bibjson = journal.bibjson() title = bibjson.title issn = bibjson.eissn or bibjson.pissn - has_seal = journal.has_seal() try: has_continuations = any([journal.get_future_continuations() + journal.get_past_continuations()]) except RecursionError: @@ -69,7 +79,6 @@ record = DatalogJournalAdded(title=title, issn=issn, date_added=journal.created_timestamp, - has_seal=has_seal, has_continuations=has_continuations, journal_id=journal.id) @@ -151,7 +160,6 @@ def find_new_xlsx_rows(last_issn, page_size=400) -> list: def to_display_data(datalog: DatalogJournalAdded) -> list: return [datalog.title, datalog.issn, dates.reformat(datalog.date_added, out_format=DatalogJournalAdded.DATE_FMT), - 'Seal' if datalog.has_seal else '', 'Yes' if datalog.has_continuations else ''] diff --git a/portality/templates/api/v3/api_docs.html b/portality/templates/api/v3/api_docs.html index 9ef29972ec..fc95ca6569 100644 --- a/portality/templates/api/v3/api_docs.html +++ b/portality/templates/api/v3/api_docs.html @@ -16,9 +16,9 @@

Docs

API

-
-

This is the current version of the DOAJ API

-

A new version of the API is due in July 2024, and you can preview it here. +

+

This is an old version of the DOAJ API

+

A new version of the API is available here. You should review the release notes and migration instructions as soon as possible.

{% include "api/includes/swagger_description.html" %} diff --git a/portality/templates/api/v4/api_docs.html b/portality/templates/api/v4/api_docs.html index 802a204bae..a90016d56d 100644 --- a/portality/templates/api/v4/api_docs.html +++ b/portality/templates/api/v4/api_docs.html @@ -16,20 +16,20 @@

Docs

API

-
-

This is a preview release of the v4 API. This will become the current version of the API - in July 2024. Please review the below timeline and migration notes, and upgrade your integrations +

+

This is the current version of the DOAJ API

+

Please review the below timeline and migration notes, and upgrade your integrations as soon as possible.

-

Documentation for the existing current API (v3) is available here

+

Documentation for the previous version of the API (v3) is available here

This new version of the API introduces significant performance improvements on the bulk article upload endpoint (/api/bulk/articles).

-

This change is not backwards compatible with the existing API version, so if you rely on bulk article uploads, you will need to upgrade your integrations to use the new version.

+

This change is not backwards compatible with the previous API version, so if you rely on bulk article uploads, you will need to upgrade your integrations to use the new version.

This upgrade affects only the /api/bulk/articles endpoint. If you do not use this feature, your API integrations will continue to work normally.

-

The bulk articles endpoint will change from a synchronous upload to an asynchronous one. In the new version, you will upload a batch of articles to be ingested, and the system will respond immediately with an "Accepted" response, and a link to a status endpoint which will track the import progress of your request. This has been done for several reasons:

+

The bulk articles endpoint has changed from a synchronous upload to an asynchronous one. In this new version, you upload a batch of articles to be ingested, and the system will respond immediately with an "Accepted" response, and a link to a status endpoint which will track the import progress of your request. This has been done for several reasons:

  • It is consistent with the manual bulk upload approach we have in the user interface
  • It allows us to manage the performance of the API better
  • @@ -38,16 +38,9 @@

    API

    Timeline

      -
    1. 18 June 2024 - v4 of the API will be released and available at the base URL /api/v4. It will not replace the existing current API version (v3). If you are reliant on the bulk article upload, you should: -
        -
      1. Try out the new bulk endpoint; upgrade your integrations immediately if feasible.
      2. -
      3. If upgrading your integrations in the short term is not feasible, you should fix your existing integration to point to /api/v3, which will continue to work in the medium term.
      4. -
      -
    2. +
    3. 18th July 2024 - The v4 API became the "current" API version and is available at /api AND /api/v4. At this point, old integrations with the bulk article upload have ceased to work, and you must switch to using /api/v3 if you want to get it to work again. If you wish to continue using this feature long-term, you must upgrade your integrations.
    4. -
    5. Approximately 1 month after 18th June 2024 (exact date to be confirmed) The v4 API will become the "current" API version and will be available at /api AND /api/v4. At this point, old integrations with the bulk article upload will cease to work, and you must switch to using /api/v3 if you want to get it to work again. If you wish to continue using this feature long-term, you must upgrade your integrations.
    6. - -
    7. Approximately 6 months after 18th June 2024 (exact date to be confirmed) All previous API versions (v1, v2 and v3) will cease to support bulk article uploads, and if you wish to use this feature, you must use the v4 API. All other backwards-compatible API features in those previous versions of the API will continue to work as normal.
    8. +
    9. Early 2025 (exact date to be confirmed) - All previous API versions (v1, v2 and v3) will cease to support bulk article uploads, and if you wish to use this feature, you must use the v4 API. All other backwards-compatible API features in those previous versions of the API will continue to work as normal.
    diff --git a/portality/templates/data/publisher-supporters.html b/portality/templates/data/publisher-supporters.html index 74e7aa8c38..fd6f08dd60 100644 --- a/portality/templates/data/publisher-supporters.html +++ b/portality/templates/data/publisher-supporters.html @@ -1,7 +1,9 @@
      - {% for i in data.publisher_supporters %} -
    • - {{ i.name }} -
    • - {% endfor %} + {% if data.publisher_supporters %} + {% for i in data.publisher_supporters %} +
    • + {{ i.name }} +
    • + {% endfor %} + {% endif %}
    \ No newline at end of file diff --git a/portality/templates/doaj/index.html b/portality/templates/doaj/index.html index 2b2fb8cd36..499450abfa 100644 --- a/portality/templates/doaj/index.html +++ b/portality/templates/doaj/index.html @@ -52,7 +52,7 @@

    DOAJ in numbers

    {% set source = search_query_source(term=[{"bibjson.apc.has_apc":False},{"bibjson.other_charges.has_other_charges":False}]) %} {{ statistics.no_apc }} - journals without fees + journals without APCs

    diff --git a/portality/upgrade.py b/portality/upgrade.py index 09ecc436be..26149b7ce4 100644 --- a/portality/upgrade.py +++ b/portality/upgrade.py @@ -2,10 +2,12 @@ ~~Migrations:Framework~~ # FIXME: this script requires more work if it's to be used for specified source and target clusters """ -import json, os, dictdiffer -from datetime import datetime, timedelta -from copy import deepcopy +import dictdiffer +import json +import os from collections import OrderedDict +from copy import deepcopy +from datetime import timedelta from typing import TypedDict, List, Dict from portality import models @@ -13,7 +15,7 @@ from portality.lib import plugin, dates from portality.lib.dataobj import DataStructureException from portality.lib.seamless import SeamlessException -from portality.dao import ScrollTimeoutException +from portality.models.datalog_journal_added import DatalogJournalAdded MODELS = { "journal": models.Journal, # ~~->Journal:Model~~ @@ -21,7 +23,8 @@ "suggestion": models.Suggestion, # ~~->Application:Model~~ "application": models.Application, "account": models.Account, # ~~->Account:Model~~ - "background_job": models.BackgroundJob # ~~->BackgroundJob:Model~~ + "background_job": models.BackgroundJob, # ~~->BackgroundJob:Model~~ + 'datalog_journal_added': DatalogJournalAdded, } @@ -34,7 +37,12 @@ def upgrade_article(self, article): class UpgradeType(TypedDict): type: str # name / key of the MODELS class action: str # default is update - query: dict # ES query to use to find the records to upgrade + + """ + ES query to use to find the records to upgrade + default is match_all if query is None + """ + query: dict keepalive: str # ES keepalive time for the scroll, default 1m scroll_size: int # ES scroll size, default 1000 diff --git a/portality/view/doaj.py b/portality/view/doaj.py index 711549e17f..ed083446c2 100644 --- a/portality/view/doaj.py +++ b/portality/view/doaj.py @@ -39,11 +39,17 @@ def login(): def cookie_consent(): cont = request.values.get("continue") if cont is not None: - resp = redirect(cont) + # Only permit relative path redirects, to prevent phishing by supplying a full URI to a different domain + parsed_redirect = urllib.parse.urlparse(cont) + if parsed_redirect.netloc != '': + abort(400) + else: + resp = redirect(cont) else: resp = make_response() # set a cookie that lasts for one year - resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), Messages.CONSENT_COOKIE_VALUE, max_age=31536000, samesite=None, secure=True) + resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), + Messages.CONSENT_COOKIE_VALUE, max_age=31536000, samesite=None, secure=True) return resp @@ -145,7 +151,7 @@ def search_post(): ############################################# -# FIXME: this should really live somewhere else more appropirate to who can access it +# FIXME: this should really live somewhere else more appropriate to who can access it @blueprint.route("/journal/readonly/", methods=["GET"]) @login_required @ssl_required @@ -286,6 +292,7 @@ def find_toc_journal_by_identifier(identifier): def is_issn_by_identifier(identifier): return len(identifier) == 9 + def find_correct_redirect_identifier(identifier, bibjson) -> str: """ return None if identifier is correct and no redirect is needed @@ -345,6 +352,7 @@ def toc(identifier=None): def toc_articles_legacy(identifier=None): return redirect(url_for('doaj.toc_articles', identifier=identifier, volume=1, issue=1), 301) + @blueprint.route("/toc//articles") def toc_articles(identifier=None): journal = find_toc_journal_by_identifier(identifier) @@ 
-356,7 +364,6 @@ def toc_articles(identifier=None): return render_template('doaj/toc_articles.html', journal=journal, bibjson=bibjson ) - #~~->Article:Page~~ @blueprint.route("/article/") def article_page(identifier=None): diff --git a/setup.py b/setup.py index 48c059b4b5..ff411fe26d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='doaj', - version='6.7.3', + version='6.8.1', packages=find_packages(), install_requires=[ "awscli==1.20.50",