From 90ea70142043c4b254b863eb592514145cf79bc3 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 16 Dec 2020 14:23:44 -0700 Subject: [PATCH 01/14] Use str.partition to strip footer - Add footer field to default INI config - ReadmeClass: define curation_dict --- ldcoolp/config/default.ini | 2 ++ .../curation/inspection/readme/__init__.py | 26 +++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/ldcoolp/config/default.ini b/ldcoolp/config/default.ini index f4a924fc..2b79bd57 100644 --- a/ldcoolp/config/default.ini +++ b/ldcoolp/config/default.ini @@ -48,6 +48,8 @@ readme_template = README_template.md log_parent_dir = %(parent_dir)s log_dir = logs +# Footer to strip +footer =





For inquiries regarding # Qualtrics configuration [qualtrics] diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index e734b326..c5c3d664 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -95,25 +95,26 @@ def __init__(self, dn, config_dict=config_default_dict, update=False, else: self.log = log - curation_dict = self.config_dict['curation'] - self.root_directory_main = curation_dict[curation_dict['parent_dir']] + self.curation_dict = self.config_dict['curation'] + self.root_directory_main = self.curation_dict[self.curation_dict['parent_dir']] if not update: # Use 1.ToDo - self.root_directory = join(self.root_directory_main, curation_dict['folder_todo']) + self.root_directory = join(self.root_directory_main, + self.curation_dict['folder_todo']) else: # Use 2.UnderReview. Need to use admin.move module to find current path - mc = move.MoveClass(curation_dict=curation_dict) + mc = move.MoveClass(curation_dict=self.curation_dict) current_stage = mc.get_source_stage(self.folderName) self.root_directory = join(self.root_directory_main, current_stage) # Paths self.folder_path = join(self.root_directory, self.folderName) - self.data_path = join(self.folder_path, curation_dict['folder_copy_data']) # DATA + self.data_path = join(self.folder_path, self.curation_dict['folder_copy_data']) # DATA self.original_data_path = join(self.folder_path, - curation_dict['folder_data']) # ORIGINAL_DATA + self.curation_dict['folder_data']) # ORIGINAL_DATA # README template - self.readme_template = curation_dict['readme_template'] + self.readme_template = self.curation_dict['readme_template'] # This is the full path of the final README.txt file for creation self.readme_file_path = join(self.data_path, 'README.txt') @@ -253,7 +254,16 @@ def retrieve_article_metadata(self): # Retrieve description (single string), strip vertical white space description = self.article_dict['item']['description'].replace('

', '') description = description.replace('
', '') - readme_dict['description'] = html2text(description) + + # Strip ReDATA footer + if self.curation_dict['footer'] in description: + self.log.info("Stripping footer") + strip_text = description.partition(self.curation_dict['footer']) + readme_dict['description'] = html2text(strip_text[0]) + else: + self.log.info("No footer to strip") + readme_dict['description'] = html2text(description) + # Strip extra white space from html2text if readme_dict['description'][-2:] == "\n\n": readme_dict['description'] = readme_dict['description'][:-2] From 312a864cd062eabeb4ee4b115bfc3e9b42f7997d Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 4 May 2021 15:21:30 -0700 Subject: [PATCH 02/14] Define curation_dict instance vars [ci skip] --- ldcoolp/curation/inspection/readme/__init__.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index 5d748d09..f783b115 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -106,8 +106,8 @@ def __init__(self, dn, config_dict=config_default_dict, update=False, self.q = Qualtrics(qualtrics_dict=self.config_dict['qualtrics'], log=self.log) - curation_dict = self.config_dict['curation'] - self.root_directory_main = curation_dict[curation_dict['parent_dir']] + self.curation_dict = self.config_dict['curation'] + self.root_directory_main = self.curation_dict[self.curation_dict['parent_dir']] if not update: # Use 1.ToDo @@ -121,16 +121,18 @@ def __init__(self, dn, config_dict=config_default_dict, update=False, # Paths self.folder_path = join(self.root_directory, self.folderName) - self.metadata_path = join(self.folder_path, curation_dict['folder_metadata']) # METADATA - self.data_path = join(self.folder_path, curation_dict['folder_copy_data']) # DATA + self.metadata_path = join(self.folder_path, + self.curation_dict['folder_metadata']) # METADATA + self.data_path = join(self.folder_path, + self.curation_dict['folder_copy_data']) # DATA self.original_data_path = join(self.folder_path, - curation_dict['folder_data']) # ORIGINAL_DATA + self.curation_dict['folder_data']) # ORIGINAL_DATA # This is the full path of the final README.txt file for creation self.readme_file_path = join(self.data_path, 'README.txt') # Symlink template name in METADATA - self.default_readme_file = curation_dict['readme_template'] + self.default_readme_file = self.curation_dict['readme_template'] # Retrieve Figshare metadata for jinja template engine self.figshare_readme_dict = self.retrieve_article_metadata() From fc159b09a46699c1e5c711190fc6fdd0e6743b59 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 4 May 2021 15:42:24 -0700 Subject: [PATCH 03/14] Footer stripping via markdown instead of markup - Use horizontal line indicator - Use str.endswith() and string indexing to strip --- ldcoolp/config/default.ini | 2 +- .../curation/inspection/readme/__init__.py | 24 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ldcoolp/config/default.ini b/ldcoolp/config/default.ini index 3f125080..c635a7c2 100644 --- a/ldcoolp/config/default.ini +++ b/ldcoolp/config/default.ini @@ -50,7 +50,7 @@ log_parent_dir = %(parent_dir)s log_dir = logs # Footer to strip -footer =





For inquiries regarding +footer = * * * # Qualtrics configuration [qualtrics] diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index f783b115..ecf3b4ca 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -315,21 +315,27 @@ def retrieve_article_metadata(self): self.article_dict['item']['authors'][0]['full_name'] # Retrieve description (single string), strip vertical white space - description = self.article_dict['item']['description'].replace('

', '') - description = description.replace('
', '') + description = html2text(self.article_dict['item']['description']) + # Don't think we need this + # description = self.article_dict['item']['description'].replace('
', '') + # description = html2text(description.replace('
', '')) # Strip ReDATA footer if self.curation_dict['footer'] in description: self.log.info("Stripping footer") - strip_text = description.partition(self.curation_dict['footer']) - readme_dict['description'] = html2text(strip_text[0]) + + strip_text = description.partition(self.curation_dict['footer'])[0] + if not strip_text.endswith("\n\n"): + self.log.info("No carriage returns") + while strip_text.endswith(" \n\n"): + strip_text = strip_text[:-4] + while strip_text.endswith("\n\n"): + strip_text = strip_text[:-2] + + readme_dict['description'] = strip_text else: self.log.info("No footer to strip") - readme_dict['description'] = html2text(description) - - # Strip extra white space from html2text - if readme_dict['description'][-2:] == "\n\n": - readme_dict['description'] = readme_dict['description'][:-2] + readme_dict['description'] = description # Retrieve references as list readme_dict['references'] = self.article_dict['item']['references'] From 43d125e36a096aef073877e8620bd40b09da4b20 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 08:47:29 -0700 Subject: [PATCH 04/14] Add footer_check script [ci skip] --- ldcoolp/scripts/testing/footer_check | 169 +++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100755 ldcoolp/scripts/testing/footer_check diff --git a/ldcoolp/scripts/testing/footer_check b/ldcoolp/scripts/testing/footer_check new file mode 100755 index 00000000..9d5bbd36 --- /dev/null +++ b/ldcoolp/scripts/testing/footer_check @@ -0,0 +1,169 @@ +#!/usr/bin/env python +import shutil +from os.path import dirname, exists, join +from os import rename, makedirs + +import argparse +import configparser + +from datetime import date + +from requests import HTTPError + +from ldcoolp.curation.api import figshare +from redata.commons import logger +from figshare.figshare import Figshare + +from ldcoolp.curation.depositor_name import DepositorName +from ldcoolp.curation.inspection.readme import ReadmeClass + +# Version and branch info +from ldcoolp import __version__, CODE_NAME +from redata.commons.git_info import GitInfo +from ldcoolp import __file__ as library_path + + +# Config loader +from ldcoolp.config import dict_load + +library_root_path = dirname(dirname(library_path)) # Retrieve parent directory to ldcoolp + +today = date.today() + + +if __name__ == '__main__': + # Parse command-line arguments + parser = argparse.ArgumentParser(description='Command-line driver for checking footer.') + parser.add_argument('--config', required=True, help='path to configuration file') + args = parser.parse_args() + + if not exists(args.config): + raise FileNotFoundError(f"WARNING!!! Config file not found: {args.config}") + + gi = GitInfo(library_root_path) + + banner_message = f""" + This is the command-line tool that automates data retrieval, and curatorial review set-up. It: + 1. Perform folder management for each deposit + 2. Retrieves the dataset + 3. Download a copy of the curatorial review process + 4. Attempt to retrieve the Qualtrics deposit agreement or provide you with the link + + {CODE_NAME} branch: {gi.branch} + {CODE_NAME} version: {__version__} + {CODE_NAME} commit hash: {gi.short_commit} + Created by Chun Ly + Issues? Submit a GitHub ticket: https://github.com/ualibraries/LD_Cool_P/issues/new + """ + print(banner_message) + + # Load configuration + try: + config_dict = dict_load(args.config) + except configparser.ParsingError: + exit() + + curation_dict = config_dict['curation'] + + # Define logfile + root_directory_main = curation_dict[curation_dict['log_parent_dir']] + + log_dir = join(root_directory_main, curation_dict['log_dir']) + if not exists(log_dir): + makedirs(log_dir) + logfile_prefix = 'footer_check' + log = logger.log_setup(log_dir, logfile_prefix) + + fs_dict = config_dict['figshare'] + fs_admin = figshare.FigshareInstituteAdmin(figshare_dict=fs_dict) + + fs = Figshare() + + log.info("Retrieving list now ...") + + lc = logger.LogCommons(log, logfile_prefix, gi, + code_name=CODE_NAME, version=__version__) + + lc.script_start() + + # Retrieve username, hostname, IP + lc.script_sys_info() + + # Configuration information + log.info(f"Config file: {args.config}") + + curation_df = fs_admin.get_curation_list() + + log.info("Truncating to approved list ...") + published_curation_df = curation_df.loc[(curation_df['status'] == 'approved') & + (curation_df['group_id'] != 28925)] + published_curation_articles = published_curation_df['article_id'].tolist() + published_curation_versions = published_curation_df['version'].tolist() + published_curation_id = published_curation_df['id'].tolist() + + num_articles = len(published_curation_articles) + + log.info(f"Number of Deposits: {num_articles}") + + footer_dir = curation_dict[curation_dict['source']+'_path'] + if not exists(footer_dir): + log.info(f"Creating: {footer_dir}") + makedirs(footer_dir) + + count = 0 + for article, version, curation_id in zip(published_curation_articles, + published_curation_versions, + published_curation_id): + if count < 3: + print(f"Getting: {article}, v{version}") + + try: + dn = DepositorName(article, fs_admin, curation_id=curation_id, + log=log) + working_path = join(footer_dir, "1.ToDo", dn.folderName) + for folder in ['DATA', 'METADATA']: + dir0 = join(working_path, folder) + if not exists(dir0): + makedirs(dir0) + + rc = ReadmeClass(dn, config_dict=config_dict) + rc.main() + rename(rc.readme_file_path, + join(footer_dir, f"README_{article}_v{version}.txt")) + + for folder in ['DATA', 'METADATA']: + shutil.rmtree(join(working_path, folder)) + + count += 1 + except HTTPError: + log.warning("RECORDED UNAVAILABLE") + + # Change permission to mode=666 (rw for all) + lc.log_permission() + + lc.script_end() + +''' +# This is for earlier testing to check partitioning +try: + details_dict = fs.get_article_details(article, version=version) + description = html2text(details_dict['description']) + if curation_dict['footer'] in description: + footer_check.append(True) + strip_text = description.partition(curation_dict['footer'])[0] + print(description.partition(curation_dict['footer'])) + + if not strip_text.endswith(" \n\n"): + print("WARNING: no ending") + while strip_text.endswith(" \n\n"): + strip_text = strip_text[:-4] + while strip_text.endswith("\n\n"): + strip_text = strip_text[:-2] + else: + print("no footer") + footer_check.append(False) + print(html2text(description)) + print(details_dict['title']) +except HTTPError: + footer_check.append('N/A') +''' \ No newline at end of file From 21ec56e85bcbf5e78337c248bdcc3d016363ac84 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 08:48:00 -0700 Subject: [PATCH 05/14] Include __init__.py for scripts folder [ci skip] --- ldcoolp/scripts/__init__.py | 0 ldcoolp/scripts/testing/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 ldcoolp/scripts/__init__.py create mode 100644 ldcoolp/scripts/testing/__init__.py diff --git a/ldcoolp/scripts/__init__.py b/ldcoolp/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ldcoolp/scripts/testing/__init__.py b/ldcoolp/scripts/testing/__init__.py new file mode 100644 index 00000000..e69de29b From d78d44e5b38219af4ca52ab0690a73ad881037cc Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 08:58:45 -0700 Subject: [PATCH 06/14] ReadmeClass: Add interactive option --- .../curation/inspection/readme/__init__.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index ecf3b4ca..b710fa42 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -86,8 +86,9 @@ class ReadmeClass: """ def __init__(self, dn, config_dict=config_default_dict, update=False, - q: Qualtrics = None, log=None): + q: Qualtrics = None, interactive=True, log=None): self.config_dict = config_dict + self.interactive = interactive self.dn = dn self.folderName = self.dn.folderName @@ -180,8 +181,12 @@ def check_for_readme(self): self.log.info("Only one README file found!") self.log.info("PROMPT: Type 'Yes'/'yes' if you wish to use as template.") - src_input = input("PROMPT: Anything else will use 'default' : ") - self.log.info(f"RESPONSE: {src_input}") + if self.interactive: + src_input = input("PROMPT: Anything else will use 'default' : ") + self.log.info(f"RESPONSE: {src_input}") + else: + self.log.info("Interactive mode disabled. Using default") + src_input = '' if src_input.lower() == 'yes': template_source = 'user' @@ -411,17 +416,22 @@ def main(self): self.log.info("") self.log.info("** STARTING README.txt CONSTRUCTION **") - if self.template_source != 'unknown': - self.log.info("PROMPT: Do you wish to create a README file?") - user_response = input("PROMPT: Type 'Yes'/'yes'. Anything else will exit : ") - self.log.info(f"RESPONSE: {user_response}") - if user_response.lower() == "yes": - self.construct() + if self.interactive: + if self.template_source != 'unknown': + self.log.info("PROMPT: Do you wish to create a README file?") + user_response = input("PROMPT: Type 'Yes'/'yes'. Anything else will exit : ") + self.log.info(f"RESPONSE: {user_response}") else: - self.log.warn("Exiting script") - return + self.log.warn(f"Multiple README files. Unable to save {self.readme_template} and README.txt") + else: + self.log.info("Interactive mode disabled. Always creating README.txt") + user_response = 'yes' + + if user_response.lower() == "yes": + self.construct() else: - self.log.warn(f"Multiple README files. Unable to save {self.readme_template} and README.txt") + self.log.warn("Exiting script") + return def walkthrough(data_path, ignore='', log=None): From 3552389527a5e402498fff4a190dc60a980f4c91 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 09:15:23 -0700 Subject: [PATCH 07/14] Qualtrics: Add interactive option #203 [ci skip] - Update ReadmeClass with Qualtrics call --- ldcoolp/curation/api/qualtrics.py | 34 ++++++++++++++----- .../curation/inspection/readme/__init__.py | 2 +- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/ldcoolp/curation/api/qualtrics.py b/ldcoolp/curation/api/qualtrics.py index 6896233e..f0c8b364 100644 --- a/ldcoolp/curation/api/qualtrics.py +++ b/ldcoolp/curation/api/qualtrics.py @@ -110,7 +110,11 @@ class Qualtrics: Generate URL with customized query strings based on Figshare metadata """ - def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None): + def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None, + interactive=True): + + self.interactive = interactive + self.dict = qualtrics_dict self.token = self.dict['token'] self.data_center = self.dict['datacenter'] @@ -324,11 +328,16 @@ def retrieve_deposit_agreement(self, dn_dict=None, ResponseId=None, out_path='', except ValueError: self.log.warn("Error with retrieving ResponseId and SurveyId") self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.") - ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") - self.log.info(f"RESPONSE: {ResponseId}") - self.log.info("PROMPT: If you wish, you can manually enter SurveyId to retrieve.") - SurveyId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") - self.log.info(f"RESPONSE: {SurveyId}") + if self.interactive: + ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") + self.log.info(f"RESPONSE: {ResponseId}") + self.log.info("PROMPT: If you wish, you can manually enter SurveyId to retrieve.") + SurveyId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") + self.log.info(f"RESPONSE: {SurveyId}") + else: + self.log.info("Interactive mode disabled. Skipping manual input") + ResponseId = '' + SurveyId = '' if ResponseId == '' or SurveyId == '': custom_url = self.generate_url(dn_dict) @@ -356,7 +365,10 @@ def retrieve_deposit_agreement(self, dn_dict=None, ResponseId=None, out_path='', # Retrieve PDF via direct URL link if out_path: - pdf_url = 'retrieve' + if self.interactive: + pdf_url = 'retrieve' + else: + pdf_url = '' while pdf_url == 'retrieve': pdf_url = input("To retrieve PDF via API, provide PDF URL here. Hit enter to skip : ") @@ -539,8 +551,12 @@ def retrieve_qualtrics_readme(self, dn=None, ResponseId='', browser=True): except ValueError: self.log.warn("Error with retrieving ResponseId") self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.") - ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") - self.log.info(f"RESPONSE: {ResponseId}") + if self.interactive: + ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ") + self.log.info(f"RESPONSE: {ResponseId}") + else: + self.log.info("Interactive mode disabled. Skipping manual input") + ResponseId = '' if ResponseId: response_df = self.get_survey_response(self.readme_survey_id, ResponseId) diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index b710fa42..7c245723 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -105,7 +105,7 @@ def __init__(self, dn, config_dict=config_default_dict, update=False, self.q = q else: self.q = Qualtrics(qualtrics_dict=self.config_dict['qualtrics'], - log=self.log) + interactive=interactive, log=self.log) self.curation_dict = self.config_dict['curation'] self.root_directory_main = self.curation_dict[self.curation_dict['parent_dir']] From f9528233a54fc31e1c1ea4cfef51e2ef2819e88f Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 09:17:32 -0700 Subject: [PATCH 08/14] Update footer_check script for interactive call #203 [ci skip] --- ldcoolp/scripts/testing/footer_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldcoolp/scripts/testing/footer_check b/ldcoolp/scripts/testing/footer_check index 9d5bbd36..06f3d367 100755 --- a/ldcoolp/scripts/testing/footer_check +++ b/ldcoolp/scripts/testing/footer_check @@ -126,7 +126,7 @@ if __name__ == '__main__': if not exists(dir0): makedirs(dir0) - rc = ReadmeClass(dn, config_dict=config_dict) + rc = ReadmeClass(dn, config_dict=config_dict, interactive=False) rc.main() rename(rc.readme_file_path, join(footer_dir, f"README_{article}_v{version}.txt")) From 57debbd953276e04031e9e2df7411ab28cea6e96 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 09:35:54 -0700 Subject: [PATCH 09/14] footer_check: Remove count limit [ci skip] --- ldcoolp/scripts/testing/footer_check | 44 +++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/ldcoolp/scripts/testing/footer_check b/ldcoolp/scripts/testing/footer_check index 06f3d367..e6900564 100755 --- a/ldcoolp/scripts/testing/footer_check +++ b/ldcoolp/scripts/testing/footer_check @@ -114,29 +114,27 @@ if __name__ == '__main__': for article, version, curation_id in zip(published_curation_articles, published_curation_versions, published_curation_id): - if count < 3: - print(f"Getting: {article}, v{version}") - - try: - dn = DepositorName(article, fs_admin, curation_id=curation_id, - log=log) - working_path = join(footer_dir, "1.ToDo", dn.folderName) - for folder in ['DATA', 'METADATA']: - dir0 = join(working_path, folder) - if not exists(dir0): - makedirs(dir0) - - rc = ReadmeClass(dn, config_dict=config_dict, interactive=False) - rc.main() - rename(rc.readme_file_path, - join(footer_dir, f"README_{article}_v{version}.txt")) - - for folder in ['DATA', 'METADATA']: - shutil.rmtree(join(working_path, folder)) - - count += 1 - except HTTPError: - log.warning("RECORDED UNAVAILABLE") + print(f"Getting: {article}, v{version}") + + try: + dn = DepositorName(article, fs_admin, curation_id=curation_id, + log=log) + working_path = join(footer_dir, "1.ToDo", dn.folderName) + for folder in ['DATA', 'METADATA']: + dir0 = join(working_path, folder) + if not exists(dir0): + makedirs(dir0) + + rc = ReadmeClass(dn, config_dict=config_dict, interactive=False) + rc.main() + rename(rc.readme_file_path, + join(footer_dir, f"README_{article}_v{version}.txt")) + + shutil.rmtree(working_path) + + count += 1 + except HTTPError: + log.warning("RECORDED UNAVAILABLE") # Change permission to mode=666 (rw for all) lc.log_permission() From c9e8a24fd5753c0215fbf5facf2932384d580df8 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 10:03:52 -0700 Subject: [PATCH 10/14] footer_check: Only run if file does not exist [ci skip] --- ldcoolp/scripts/testing/footer_check | 52 ++++++++++++++++------------ 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/ldcoolp/scripts/testing/footer_check b/ldcoolp/scripts/testing/footer_check index e6900564..7d94eae6 100755 --- a/ldcoolp/scripts/testing/footer_check +++ b/ldcoolp/scripts/testing/footer_check @@ -1,6 +1,6 @@ #!/usr/bin/env python import shutil -from os.path import dirname, exists, join +from os.path import dirname, exists, join, basename from os import rename, makedirs import argparse @@ -114,27 +114,35 @@ if __name__ == '__main__': for article, version, curation_id in zip(published_curation_articles, published_curation_versions, published_curation_id): - print(f"Getting: {article}, v{version}") - - try: - dn = DepositorName(article, fs_admin, curation_id=curation_id, - log=log) - working_path = join(footer_dir, "1.ToDo", dn.folderName) - for folder in ['DATA', 'METADATA']: - dir0 = join(working_path, folder) - if not exists(dir0): - makedirs(dir0) - - rc = ReadmeClass(dn, config_dict=config_dict, interactive=False) - rc.main() - rename(rc.readme_file_path, - join(footer_dir, f"README_{article}_v{version}.txt")) - - shutil.rmtree(working_path) - - count += 1 - except HTTPError: - log.warning("RECORDED UNAVAILABLE") + + output_readme_file = join(footer_dir, f"README_{article}_v{version}.txt") + if exists(output_readme_file): + log.info(f"README file found. Skipping! {basename(output_readme_file)}") + else: + print(f"Getting: {article}, v{version}") + + try: + dn = DepositorName(article, fs_admin, curation_id=curation_id, + log=log) + working_path = join(footer_dir, "1.ToDo", dn.folderName) + for folder in ['DATA', 'METADATA']: + dir0 = join(working_path, folder) + if not exists(dir0): + makedirs(dir0) + + rc = ReadmeClass(dn, config_dict=config_dict, interactive=False) + rc.main() + rename(rc.readme_file_path, output_readme_file) + + shutil.rmtree(working_path) + + count += 1 + except HTTPError: + log.warning("RECORDED UNAVAILABLE") + pass + except ValueError: + log.warning("NO DEPOSIT AGREEMENT") + pass # Change permission to mode=666 (rw for all) lc.log_permission() From 05f5803fac876c7614b7147e514bf2542449bf66 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 10:12:01 -0700 Subject: [PATCH 11/14] ReadmeClass: Strip additional \n at end [ci skip] --- ldcoolp/curation/inspection/readme/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py index 7c245723..0b4bb4f6 100644 --- a/ldcoolp/curation/inspection/readme/__init__.py +++ b/ldcoolp/curation/inspection/readme/__init__.py @@ -336,6 +336,8 @@ def retrieve_article_metadata(self): strip_text = strip_text[:-4] while strip_text.endswith("\n\n"): strip_text = strip_text[:-2] + while strip_text.endswith("\n"): + strip_text = strip_text[:-1] readme_dict['description'] = strip_text else: From eb8cad14c5cdc85b61fecfc8ba8d63642d580752 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 10:17:40 -0700 Subject: [PATCH 12/14] Update README.md changelog [ci skip] --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c162cbe..a5c6c6a3 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ Currently, there are two GitHub Action workflows: A list of released features and their issue number(s). List is sorted from moderate to minor revisions for reach release. -v1.0.0 - v1.0.3: +v1.0.0 - v1.0.4: * Feature: Handle multiple Qualtrics Deposit Agreement survey, including conference-style submissions (e.g., Space Grant, WCCFL) #137, #193, #194 @@ -256,6 +256,7 @@ v1.0.0 - v1.0.3: * Enhancement: Simple script for Qualtrics link generation for WCCFL conference #171 * Enhancement: Ability to use different README_template.md #195 * Feature: Retrieve corresponding author from Qualtrics Deposit Agreement for jinja templating #138 + * Feature: Strip Figshare Description footer for README.txt #118 **Note**: Backward incompatibility with config file due to #137 From 2a899779cc403e289431c362199c80a1135e83cc Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 10:18:52 -0700 Subject: [PATCH 13/14] Bump version: v1.0.3 -> v1.0.4 --- ldcoolp/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ldcoolp/__init__.py b/ldcoolp/__init__.py index 83efcaef..efcd35d1 100644 --- a/ldcoolp/__init__.py +++ b/ldcoolp/__init__.py @@ -1,6 +1,6 @@ from os import path -__version__ = "1.0.3" +__version__ = "1.0.4" CODE_NAME = "LD-Cool-P" diff --git a/setup.py b/setup.py index c061d9ff..5b4c44f7 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name='ldcoolp', - version='v1.0.3', + version='v1.0.4', packages=['ldcoolp'], url='https://github.com/ualibraries/LD_Cool_P', license='MIT License', From 5f27080545f7f7a3af33a6280c91ef335af55659 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 5 May 2021 10:20:08 -0700 Subject: [PATCH 14/14] Update README.md for current version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a5c6c6a3..6e11829c 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ You can confirm installation via `conda list` (curation) $ conda list ldcoolp ``` -You should see that the version is `1.0.3`. +You should see that the version is `1.0.4`. ### Configuration Settings