Merge pull request #141 from ualibraries/feature/118_footer_strip

feature/118_footer_strip
UAL-RE · May 5, 2021 · a99404c · a99404c
2 parents d93a2e1 + 5f27080
commit a99404c
Show file tree

Hide file tree

Showing 9 changed files with 265 additions and 40 deletions.
diff --git a/README.md b/README.md
@@ -116,7 +116,7 @@ You can confirm installation via `conda list`
 (curation) $ conda list ldcoolp
 ```
 
-You should see that the version is `1.0.3`.
+You should see that the version is `1.0.4`.
 
 ### Configuration Settings
 
@@ -240,7 +240,7 @@ Currently, there are two GitHub Action workflows:
 A list of released features and their issue number(s).
 List is sorted from moderate to minor revisions for each release.
 
-v1.0.0 - v1.0.3:
+v1.0.0 - v1.0.4:
  * Feature: Handle multiple Qualtrics Deposit Agreement surveys,
    including conference-style submissions (e.g., Space Grant, WCCFL)
    #137, #193, #194
@@ -256,6 +256,7 @@ v1.0.0 - v1.0.3:
  * Enhancement: Simple script for Qualtrics link generation for WCCFL conference #171
  * Enhancement: Ability to use different README_template.md #195
  * Feature: Retrieve corresponding author from Qualtrics Deposit Agreement for jinja templating #138
+ * Feature: Strip Figshare Description footer for README.txt #118
 
 **Note**: Backward incompatibility with config file due to #137
 

diff --git a/ldcoolp/__init__.py b/ldcoolp/__init__.py
@@ -1,6 +1,6 @@
 from os import path
 
-__version__ = "1.0.3"
+__version__ = "1.0.4"
 
 CODE_NAME = "LD-Cool-P"
 

diff --git a/ldcoolp/config/default.ini b/ldcoolp/config/default.ini
@@ -49,6 +49,8 @@ readme_template = README_template.md
 log_parent_dir = %(parent_dir)s
 log_dir = logs
 
+# Footer to strip
+footer = * * *
 
 # Qualtrics configuration
 [qualtrics]

diff --git a/ldcoolp/curation/api/qualtrics.py b/ldcoolp/curation/api/qualtrics.py
@@ -110,7 +110,11 @@ class Qualtrics:
       Generate URL with customized query strings based on Figshare metadata
     """
 
-    def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None):
+    def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None,
+                 interactive=True):
+
+        self.interactive = interactive
+
         self.dict = qualtrics_dict
         self.token = self.dict['token']
         self.data_center = self.dict['datacenter']
@@ -324,11 +328,16 @@ def retrieve_deposit_agreement(self, dn_dict=None, ResponseId=None, out_path='',
             except ValueError:
                 self.log.warn("Error with retrieving ResponseId and SurveyId")
                 self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
-                ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
-                self.log.info(f"RESPONSE: {ResponseId}")
-                self.log.info("PROMPT: If you wish, you can manually enter SurveyId to retrieve.")
-                SurveyId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
-                self.log.info(f"RESPONSE: {SurveyId}")
+                if self.interactive:
+                    ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
+                    self.log.info(f"RESPONSE: {ResponseId}")
+                    self.log.info("PROMPT: If you wish, you can manually enter SurveyId to retrieve.")
+                    SurveyId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
+                    self.log.info(f"RESPONSE: {SurveyId}")
+                else:
+                    self.log.info("Interactive mode disabled. Skipping manual input")
+                    ResponseId = ''
+                    SurveyId = ''
 
                 if ResponseId == '' or SurveyId == '':
                     custom_url = self.generate_url(dn_dict)
@@ -356,7 +365,10 @@ def retrieve_deposit_agreement(self, dn_dict=None, ResponseId=None, out_path='',
 
             # Retrieve PDF via direct URL link
             if out_path:
-                pdf_url = 'retrieve'
+                if self.interactive:
+                    pdf_url = 'retrieve'
+                else:
+                    pdf_url = ''
                 while pdf_url == 'retrieve':
                     pdf_url = input("To retrieve PDF via API, provide PDF URL here. Hit enter to skip : ")
 
@@ -539,8 +551,12 @@ def retrieve_qualtrics_readme(self, dn=None, ResponseId='', browser=True):
             except ValueError:
                 self.log.warn("Error with retrieving ResponseId")
                 self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
-                ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
-                self.log.info(f"RESPONSE: {ResponseId}")
+                if self.interactive:
+                    ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
+                    self.log.info(f"RESPONSE: {ResponseId}")
+                else:
+                    self.log.info("Interactive mode disabled. Skipping manual input")
+                    ResponseId = ''
 
                 if ResponseId:
                     response_df = self.get_survey_response(self.readme_survey_id, ResponseId)

diff --git a/ldcoolp/curation/inspection/readme/__init__.py b/ldcoolp/curation/inspection/readme/__init__.py
@@ -86,8 +86,9 @@ class ReadmeClass:
     """
 
     def __init__(self, dn, config_dict=config_default_dict, update=False,
-                 q: Qualtrics = None, log=None):
+                 q: Qualtrics = None, interactive=True, log=None):
         self.config_dict = config_dict
+        self.interactive = interactive
 
         self.dn = dn
         self.folderName = self.dn.folderName
@@ -104,31 +105,35 @@ def __init__(self, dn, config_dict=config_default_dict, update=False,
             self.q = q
         else:
             self.q = Qualtrics(qualtrics_dict=self.config_dict['qualtrics'],
-                               log=self.log)
+                               interactive=interactive, log=self.log)
+
+        self.curation_dict = self.config_dict['curation']
+        self.root_directory_main = self.curation_dict[self.curation_dict['parent_dir']]
 
-        curation_dict = self.config_dict['curation']
-        self.root_directory_main = curation_dict[curation_dict['parent_dir']]
         if not update:
             # Use 1.ToDo
-            self.root_directory = join(self.root_directory_main, curation_dict['folder_todo'])
+            self.root_directory = join(self.root_directory_main,
+                                       self.curation_dict['folder_todo'])
         else:
             # Use 2.UnderReview. Need to use admin.move module to find current path
-            mc = move.MoveClass(curation_dict=curation_dict)
+            mc = move.MoveClass(curation_dict=self.curation_dict)
             current_stage = mc.get_source_stage(self.folderName)
             self.root_directory = join(self.root_directory_main, current_stage)
 
         # Paths
         self.folder_path = join(self.root_directory, self.folderName)
-        self.metadata_path = join(self.folder_path, curation_dict['folder_metadata'])  # METADATA
-        self.data_path = join(self.folder_path, curation_dict['folder_copy_data'])  # DATA
+        self.metadata_path = join(self.folder_path,
+                                  self.curation_dict['folder_metadata'])  # METADATA
+        self.data_path = join(self.folder_path,
+                              self.curation_dict['folder_copy_data'])  # DATA
         self.original_data_path = join(self.folder_path,
-                                       curation_dict['folder_data'])  # ORIGINAL_DATA
+                                       self.curation_dict['folder_data'])  # ORIGINAL_DATA
 
         # This is the full path of the final README.txt file for creation
         self.readme_file_path = join(self.data_path, 'README.txt')
 
         # Symlink template name in METADATA
-        self.default_readme_file = curation_dict['readme_template']
+        self.default_readme_file = self.curation_dict['readme_template']
 
         # Retrieve Figshare metadata for jinja template engine
         self.figshare_readme_dict = self.retrieve_article_metadata()
@@ -176,8 +181,12 @@ def check_for_readme(self):
                 self.log.info("Only one README file found!")
 
                 self.log.info("PROMPT: Type 'Yes'/'yes' if you wish to use as template.")
-                src_input = input("PROMPT: Anything else will use 'default' : ")
-                self.log.info(f"RESPONSE: {src_input}")
+                if self.interactive:
+                    src_input = input("PROMPT: Anything else will use 'default' : ")
+                    self.log.info(f"RESPONSE: {src_input}")
+                else:
+                    self.log.info("Interactive mode disabled. Using default")
+                    src_input = ''
 
                 if src_input.lower() == 'yes':
                     template_source = 'user'
@@ -311,12 +320,29 @@ def retrieve_article_metadata(self):
             self.article_dict['item']['authors'][0]['full_name']
 
         # Retrieve description (single string), strip vertical white space
-        description = self.article_dict['item']['description'].replace('<div>', '')
-        description = description.replace('</div>', '')
-        readme_dict['description'] = html2text(description)
-        # Strip extra white space from html2text
-        if readme_dict['description'][-2:] == "\n\n":
-            readme_dict['description'] = readme_dict['description'][:-2]
+        description = html2text(self.article_dict['item']['description'])
+        # Don't think we need this
+        # description = self.article_dict['item']['description'].replace('<div>', '')
+        # description = html2text(description.replace('</div>', ''))
+
+        # Strip ReDATA footer
+        if self.curation_dict['footer'] in description:
+            self.log.info("Stripping footer")
+
+            strip_text = description.partition(self.curation_dict['footer'])[0]
+            if not strip_text.endswith("\n\n"):
+                self.log.info("No carriage returns")
+            while strip_text.endswith("  \n\n"):
+                strip_text = strip_text[:-4]
+            while strip_text.endswith("\n\n"):
+                strip_text = strip_text[:-2]
+            while strip_text.endswith("\n"):
+                strip_text = strip_text[:-1]
+
+            readme_dict['description'] = strip_text
+        else:
+            self.log.info("No footer to strip")
+            readme_dict['description'] = description
 
         # Retrieve references as list
         readme_dict['references'] = self.article_dict['item']['references']
@@ -392,17 +418,22 @@ def main(self):
         self.log.info("")
         self.log.info("** STARTING README.txt CONSTRUCTION **")
 
-        if self.template_source != 'unknown':
-            self.log.info("PROMPT: Do you wish to create a README file?")
-            user_response = input("PROMPT: Type 'Yes'/'yes'. Anything else will exit : ")
-            self.log.info(f"RESPONSE: {user_response}")
-            if user_response.lower() == "yes":
-                self.construct()
+        if self.interactive:
+            if self.template_source != 'unknown':
+                self.log.info("PROMPT: Do you wish to create a README file?")
+                user_response = input("PROMPT: Type 'Yes'/'yes'. Anything else will exit : ")
+                self.log.info(f"RESPONSE: {user_response}")
             else:
-                self.log.warn("Exiting script")
-                return
+                self.log.warn(f"Multiple README files. Unable to save {self.readme_template} and README.txt")
+        else:
+            self.log.info("Interactive mode disabled. Always creating README.txt")
+            user_response = 'yes'
+
+        if user_response.lower() == "yes":
+            self.construct()
         else:
-            self.log.warn(f"Multiple README files. Unable to save {self.readme_template} and README.txt")
+            self.log.warn("Exiting script")
+            return
 
 
 def walkthrough(data_path, ignore='', log=None):

diff --git a/ldcoolp/scripts/__init__.py b/ldcoolp/scripts/__init__.py
diff --git a/ldcoolp/scripts/testing/__init__.py b/ldcoolp/scripts/testing/__init__.py