From f829e6cca1ffc045ee4e6f7cde254fca846b269d Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Fri, 11 Oct 2024 21:41:56 -0500 Subject: [PATCH 1/4] Critical fix: was attempting to both extract tarball AND zip, so all would fail --- .../process_uploaded_expression_dataset.cgi | 93 +++++++++++-------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/www/cgi/process_uploaded_expression_dataset.cgi b/www/cgi/process_uploaded_expression_dataset.cgi index dc383723..aa8b247c 100755 --- a/www/cgi/process_uploaded_expression_dataset.cgi +++ b/www/cgi/process_uploaded_expression_dataset.cgi @@ -26,6 +26,7 @@ import pandas as pd import scanpy as sc from scipy import sparse import anndata +import zipfile # This has a huge dependency stack of libraries. Occasionally, one of them has methods # which prints debugging information on STDOUT, killing this CGI. So here we redirect @@ -92,7 +93,7 @@ def main(): # https://stackoverflow.com/a/22181041/1368079 # https://stackoverflow.com/questions/6024472/start-background-process-daemon-from-cgi-script # https://groups.google.com/g/comp.lang.python/c/gSRnd0RoVKY?pli=1 - do_fork = True + do_fork = False if do_fork: sys.stdout = original_stdout result['success'] = 1 @@ -305,48 +306,58 @@ def process_mex_3tab(upload_dir): files_extracted = [] - with tarfile.open(filename) as tf: - for entry in tf: - tf.extract(entry, path=upload_dir) - - # Nemo suffixes - nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] - suffix_found = None + if compression_format == 'tarball': + try: + with tarfile.open(filename) as tf: + for entry in tf: + tf.extract(entry, path=upload_dir) + + # Nemo suffixes + nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] + suffix_found = None + + for suffix in nemo_suffixes: + if entry.name.endswith(suffix): + suffix_found = suffix + # Rename the file to the appropriate name + os.rename(os.path.join(upload_dir, entry.name), + os.path.join(upload_dir, suffix)) + + if suffix_found is not None: 
+ files_extracted.append(suffix_found) + else: + files_extracted.append(entry.name) + except tarfile.ReadError: + write_status(upload_dir, 'error', "Bad tarball file. Couldn't extract the tarball.") + return - for suffix in nemo_suffixes: - if entry.name.endswith(suffix): - suffix_found = suffix - # Rename the file to the appropriate name - os.rename(os.path.join(upload_dir, entry.name), - os.path.join(upload_dir, suffix)) - - if suffix_found is not None: - files_extracted.append(suffix_found) - else: - files_extracted.append(entry.name) - - with zipfile.ZipFile(filename) as zf: - for entry in zf.infolist(): - zf.extract(entry, path=upload_dir) - - # Nemo suffixes - nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] - suffix_found = None - - for suffix in nemo_suffixes: - if entry.filename.endswith(suffix): - suffix_found = suffix - # Rename the file to the appropriate name - os.rename(os.path.join(upload_dir, entry.filename), - os.path.join(upload_dir, suffix)) - - if suffix_found is not None: - files_extracted.append(suffix_found) - else: - files_extracted.append(entry.filename) + if compression_format == 'zip': + try: + with zipfile.ZipFile(filename) as zf: + for entry in zf.infolist(): + zf.extract(entry, path=upload_dir) + + # Nemo suffixes + nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] + suffix_found = None + + for suffix in nemo_suffixes: + if entry.filename.endswith(suffix): + suffix_found = suffix + # Rename the file to the appropriate name + os.rename(os.path.join(upload_dir, entry.filename), + os.path.join(upload_dir, suffix)) + + if suffix_found is not None: + files_extracted.append(suffix_found) + else: + files_extracted.append(entry.filename) + except zipfile.BadZipFile: + write_status(upload_dir, 'error', "Bad zip file. 
Couldn't extract the zip file.") + return # Determine the dataset type - dataset_type = tarball_content_type(files_extracted) + dataset_type = package_content_type(files_extracted) if dataset_type is None: write_status(upload_dir, 'error', "Unsupported dataset format. Couldn't tell type from file names within the tarball") @@ -363,7 +374,7 @@ def write_status(upload_dir, status_name, message): with open(os.path.join(upload_dir, 'status.json'), 'w') as f: f.write(json.dumps(status)) -def tarball_content_type(filenames): +def package_content_type(filenames): print("DEBUG: filenames", file=sys.stderr, flush=True) print(filenames, file=sys.stderr, flush=True) """ From ffb57235aaf306ff91b659f2b40d5657e3fc760d Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Fri, 11 Oct 2024 21:45:48 -0500 Subject: [PATCH 2/4] Uploader: Restored forking setting to true --- www/cgi/process_uploaded_expression_dataset.cgi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/cgi/process_uploaded_expression_dataset.cgi b/www/cgi/process_uploaded_expression_dataset.cgi index aa8b247c..067314e5 100755 --- a/www/cgi/process_uploaded_expression_dataset.cgi +++ b/www/cgi/process_uploaded_expression_dataset.cgi @@ -93,7 +93,7 @@ def main(): # https://stackoverflow.com/a/22181041/1368079 # https://stackoverflow.com/questions/6024472/start-background-process-daemon-from-cgi-script # https://groups.google.com/g/comp.lang.python/c/gSRnd0RoVKY?pli=1 - do_fork = False + do_fork = True if do_fork: sys.stdout = original_stdout result['success'] = 1 From 35ec634484af83ea034f16be90d87be1149345d8 Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Tue, 15 Oct 2024 20:58:48 -0500 Subject: [PATCH 3/4] Hotfix for issue #912 plus fixed issue where dataset type select box wasn't populating on metadata sheet upload --- www/js/upload_dataset.js | 11 ++++++++++- www/upload_dataset.html | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/www/js/upload_dataset.js 
b/www/js/upload_dataset.js index a13f6b23..8d556f2f 100644 --- a/www/js/upload_dataset.js +++ b/www/js/upload_dataset.js @@ -269,7 +269,6 @@ const populateMetadataFormFromFile = async () => { if (data.success) { document.getElementsByName('metadata-title')[0].value = data.metadata.title.value; document.getElementsByName('metadata-summary')[0].value = data.metadata.summary.value; - document.getElementsByName('metadata-dataset-type')[0].value = data.metadata.dataset_type.value; document.getElementsByName('metadata-annotation-source')[0].value = data.metadata.annotation_source.value; document.getElementsByName('metadata-annotation-version')[0].value = data.metadata.annotation_release_number.value; document.getElementsByName('metadata-geo-id')[0].value = data.metadata.geo_accession.value; @@ -285,6 +284,16 @@ const populateMetadataFormFromFile = async () => { document.getElementsByName('metadata-library-source')[0].value = data.metadata.library_source.value; document.getElementsByName('metadata-library-strategy')[0].value = data.metadata.library_strategy.value; document.getElementsByName('metadata-pubmed-id')[0].value = data.metadata.pubmed_id.value; + + // Handle the metadata-dataset-type select box + let dataset_type_select = document.getElementsByName('metadata-dataset-type')[0]; + for (let i = 0; i < dataset_type_select.options.length; i++) { + if (dataset_type_select.options[i].value === data.metadata.dataset_type.value) { + dataset_type_select.selectedIndex = i; + break; + } + } + document.getElementById('metadata-upload-status-message').textContent = "Form populated with uploaded metadata"; button.disabled = false; diff --git a/www/upload_dataset.html b/www/upload_dataset.html index a00ab5d7..40cdee4f 100644 --- a/www/upload_dataset.html +++ b/www/upload_dataset.html @@ -225,8 +225,8 @@

Step - Enter metadata (via form OR upload)

From 03e948acbcc15deba76760cda1a0db06862cbf7e Mon Sep 17 00:00:00 2001 From: adkinsrs Date: Mon, 28 Oct 2024 09:10:39 -0400 Subject: [PATCH 4/4] Resolving bug when closing the adata object --- www/api/resources/projectr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/www/api/resources/projectr.py b/www/api/resources/projectr.py index 5d3c2b9a..53ae2945 100644 --- a/www/api/resources/projectr.py +++ b/www/api/resources/projectr.py @@ -497,7 +497,10 @@ def projectr_callback(dataset_id, genecart_id, projection_id, session_id, scope, , "num_dataset_genes": num_target_genes } - adata.close() + # Close adata so that we do not have a stale opened object + if adata.isbacked: + adata.file.close() + if dedup_copy.exists(): dedup_copy.unlink()