From f829e6cca1ffc045ee4e6f7cde254fca846b269d Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Fri, 11 Oct 2024 21:41:56 -0500 Subject: [PATCH 1/4] Critical fix: was attempting to both extract tarball AND zip, so all would fail --- .../process_uploaded_expression_dataset.cgi | 93 +++++++++++-------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/www/cgi/process_uploaded_expression_dataset.cgi b/www/cgi/process_uploaded_expression_dataset.cgi index dc383723..aa8b247c 100755 --- a/www/cgi/process_uploaded_expression_dataset.cgi +++ b/www/cgi/process_uploaded_expression_dataset.cgi @@ -26,6 +26,7 @@ import pandas as pd import scanpy as sc from scipy import sparse import anndata +import zipfile # This has a huge dependency stack of libraries. Occasionally, one of them has methods # which prints debugging information on STDOUT, killing this CGI. So here we redirect @@ -92,7 +93,7 @@ def main(): # https://stackoverflow.com/a/22181041/1368079 # https://stackoverflow.com/questions/6024472/start-background-process-daemon-from-cgi-script # https://groups.google.com/g/comp.lang.python/c/gSRnd0RoVKY?pli=1 - do_fork = True + do_fork = False if do_fork: sys.stdout = original_stdout result['success'] = 1 @@ -305,48 +306,58 @@ def process_mex_3tab(upload_dir): files_extracted = [] - with tarfile.open(filename) as tf: - for entry in tf: - tf.extract(entry, path=upload_dir) - - # Nemo suffixes - nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] - suffix_found = None + if compression_format == 'tarball': + try: + with tarfile.open(filename) as tf: + for entry in tf: + tf.extract(entry, path=upload_dir) + + # Nemo suffixes + nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] + suffix_found = None + + for suffix in nemo_suffixes: + if entry.name.endswith(suffix): + suffix_found = suffix + # Rename the file to the appropriate name + os.rename(os.path.join(upload_dir, entry.name), + os.path.join(upload_dir, suffix)) + + if suffix_found is not None: 
+ files_extracted.append(suffix_found) + else: + files_extracted.append(entry.name) + except tarfile.ReadError: + write_status(upload_dir, 'error', "Bad tarball file. Couldn't extract the tarball.") + return - for suffix in nemo_suffixes: - if entry.name.endswith(suffix): - suffix_found = suffix - # Rename the file to the appropriate name - os.rename(os.path.join(upload_dir, entry.name), - os.path.join(upload_dir, suffix)) - - if suffix_found is not None: - files_extracted.append(suffix_found) - else: - files_extracted.append(entry.name) - - with zipfile.ZipFile(filename) as zf: - for entry in zf.infolist(): - zf.extract(entry, path=upload_dir) - - # Nemo suffixes - nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] - suffix_found = None - - for suffix in nemo_suffixes: - if entry.filename.endswith(suffix): - suffix_found = suffix - # Rename the file to the appropriate name - os.rename(os.path.join(upload_dir, entry.filename), - os.path.join(upload_dir, suffix)) - - if suffix_found is not None: - files_extracted.append(suffix_found) - else: - files_extracted.append(entry.filename) + if compression_format == 'zip': + try: + with zipfile.ZipFile(filename) as zf: + for entry in zf.infolist(): + zf.extract(entry, path=upload_dir) + + # Nemo suffixes + nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab'] + suffix_found = None + + for suffix in nemo_suffixes: + if entry.filename.endswith(suffix): + suffix_found = suffix + # Rename the file to the appropriate name + os.rename(os.path.join(upload_dir, entry.filename), + os.path.join(upload_dir, suffix)) + + if suffix_found is not None: + files_extracted.append(suffix_found) + else: + files_extracted.append(entry.filename) + except zipfile.BadZipFile: + write_status(upload_dir, 'error', "Bad zip file. 
Couldn't extract the zip file.") + return # Determine the dataset type - dataset_type = tarball_content_type(files_extracted) + dataset_type = package_content_type(files_extracted) if dataset_type is None: write_status(upload_dir, 'error', "Unsupported dataset format. Couldn't tell type from file names within the tarball") @@ -363,7 +374,7 @@ def write_status(upload_dir, status_name, message): with open(os.path.join(upload_dir, 'status.json'), 'w') as f: f.write(json.dumps(status)) -def tarball_content_type(filenames): +def package_content_type(filenames): print("DEBUG: filenames", file=sys.stderr, flush=True) print(filenames, file=sys.stderr, flush=True) """ From ffb57235aaf306ff91b659f2b40d5657e3fc760d Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Fri, 11 Oct 2024 21:45:48 -0500 Subject: [PATCH 2/4] Uploader: Restored forking setting to true --- www/cgi/process_uploaded_expression_dataset.cgi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/cgi/process_uploaded_expression_dataset.cgi b/www/cgi/process_uploaded_expression_dataset.cgi index aa8b247c..067314e5 100755 --- a/www/cgi/process_uploaded_expression_dataset.cgi +++ b/www/cgi/process_uploaded_expression_dataset.cgi @@ -93,7 +93,7 @@ def main(): # https://stackoverflow.com/a/22181041/1368079 # https://stackoverflow.com/questions/6024472/start-background-process-daemon-from-cgi-script # https://groups.google.com/g/comp.lang.python/c/gSRnd0RoVKY?pli=1 - do_fork = False + do_fork = True if do_fork: sys.stdout = original_stdout result['success'] = 1 From 35ec634484af83ea034f16be90d87be1149345d8 Mon Sep 17 00:00:00 2001 From: Joshua Orvis Date: Tue, 15 Oct 2024 20:58:48 -0500 Subject: [PATCH 3/4] Hotfix for issue #912 plus fixed issue where dataset type select box wasn't populating on metadata sheet upload --- www/js/upload_dataset.js | 11 ++++++++++- www/upload_dataset.html | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/www/js/upload_dataset.js 
b/www/js/upload_dataset.js index a13f6b23..8d556f2f 100644 --- a/www/js/upload_dataset.js +++ b/www/js/upload_dataset.js @@ -269,7 +269,6 @@ const populateMetadataFormFromFile = async () => { if (data.success) { document.getElementsByName('metadata-title')[0].value = data.metadata.title.value; document.getElementsByName('metadata-summary')[0].value = data.metadata.summary.value; - document.getElementsByName('metadata-dataset-type')[0].value = data.metadata.dataset_type.value; document.getElementsByName('metadata-annotation-source')[0].value = data.metadata.annotation_source.value; document.getElementsByName('metadata-annotation-version')[0].value = data.metadata.annotation_release_number.value; document.getElementsByName('metadata-geo-id')[0].value = data.metadata.geo_accession.value; @@ -285,6 +284,16 @@ const populateMetadataFormFromFile = async () => { document.getElementsByName('metadata-library-source')[0].value = data.metadata.library_source.value; document.getElementsByName('metadata-library-strategy')[0].value = data.metadata.library_strategy.value; document.getElementsByName('metadata-pubmed-id')[0].value = data.metadata.pubmed_id.value; + + // Handle the metadata-dataset-type select box + let dataset_type_select = document.getElementsByName('metadata-dataset-type')[0]; + for (let i = 0; i < dataset_type_select.options.length; i++) { + if (dataset_type_select.options[i].value === data.metadata.dataset_type.value) { + dataset_type_select.selectedIndex = i; + break; + } + } + document.getElementById('metadata-upload-status-message').textContent = "Form populated with uploaded metadata"; button.disabled = false; diff --git a/www/upload_dataset.html b/www/upload_dataset.html index a00ab5d7..40cdee4f 100644 --- a/www/upload_dataset.html +++ b/www/upload_dataset.html @@ -225,8 +225,8 @@

Step - Enter metadata (via form OR upload)

From 03e948acbcc15deba76760cda1a0db06862cbf7e Mon Sep 17 00:00:00 2001 From: adkinsrs Date: Mon, 28 Oct 2024 09:10:39 -0400 Subject: [PATCH 4/4] Resolving bug when closing the adata object --- www/api/resources/projectr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/www/api/resources/projectr.py b/www/api/resources/projectr.py index 5d3c2b9a..53ae2945 100644 --- a/www/api/resources/projectr.py +++ b/www/api/resources/projectr.py @@ -497,7 +497,10 @@ def projectr_callback(dataset_id, genecart_id, projection_id, session_id, scope, , "num_dataset_genes": num_target_genes } - adata.close() + # Close adata so that we do not have a stale opened object + if adata.isbacked: + adata.file.close() + if dedup_copy.exists(): dedup_copy.unlink()