Skip to content

Commit

Permalink
Merge branch 'hotfix-uploader-compression-issue'
Browse files Browse the repository at this point in the history
  • Loading branch information
jorvis committed Oct 12, 2024
2 parents ea366fd + f829e6c commit e662db2
Showing 1 changed file with 52 additions and 41 deletions.
93 changes: 52 additions & 41 deletions www/cgi/process_uploaded_expression_dataset.cgi
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import pandas as pd
import scanpy as sc
from scipy import sparse
import anndata
import zipfile

# This has a huge dependency stack of libraries. Occasionally, one of them has methods
# that print debugging information on STDOUT, killing this CGI. So here we redirect
Expand Down Expand Up @@ -92,7 +93,7 @@ def main():
# https://stackoverflow.com/a/22181041/1368079
# https://stackoverflow.com/questions/6024472/start-background-process-daemon-from-cgi-script
# https://groups.google.com/g/comp.lang.python/c/gSRnd0RoVKY?pli=1
do_fork = True
do_fork = False
if do_fork:
sys.stdout = original_stdout
result['success'] = 1
Expand Down Expand Up @@ -305,48 +306,58 @@ def process_mex_3tab(upload_dir):

files_extracted = []

with tarfile.open(filename) as tf:
for entry in tf:
tf.extract(entry, path=upload_dir)

# Nemo suffixes
nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab']
suffix_found = None
if compression_format == 'tarball':
try:
with tarfile.open(filename) as tf:
for entry in tf:
tf.extract(entry, path=upload_dir)

# Nemo suffixes
nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab']
suffix_found = None

for suffix in nemo_suffixes:
if entry.name.endswith(suffix):
suffix_found = suffix
# Rename the file to the appropriate name
os.rename(os.path.join(upload_dir, entry.name),
os.path.join(upload_dir, suffix))

if suffix_found is not None:
files_extracted.append(suffix_found)
else:
files_extracted.append(entry.name)
except tarfile.ReadError:
write_status(upload_dir, 'error', "Bad tarball file. Couldn't extract the tarball.")
return

for suffix in nemo_suffixes:
if entry.name.endswith(suffix):
suffix_found = suffix
# Rename the file to the appropriate name
os.rename(os.path.join(upload_dir, entry.name),
os.path.join(upload_dir, suffix))

if suffix_found is not None:
files_extracted.append(suffix_found)
else:
files_extracted.append(entry.name)

with zipfile.ZipFile(filename) as zf:
for entry in zf.infolist():
zf.extract(entry, path=upload_dir)

# Nemo suffixes
nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab']
suffix_found = None

for suffix in nemo_suffixes:
if entry.filename.endswith(suffix):
suffix_found = suffix
# Rename the file to the appropriate name
os.rename(os.path.join(upload_dir, entry.filename),
os.path.join(upload_dir, suffix))

if suffix_found is not None:
files_extracted.append(suffix_found)
else:
files_extracted.append(entry.filename)
if compression_format == 'zip':
try:
with zipfile.ZipFile(filename) as zf:
for entry in zf.infolist():
zf.extract(entry, path=upload_dir)

# Nemo suffixes
nemo_suffixes = ['DataMTX.tab', 'COLmeta.tab', 'ROWmeta.tab']
suffix_found = None

for suffix in nemo_suffixes:
if entry.filename.endswith(suffix):
suffix_found = suffix
# Rename the file to the appropriate name
os.rename(os.path.join(upload_dir, entry.filename),
os.path.join(upload_dir, suffix))

if suffix_found is not None:
files_extracted.append(suffix_found)
else:
files_extracted.append(entry.filename)
except zipfile.BadZipFile:
write_status(upload_dir, 'error', "Bad zip file. Couldn't extract the zip file.")
return

# Determine the dataset type
dataset_type = tarball_content_type(files_extracted)
dataset_type = package_content_type(files_extracted)

if dataset_type is None:
write_status(upload_dir, 'error', "Unsupported dataset format. Couldn't tell type from file names within the tarball")
Expand All @@ -363,7 +374,7 @@ def write_status(upload_dir, status_name, message):
with open(os.path.join(upload_dir, 'status.json'), 'w') as f:
f.write(json.dumps(status))

def tarball_content_type(filenames):
def package_content_type(filenames):
print("DEBUG: filenames", file=sys.stderr, flush=True)
print(filenames, file=sys.stderr, flush=True)
"""
Expand Down

0 comments on commit e662db2

Please sign in to comment.