Committing some collected cleaning which has been intermittently: removals, FIXMEs, comments, tests not using .._or_create, etc
glormph committed Jun 12, 2024
1 parent 6b2049d commit f6fe991
Showing 19 changed files with 55 additions and 91 deletions.
3 changes: 0 additions & 3 deletions src/backend/analysis/models.py
@@ -283,8 +283,6 @@ class AnalysisDatasetSetValue(models.Model):
class Meta:
constraints = [models.UniqueConstraint(fields=['analysis', 'dataset', 'field'], name='uni_anadsetsfields')]

# FIXME how should we do with pgt DBGEN input? Are those sets, or are they something else?
# they def have sample names, and can be multiple per sample (BAMs merged, VCFs indel/snv etc)

class AnalysisDSInputFile(models.Model):
'''Input files for set-based analysis (isobaric and prefraction-datasets)'''
@@ -310,7 +308,6 @@ class AnalysisFileValue(models.Model):
value = models.TextField()
sfile = models.ForeignKey(filemodels.StoredFile, on_delete=models.CASCADE)

# FIXME this should maybe FK to infile above here?
class Meta:
constraints = [models.UniqueConstraint(fields=['analysis', 'sfile', 'field'], name='uni_anassamplefile')]

1 change: 1 addition & 0 deletions src/backend/analysis/views.py
@@ -1208,6 +1208,7 @@ def write_analysis_log(logline, analysis_id):
analysis.save()


# FIXME need auth on this view
def nextflow_analysis_log(request):
req = json.loads(request.body.decode('utf-8'))
if 'runName' not in req or not req['runName']:
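The FIXME added above notes that this Nextflow log endpoint has no authentication. A minimal sketch of one way to gate it, reusing the client_id/settings.APIKEY pattern seen in jobs/post.py — whether the Nextflow weblog caller can be configured to send such a key is an assumption, not something this commit shows:

import json

from django.conf import settings
from django.http import JsonResponse
from django.views.decorators.http import require_POST


@require_POST
def nextflow_analysis_log(request):
    req = json.loads(request.body.decode('utf-8'))
    # Reject posts without the shared key; mirrors the client_id / APIKEY
    # pattern used for other internal callbacks in this codebase
    if req.get('client_id') != settings.APIKEY:
        return JsonResponse({'error': 'Not authorized'}, status=403)
    ...  # the existing runName handling would follow unchanged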
2 changes: 1 addition & 1 deletion src/backend/datasets/models.py
@@ -83,7 +83,7 @@ class Dataset(models.Model):
date = models.DateTimeField('date created')
runname = models.OneToOneField(RunName, on_delete=models.CASCADE)
datatype = models.ForeignKey(Datatype, on_delete=models.CASCADE)
# storage_loc/share should only ever be updated in jobs' post-run (after moves)
# NB! storage_loc/share should only ever be updated in jobs' post-run (after moves)
# because it is source of truth for where to/from move files
storage_loc = models.TextField(max_length=200, unique=True)
storageshare = models.ForeignKey(ServerShare, on_delete=models.CASCADE)
5 changes: 2 additions & 3 deletions src/backend/datasets/tests.py
@@ -195,6 +195,7 @@ def test_add_files(self):
self.ds.storage_loc, self.tmpsf.filename)))

def test_add_fails(self):
# Fail because there is no storedfile
fn = 'raw_no_sf'
raw = rm.RawFile.objects.create(name=fn, producer=self.prod, claimed=False,
source_md5='raw_no_sf_fakemd5', size=1024, date=timezone.now())
@@ -312,6 +313,7 @@ def test_rename_ok(self):
self.ds.storage_loc, self.f3sf.filename)))

def test_if_added_removed_files_ok(self):
# first rename job http request, then add files, then jobs run:
newname = 'testnewname'
self.assertEqual(dm.Project.objects.filter(name=newname).count(), 0)
resp = self.cl.post(self.url, content_type='application/json',
@@ -364,9 +366,6 @@ def test_if_added_removed_files_ok(self):
self.assertEqual(self.tmpsf.path, self.ds.storage_loc)
self.assertEqual(self.tmpsf.servershare, self.ssnewstore)

# clean up
newdsr.delete()


class SaveSamples(BaseTest):
url = '/datasets/save/samples/'
14 changes: 9 additions & 5 deletions src/backend/datasets/views.py
@@ -260,6 +260,14 @@ def get_admin_params_for_dset(response, dset_id, category):


def update_dataset(data):
# FIXME it is v annoying to do all the hierarchical creations in multiple steps
# in a single method, as this generates a lot of error checking, and things like
# saves that are done when an error pops up later (storage loc is checked last, but a
# proj is saved in the start. Easier to run
# one method per update (exp, proj, dset) - but that will create unneccessary move
# operations. But since it's a rename, it will be short, and usually not all of them
# change at the same time.
# Thus: its time to do "new dset", "new proj" in the excel table view!
dset = models.Dataset.objects.filter(pk=data['dataset_id']).select_related(
'runname__experiment', 'datatype').get()
if 'newprojectname' in data:
@@ -303,17 +311,16 @@ def update_dataset(data):
return JsonResponse({'error': f'Experiment name cannot contain characters except {settings.ALLOWED_PROJEXPRUN_CHARS}'}, status=403)
experiment = models.Experiment(name=data['newexperimentname'],
project=project)
experiment.save()
dset.runname.experiment = experiment
newexp = True
else:
experiment = models.Experiment.objects.get(pk=data['experiment_id'])
experiment.project_id = project.id
experiment.save()
if data['experiment_id'] != dset.runname.experiment_id:
# another experiment was selected
newexp = True
dset.runname.experiment = experiment
experiment.save()
if data['runname'] != dset.runname.name or newexp:
if is_invalid_proj_exp_runnames(data['runname']):
return JsonResponse({'error': f'Run name cannot contain characters except {settings.ALLOWED_PROJEXPRUN_CHARS}'}, status=403)
@@ -1461,9 +1468,6 @@ def save_samples(request):
dataset=dset):
# proj sample w name already in this dataset (no ID passed), but not in any other, so
# eg new species/sampletype, need to update those

# FIXME make sure when input conflicting sample in dset (same name,
# diff types), that this doesnt override an error!
continue
elif not psam.datasetsample_set.count():
# Sample is up for grabs, already registered but not in dataset
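The comment block added to update_dataset above weighs splitting the hierarchical project/experiment/dataset handling into one update method per entity, mainly so that a late validation failure (storage location) cannot land after a project rename has already been saved. A rough sketch of that shape, assuming per-entity helpers and import paths that are not part of this commit:

from django.db import transaction
from django.http import JsonResponse

from datasets import models  # as used by the view above; path assumed


def update_dataset(data):
    # Hypothetical restructuring: each helper validates and saves one entity,
    # and set_rollback undoes earlier saves when a later step reports an error
    dset = models.Dataset.objects.filter(pk=data['dataset_id']).select_related(
            'runname__experiment', 'datatype').get()
    with transaction.atomic():
        for update_one in (update_project, update_experiment, update_runname):
            error_response = update_one(dset, data)  # hypothetical helpers
            if error_response:
                transaction.set_rollback(True)
                return error_response
    return JsonResponse({'error': False})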
1 change: 0 additions & 1 deletion src/backend/home/tests.py
@@ -263,4 +263,3 @@ def test_with_filemove(self):
self.do_refine(ds)
self.assertEqual(jm.Job.objects.filter(funcname='move_dset_servershare',
kwargs__dset_id=ds.pk).count(), 1)
# moverun.delete()
23 changes: 0 additions & 23 deletions src/backend/home/views.py
@@ -314,26 +314,6 @@ def show_jobs(request):
dsets = [dsets]
items[job.id]['dset_ids'] = dsets
stateorder = [jj.Jobstates.ERROR, jj.Jobstates.PROCESSING, jj.Jobstates.PENDING, jj.Jobstates.WAITING]
#####/tasks
#analysis = jv.get_job_analysis(job)
#if analysis:
# analysis = analysis.name
#errors = []
#try:
# errormsg = job.joberror.message
#except jm.JobError.DoesNotExist:
# errormsg = False
#return JsonResponse({'files': fj.count(), 'dsets': 0,
# 'analysis': analysis,
# 'time': datetime.strftime(job.timestamp, '%Y-%m-%d %H:%M'),
# 'errmsg': errormsg,
# 'tasks': {'error': tasks.filter(state=tstates.FAILURE).count(),
# 'procpen': tasks.filter(state=tstates.PENDING).count(),
# 'done': tasks.filter(state=tstates.SUCCESS).count()},
# 'errors': errors,
# })
#####

return JsonResponse({'items': items, 'order':
[x for u in ['user', 'admin'] for s in stateorder
for x in order[u][s]]})
@@ -876,9 +856,6 @@ def create_mzmls(request):
if ds_instype.filter(rawfile__producer__msinstrument__instrumenttype__name='timstof').exists():
filters.append('"scanSumming precursorTol=0.02 scanTimeTol=10 ionMobilityTol=0.1"')
options.append('combineIonMobilitySpectra')
# FIXME deprecate is_docker, since is always docker
if not pwiz.is_docker:
return JsonResponse({'error': 'Cannot process mzML timstof/pasef data with that version'}, status=403)
num_rawfns = filemodels.RawFile.objects.filter(datasetrawfile__dataset_id=data['dsid']).count()
mzmls_exist = filemodels.StoredFile.objects.filter(rawfile__datasetrawfile__dataset=dset,
deleted=False, purged=False, checked=True, mzmlfile__isnull=False)
1 change: 1 addition & 0 deletions src/backend/jobs/jobs.py
@@ -10,6 +10,7 @@


class Jobstates:
# FIXME make in DB integer choices
WAITING = 'wait'
PENDING = 'pending'
QUEUED = 'queued'
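The FIXME added to Jobstates suggests turning these strings into database integer choices. A minimal sketch using Django's built-in IntegerChoices — the integer values and the two labels not visible in this diff are assumptions:

from django.db import models


class Jobstates(models.IntegerChoices):
    WAITING = 1, 'wait'
    PENDING = 2, 'pending'
    QUEUED = 3, 'queued'
    PROCESSING = 4, 'processing'  # label assumed
    ERROR = 5, 'error'  # label assumed
    # ...remaining states would follow the same pattern

# A Job.state column could then be declared as, illustratively:
# state = models.IntegerField(choices=Jobstates.choices, default=Jobstates.WAITING)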
6 changes: 3 additions & 3 deletions src/backend/jobs/management/commands/runjobs.py
@@ -1,7 +1,7 @@
"""
These tasks are to be run by the celery beat automatic task runner every 10 seconds or so.
It contains a VERY SIMPLE job scheduler. The more advanced job scheduling is to be done by
celery chains etc, Nextflow, or Galaxy or whatever one likes.
This management task contains a VERY SIMPLE job scheduler.
More advanced job scheduling is to be done by celery chains etc,
Nextflow, or Galaxy or whatever one likes.
Scheduler runs sequential and waits for each job that contains files running in another job
"""
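To illustrate the "VERY SIMPLE" sequential scheduler the docstring describes — process pending jobs oldest-first and hold back as soon as one still has files in use by a running job — a rough sketch of a single scheduling pass; the Job state field and the file-overlap check are assumptions, since the command body is not shown in this diff:

def run_one_scheduler_pass(Job, Jobstates, files_busy_in_other_job):
    # Hypothetical sketch of one pass, triggered periodically (e.g. celery beat).
    # Stopping at the first blocked job keeps execution sequential.
    for job in Job.objects.filter(state=Jobstates.PENDING).order_by('timestamp'):
        if files_busy_in_other_job(job):
            break
        job.state = Jobstates.QUEUED
        job.save()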
2 changes: 0 additions & 2 deletions src/backend/jobs/post.py
@@ -42,5 +42,3 @@ def task_finished(task_id):
def taskfail_update_db(task_id, msg=False):
update_db(urljoin(settings.KANTELEHOST, reverse('jobs:settask')), json={'task_id': task_id,
'client_id': settings.APIKEY, 'msg': msg, 'state': states.FAILURE})


2 changes: 2 additions & 0 deletions src/backend/jobs/views.py
@@ -220,6 +220,8 @@ def register_external_file(request):
return HttpResponseForbidden()
dataset = {'dataset_id': data['dset_id'], 'removed_files': {},
'added_files': {1: {'id': data['raw_id']}}}
# FIXME dont let just any job change the file state!
# FIXME handle errors in save_or_up
StoredFile.objects.filter(pk=data['sf_id']).update(md5=data['md5'], checked=True)
RawFile.objects.filter(pk=data['raw_id']).update(source_md5=data['md5'])
dsviews.save_or_update_files(dataset)
2 changes: 0 additions & 2 deletions src/backend/kantele/settings.py
@@ -134,8 +134,6 @@

# hardcoded name for fasta DBs
DBFA_FT_NAME = 'database'
# FIXME let QC pipe use its own DB (downloadable or in lehtio server)
LONGQC_FADB_ID = os.environ.get('LONGQC_DBID')
MZREFINER_NXFWFV_ID = os.environ.get('REFINE_MZML_WFVID')

# django
63 changes: 26 additions & 37 deletions src/backend/kantele/tests.py
@@ -74,7 +74,7 @@ def setUp(self):
# File prep, producers etc
self.ft, _ = rm.StoredFileType.objects.get_or_create(name='testft', filetype='tst',
is_rawdata=True)
self.prod, _ = rm.Producer.objects.get_or_create(name='prod1', client_id='abcdefg', shortname='p1', internal=True)
self.prod = rm.Producer.objects.create(name='prod1', client_id='abcdefg', shortname='p1', internal=True)
msit, _ = rm.MSInstrumentType.objects.get_or_create(name='test')
rm.MSInstrument.objects.get_or_create(producer=self.prod, instrumenttype=msit,
filetype=self.ft)
@@ -106,14 +106,13 @@ def setUp(self):
self.f3raw = rm.RawFile.objects.create(name=fn3, producer=self.prod,
source_md5='f3_fakemd5',
size=f3size, date=timezone.now(), claimed=True)
self.f3dsr, _ = dm.DatasetRawFile.objects.get_or_create(dataset=self.ds, rawfile=self.f3raw)
self.f3sf, _ = rm.StoredFile.objects.update_or_create(rawfile=self.f3raw, filename=fn3,
md5=self.f3raw.source_md5, filetype=self.ft,
defaults={'servershare': self.ssnewstore, 'path': self.storloc,
'checked': True})
self.qcs, _ = dm.QuantChannelSample.objects.get_or_create(dataset=self.ds, channel=self.qtch,
self.f3dsr = dm.DatasetRawFile.objects.create(dataset=self.ds, rawfile=self.f3raw)
self.f3sf = rm.StoredFile.objects.create(rawfile=self.f3raw, filename=fn3,
md5=self.f3raw.source_md5, filetype=self.ft, servershare=self.ssnewstore,
path=self.storloc, checked=True)
self.qcs = dm.QuantChannelSample.objects.create(dataset=self.ds, channel=self.qtch,
projsample=self.projsam1)
dm.QuantDataset.objects.get_or_create(dataset=self.ds, quanttype=self.qt)
dm.QuantDataset.objects.create(dataset=self.ds, quanttype=self.qt)
dm.DatasetSample.objects.create(dataset=self.ds, projsample=self.projsam1)

# Pwiz/mzml
@@ -151,42 +150,34 @@ def setUp(self):
dm.DatasetOwner.objects.get_or_create(dataset=self.oldds, user=self.user)
self.oldfpath = os.path.join(settings.SHAREMAP[self.ssoldstorage.name], self.oldstorloc)
oldsize = os.path.getsize(os.path.join(self.oldfpath, oldfn))
self.oldraw, _ = rm.RawFile.objects.get_or_create(name=oldfn, producer=self.prod,
source_md5='old_to_new_fakemd5', size=oldsize, defaults={'date': timezone.now(),
'claimed': True})
self.olddsr, _ = dm.DatasetRawFile.objects.get_or_create(dataset=self.oldds, rawfile=self.oldraw)
self.oldsf, _ = rm.StoredFile.objects.update_or_create(rawfile=self.oldraw, filename=oldfn,
md5=self.oldraw.source_md5, filetype=self.ft,
defaults={'servershare': self.ssoldstorage, 'path': self.oldstorloc,
'checked': True})
self.oldraw = rm.RawFile.objects.create(name=oldfn, producer=self.prod,
source_md5='old_to_new_fakemd5', size=oldsize, date=timezone.now(), claimed=True)
self.olddsr = dm.DatasetRawFile.objects.create(dataset=self.oldds, rawfile=self.oldraw)
self.oldsf = rm.StoredFile.objects.create(rawfile=self.oldraw, filename=oldfn,
md5=self.oldraw.source_md5, filetype=self.ft, servershare=self.ssoldstorage,
path=self.oldstorloc, checked=True)
self.oldqsf = dm.QuantSampleFile.objects.create(rawfile=self.olddsr, projsample=self.projsam2)

# Tmp rawfile
tmpfn = 'raw2'
tmpfpathfn = os.path.join(settings.SHAREMAP[self.sstmp.name], tmpfn)
tmpsize = os.path.getsize(tmpfpathfn)
self.tmpraw, _ = rm.RawFile.objects.get_or_create(name=tmpfn, producer=self.prod,
source_md5='tmpraw_fakemd5', size=tmpsize, defaults={'date': timezone.now(),
'claimed': False})
self.tmpsf, _ = rm.StoredFile.objects.update_or_create(rawfile=self.tmpraw,
md5=self.tmpraw.source_md5, defaults={'filename': tmpfn, 'servershare': self.sstmp,
'path': '', 'checked': True, 'filetype': self.ft})

# FIXME should go to analysis? Maybe reuse in home etc views
self.tmpraw = rm.RawFile.objects.create(name=tmpfn, producer=self.prod,
source_md5='tmpraw_fakemd5', size=tmpsize, date=timezone.now(), claimed=False)
self.tmpsf = rm.StoredFile.objects.create(rawfile=self.tmpraw, md5=self.tmpraw.source_md5,
filename=tmpfn, servershare=self.sstmp, path='', checked=True, filetype=self.ft)

# Library files, for use as input, so claimed and ready
self.libraw, _ = rm.RawFile.objects.update_or_create(name='libfiledone',
producer=self.prod, source_md5='libfilemd5',
size=100, defaults={'claimed': True, 'date': timezone.now()})
self.libraw = rm.RawFile.objects.create(name='libfiledone', producer=self.prod,
source_md5='libfilemd5', size=100, claimed=True, date=timezone.now())

self.sflib, _ = rm.StoredFile.objects.update_or_create(rawfile=self.libraw,
md5=self.libraw.source_md5, filetype=self.ft, defaults={'checked': True,
'filename': self.libraw.name, 'servershare': self.sstmp, 'path': ''})
self.lf, _ = am.LibraryFile.objects.get_or_create(sfile=self.sflib, description='This is a libfile')
self.sflib = rm.StoredFile.objects.create(rawfile=self.libraw, md5=self.libraw.source_md5,
filetype=self.ft, checked=True, filename=self.libraw.name, servershare=self.sstmp, path='')
self.lf = am.LibraryFile.objects.create(sfile=self.sflib, description='This is a libfile')

# User files for input
self.usrfraw, _ = rm.RawFile.objects.update_or_create(name='usrfiledone',
producer=self.prod, source_md5='usrfmd5', size=100,
defaults={'claimed': True, 'date': timezone.now()})
self.usrfraw = rm.RawFile.objects.create(name='usrfiledone', producer=self.prod,
source_md5='usrfmd5', size=100, claimed=True, date=timezone.now())
self.uft, _ = rm.StoredFileType.objects.get_or_create(name='ufileft', filetype='tst',
is_rawdata=False)
self.sfusr, _ = rm.StoredFile.objects.update_or_create(rawfile=self.usrfraw,
@@ -243,6 +234,7 @@ def run_job(self):


class TestMultiStorageServers(BaseIntegrationTest):
# FIXME add test for moving servershare fail on check_error!

def test_add_newtmp_files_to_old_dset(self):
# Fresh start in case multiple tests
@@ -274,6 +266,3 @@ def test_add_newtmp_files_to_old_dset(self):
self.tmpsf.refresh_from_db()
self.assertEqual(self.tmpsf.servershare_id, self.ssnewstore.pk)
self.assertEqual(self.tmpsf.path, self.oldds.storage_loc)

# Clean up
newdsr.delete()
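Several fixtures above switch from get_or_create/update_or_create to plain create, the ".._or_create" part of the commit message. The practical difference in a test context, as a small self-contained example (the rawstatus import path is assumed from the app layout):

from django.test import TestCase

from rawstatus import models as rm


class ProducerFixtureExample(TestCase):
    # django.test.TestCase rolls the database back after every test, so rows
    # made with create() in setUp cannot leak between tests anyway
    def test_create_vs_get_or_create(self):
        prod = rm.Producer.objects.create(name='prod1', client_id='abcdefg',
                shortname='p1', internal=True)
        # get_or_create quietly reuses a matching row instead of guaranteeing
        # a fresh insert, which can mask fixtures shared by accident
        same, created = rm.Producer.objects.get_or_create(name='prod1',
                client_id='abcdefg', shortname='p1', internal=True)
        self.assertFalse(created)
        self.assertEqual(prod.pk, same.pk)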
4 changes: 4 additions & 0 deletions src/backend/rawstatus/jobs.py
@@ -42,6 +42,8 @@ def process(self, **kwargs):


class CreatePDCArchive(SingleFileJob):
'''Archiving of newly arrived files - full datasets can also be archived if they
are not - then we use the BackupDataset job instead'''
refname = 'create_pdc_archive'
task = tasks.pdc_archive

@@ -53,6 +55,7 @@ def process(self, **kwargs):


class RestoreFromPDC(SingleFileJob):
'''For restoring files which are not in a dataset'''
refname = 'restore_from_pdc_archive'
task = tasks.pdc_restore

@@ -203,6 +206,7 @@ def process(self, **kwargs):


class DownloadPXProject(DatasetJob):
# FIXME dupe check?
refname = 'download_px_data'
task = tasks.download_px_file_raw
"""gets sf_ids, of non-checked non-downloaded PX files.
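The FIXME on DownloadPXProject asks about a duplicate check. One conceivable pre-check, matching on the MD5 reported for each PX file — the helper name, the shape of the px_files input and the import path are all assumptions, since the job body is not shown here:

from rawstatus import models as rm  # import path assumed


def px_md5s_already_registered(px_files):
    # Return the reported MD5s that already exist as RawFile.source_md5, so the
    # job could skip re-downloading those files instead of duplicating them
    md5s = [f['md5'] for f in px_files]
    return set(rm.RawFile.objects.filter(source_md5__in=md5s)
               .values_list('source_md5', flat=True))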
6 changes: 1 addition & 5 deletions src/backend/rawstatus/views.py
@@ -45,13 +45,11 @@ def inflow_page(request):
'isfolder': x.is_folder} for x in StoredFileType.objects.filter(user_uploadable=True)]})


@login_required
@staff_member_required
@require_POST
def import_external_data(request):
# Input like so: {share_id: int, dirname: top_lvl_dir, dsets: [{'instrument_id': int, 'name': str, 'files': [(path/to/file.raw', ],
# FIXME thermo files are .raw, but how do we handle bruker raws? they are folders!
if request.method != 'POST':
return JsonResponse({'error': 'Must use POST'}, status=405)
req = json.loads(request.body.decode('utf-8'))
share = ServerShare.objects.get(pk=req['share_id'])
proj = dsmodels.Project.objects.get(pk=settings.PX_PROJECT_ID)
@@ -85,7 +83,6 @@ def import_external_data(request):
return JsonResponse({})


@login_required
@staff_member_required
@require_GET
def scan_raws_tmp(request):
@@ -608,7 +605,6 @@ def rename_file(request):
elif re.match('^[a-zA-Z_0-9\-]*$', newfilename) is None:
return JsonResponse({'error': 'Illegal characteres in new file name'}, status=403)
job = create_job('rename_file', sf_id=sfile.id, newname=newfilename)
print(job)
if job['error']:
return JsonResponse({'error': job['error']}, status=403)
else:
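For context on the decorator changes above: dropping @login_required and the manual POST check leaves behavior intact because the remaining decorators already cover both cases, as this minimal illustration (view body omitted) shows:

from django.contrib.admin.views.decorators import staff_member_required
from django.http import JsonResponse
from django.views.decorators.http import require_POST


@staff_member_required
@require_POST
def import_external_data(request):
    # staff_member_required already sends unauthenticated users to a login page
    # (and additionally requires staff status), so @login_required was redundant;
    # require_POST answers other methods with 405, replacing the manual
    # "if request.method != 'POST'" branch
    return JsonResponse({})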
3 changes: 0 additions & 3 deletions src/docker/develop.env
@@ -15,9 +15,6 @@ LOCAL_PTYPE_ID=
PX_PROJECT_ID=
EXTERNAL_PRODUCER_IDS=

## long QC, as stage
LONGQC_DBID=

## nextflow
REFINE_MZML_WFVID=
REFINE_MZML_DBID=
1 change: 1 addition & 0 deletions src/frontend/analysis/src/App.svelte
@@ -267,6 +267,7 @@ async function fetchWorkflow() {
notif.errors[msg] = 1;
setTimeout(function(msg) { notif.errors[msg] = 0 } , flashtime, msg);
} else {
// FIXME cant this be done in backend?
libfiles = Object.fromEntries(Object.entries(result.wf.libfiles).map(([ft, lf]) => [ft, Object.fromEntries(lf.map(x => [x.id, x]))]));
libfnorder = Object.fromEntries(Object.entries(result.wf.libfiles).map(([ft, lf]) => [ft, lf.map(x => x.id)]));
prev_resultfiles = result.wf.prev_resultfiles;
4 changes: 1 addition & 3 deletions src/frontend/datasets/package.json
@@ -18,8 +18,6 @@
"scripts": {
"build": "rollup -c",
"autobuild": "rollup -c -w",
"dev": "run-p autobuild",
"start": "sirv public --single",
"start:dev": "sirv public --single --dev --host 0.0.0.0"
"dev": "run-p autobuild"
}
}
3 changes: 3 additions & 0 deletions src/frontend/datasets/src/DynamicSelect.svelte
@@ -106,16 +106,19 @@ async function handleKeyInput(event) {
options = Object.fromEntries(Object.entries(fixedoptions));
optorder = fixedorder.length ? fixedorder : Object.keys(options);
selectval = initval;
} else if (event.key.length > 1 && !(event.keyCode === 8 || event.keyCode === 46)) {
// special key without modification effect (e.g. alt), not backspace/delete
return
} else if (intext.length > 2 && fetchUrl) {
selectval = '';
options = await getJSON(`${fetchUrl}?q=${intext}`);
fetchedData = Object.assign({}, options);
delete(options.ok);
optorder = Object.keys(options);
typing = true;
} else if (!fetchUrl && fixedoptions && intext) {
selectval = '';
options = Object.fromEntries(Object.entries(fixedoptions).filter(x => x[1].name.toLowerCase().indexOf(intext.toLowerCase()) > -1));
