Skip to content

Commit

Permalink
Dynamic fields depending on the input header specified (INPUTDEF.value), …
Browse files Browse the repository at this point in the history
…rendered in the frontend and saved in the backend, used when running the job
  • Loading branch information
glormph committed Apr 2, 2024
1 parent e119c98 commit e10483a
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 86 deletions.
21 changes: 7 additions & 14 deletions src/backend/analysis/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,31 +273,24 @@ def process(self, **kwargs):
for fn in sfiles_passed:
infile = {'servershare': fn.servershare.name, 'path': fn.path, 'fn': fn.filename}
if 'setname' in inputdef_fields:
infile['setname'] = kwargs['setnames'].get(str(fn.id), '')
infile['setname'] = kwargs['filesamples'].get(str(fn.id), '')
if 'plate' in inputdef_fields:
infile['plate'] = kwargs['platenames'].get(str(fn.rawfile.datasetrawfile.dataset_id), '')
if 'sampleID' in inputdef_fields:
# sampleID is for pgt / dbgenerator
infile['sampleID'] = fn.rawfile.datasetrawfile.quantsamplefile.projsample.sample
# No fallback, is required if in header
infile['sampleID'] = kwargs['filesamples'][str(fn.id)]
if 'fraction' in inputdef_fields:
infile['fraction'] = kwargs['infiles'].get(str(fn.id), {}).get('fr')
if 'instrument' in inputdef_fields:
# No fallback, instrument in header cannot be ''
infile['instrument'] = fn.rawfile.producer.msinstrument.instrumenttype.name
if 'channel' in inputdef_fields:
# For non-pooled labelcheck
# For non-pooled labelcheck, cannot be ''
infile['channel'] = fn.rawfile.datasetrawfile.quantfilechannel.channel.channel.name
if 'file_type' in inputdef_fields:
infile['file_type'] = fn.filetype.filetype
if 'pep_prefix' in inputdef_fields:
# FIXME needs to be able to change to none, mutalt (VCF), fusion_squid, etc
# We can probably use setname frontend code for that
infile['pep_prefix'] = 'none'


# FIXME add the pgt DB/other fields here
# expr_str expr_thresh sample_gtf_file pep_prefix
# Dynamic fields
infile.update(kwargs['filefields'][fn.pk])
infiles.append(infile)
# FIXME this in tasks and need to write header
# FIXME bigrun not hardcode, probably need to remove when new infra
shortname = models.UserWorkflow.WFTypeChoices(analysis.nextflowsearch.workflow.wftype).name
bigrun = shortname == 'PISEP' or len(infiles) > 500
Expand Down
2 changes: 1 addition & 1 deletion src/backend/analysis/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def run_nextflow_workflow(self, run, params, stagefiles, profiles, nf_version):
else:
fndir = os.path.join(settings.SHAREMAP[fn['servershare']], fn['path'])
fnpath = os.path.join(fndir, fn['fn'])
fn_metadata = '\t'.join(fn[x] for x in run['components']['INPUTDEF'][1:] if fn[x])
fn_metadata = '\t'.join(fn[x] or '' for x in run['components']['INPUTDEF'][1:])
fp.write(f'\n{fnpath}\t{fn_metadata}')
params.extend(['--input', os.path.join(rundir, 'inputdef.txt')])

Expand Down
63 changes: 34 additions & 29 deletions src/backend/analysis/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def setUp(self):
path='', checked=True, filetype=self.ft2)

c_ch = am.PsetComponent.ComponentChoices
am.PsetComponent.objects.create(pset=self.pset, component=c_ch.INPUTDEF, value=['plate', 2, 3])
self.inputdef = am.PsetComponent.objects.create(pset=self.pset, component=c_ch.INPUTDEF, value=['file_path', 'plate', 'fake'])
am.PsetComponent.objects.create(pset=self.pset, component=c_ch.ISOQUANT)
am.PsetComponent.objects.create(pset=self.pset, component=c_ch.ISOQUANT_SAMPLETABLE)
am.PsetComponent.objects.create(pset=self.pset, component=c_ch.PREFRAC, value='.*fr([0-9]+).*mzML$')
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_diff_dsets(self):
'ana': f'{self.wftype.name}_{self.ana.name}',
'date': datetime.strftime(self.ana.date, '%Y-%m-%d')}],
'datasets': {f'{self.ds.pk}': {'fields': {'__regex': f'{self.ads1.value}'},
'setname': f'{self.ads1.setname.setname}', 'filesaresets': False,
'setname': f'{self.ads1.setname.setname}', 'allfilessamesample': True,
'files': {}, 'picked_ftype': f'mzML (pwiz {self.f3sfmz.mzmlfile.pwiz.version_description})'}},
}
self.assertJSONEqual(resp.content.decode('utf-8'), json.dumps(checkjson))
Expand Down Expand Up @@ -186,7 +186,7 @@ def test_same_dsets(self):
'datasets': {f'{self.ds.pk}': {'fields': {'__regex': f'{self.ads1.value}'},
'setname': f'{self.ads1.setname.setname}',
'picked_ftype': f'mzML (pwiz {self.f3sfmz.mzmlfile.pwiz.version_description})',
'filesaresets': False, 'files': {}},
'allfilessamesample': True, 'files': {}},
}
}
self.assertJSONEqual(resp.content.decode('utf-8'), json.dumps(checkjson))
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_diff_dsets_no_mzmlfile(self):
'resultfiles': [{'id': self.resultfnlf.sfile.pk, 'fn': self.resultfnlf.sfile.filename,
'ana': f'{self.wftype.name}_{self.analf.name}',
'date': datetime.strftime(self.ana.date, '%Y-%m-%d')}],
'datasets': {f'{self.oldds.pk}': {'filesaresets': True, 'fields': {},
'datasets': {f'{self.oldds.pk}': {'allfilessamesample': False, 'fields': {},
'picked_ftype': self.afs2.sfile.filetype.name,
'files': {f'{self.afs2.sfile_id}': {'id': self.afs2.sfile_id,
'fields': {'__sample': self.afs2.value}}}},
Expand Down Expand Up @@ -351,7 +351,7 @@ def test_new_ok(self):
'prefrac': False,
'hr': False,
'setname': '',
'fields': {'frregex': am.PsetComponent.objects.get(pset=self.pset,
'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset,
component=am.PsetComponent.ComponentChoices.PREFRAC).value},
'instruments': [self.prod.name],
'instrument_types': [self.prod.shortname],
Expand All @@ -360,13 +360,14 @@ def test_new_ok(self):
'is_isobaric': True},
'nrstoredfiles': [1, self.ft.name],
'channels': {self.qch.name: [self.projsam1.sample, self.qch.pk]},
'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': ''}}],
self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': ''}}],
'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}],
self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}],
},
'incomplete_files': [],
'picked_ftype': mztype,
'filesaresets': False,
'allfilessamesample': True,
}},
'field_order': self.inputdef.value[-1:],
'error': False,
'errmsg': [],
}
Expand All @@ -386,21 +387,22 @@ def test_with_saved_analysis(self):
'prefrac': False,
'hr': False,
'setname': self.ads1.setname.setname,
'fields': {'frregex': self.ads1.value},
'fields': {'fake': '', '__regex': self.ads1.value},
'instruments': [self.prod.name],
'instrument_types': [self.prod.shortname],
'qtype': {'name': self.ds.quantdataset.quanttype.name,
'short': self.ds.quantdataset.quanttype.shortname,
'is_isobaric': True},
'nrstoredfiles': [1, self.ft.name],
'channels': {self.qch.name: [self.projsam1.sample, self.qch.pk]},
'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': ''}}],
self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': ''}}],
'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}],
self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}],
},
'incomplete_files': [],
'picked_ftype': mztype,
'filesaresets': False,
'allfilessamesample': True,
}},
'field_order': self.inputdef.value[-1:],
'error': False,
'errmsg': [],
}
Expand All @@ -424,7 +426,7 @@ def test_new_ok(self):
'prefrac': False,
'hr': False,
'setname': '',
'fields': {'frregex': am.PsetComponent.objects.get(pset=self.pset,
'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset,
component=am.PsetComponent.ComponentChoices.PREFRAC).value},
'instruments': [self.prod.name],
'instrument_types': [self.prod.shortname],
Expand All @@ -433,12 +435,13 @@ def test_new_ok(self):
'is_isobaric': False},
'nrstoredfiles': [1, self.ft.name],
'channels': False,
'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.oldqsf.projsample.sample}, 'dsetsample': self.oldqsf.projsample.sample}],
'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.oldqsf.projsample.sample, 'fake': ''}, 'dsetsample': self.oldqsf.projsample.sample}],
},
'incomplete_files': [],
'picked_ftype': self.ft.name,
'filesaresets': False,
'allfilessamesample': True,
}},
'field_order': self.inputdef.value[-1:],
'error': False,
'errmsg': [],
}
Expand All @@ -459,7 +462,7 @@ def test_with_saved_analysis(self):
'prefrac': False,
'hr': False,
'setname': '',
'fields': {'frregex': am.PsetComponent.objects.get(pset=self.pset,
'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset,
component=am.PsetComponent.ComponentChoices.PREFRAC).value},
'instruments': [self.prod.name],
'instrument_types': [self.prod.shortname],
Expand All @@ -468,12 +471,13 @@ def test_with_saved_analysis(self):
'is_isobaric': False},
'nrstoredfiles': [1, self.ft.name],
'channels': False,
'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.afs2.value}, 'dsetsample': self.oldqsf.projsample.sample}],
'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.afs2.value, 'fake': ''}, 'dsetsample': self.oldqsf.projsample.sample}],
},
'incomplete_files': [],
'picked_ftype': self.ft.name,
'filesaresets': True,
'allfilessamesample': False,
}},
'field_order': self.inputdef.value[-1:],
'error': False,
'errmsg': [],
}
Expand Down Expand Up @@ -580,8 +584,9 @@ def test_new_analysis(self):
}},
},
'analysisname': 'Test new analysis',
'frregex': {f'{self.ds.pk}': 'fr_find'},
'fnsetnames': {},
# FIXME add some fields
'fnfields': {},
'dsetfields': {f'{self.ds.pk}': {'__regex': 'fr_find'}},
'params': params,
'singlefiles': {self.pfn2.pk: self.sflib.pk},
'multifiles': {self.pfn1.pk: [self.sfusr.pk]},
Expand All @@ -605,7 +610,7 @@ def test_new_analysis(self):
for adsif in am.AnalysisDSInputFile.objects.filter(analysisset__analysis=ana):
self.assertEqual(adsif.dsanalysis.dataset_id, self.ds.pk)
self.assertEqual(adsif.analysisset.setname, postdata['dssetnames'][self.ds.pk])
self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['frregex'][f'{self.ds.pk}'])
self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['dsetfields'][f'{self.ds.pk}']['__regex'])
PT = am.Param.PTypes
for ap in ana.analysisparam_set.all():
pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams',
Expand Down Expand Up @@ -641,8 +646,8 @@ def test_existing_analysis(self):
}},
},
'analysisname': 'Test existing analysis',
'frregex': {f'{self.ds.pk}': 'fr_find'},
'fnsetnames': {},
'fnfields': {},
'dsetfields': {f'{self.ds.pk}': {'__regex': 'fr_find'}},
'params': params,
'singlefiles': {self.pfn2.pk: self.sflib.pk},
'multifiles': {self.pfn1.pk: [self.sfusr.pk]},
Expand All @@ -665,7 +670,7 @@ def test_existing_analysis(self):
for adsif in am.AnalysisDSInputFile.objects.filter(analysisset__analysis=self.ana):
self.assertEqual(adsif.dsanalysis.dataset_id, self.ds.pk)
self.assertEqual(adsif.analysisset.setname, postdata['dssetnames'][self.ds.pk])
self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['frregex'][f'{self.ds.pk}'])
self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['dsetfields'][f'{self.ds.pk}']['__regex'])
PT = am.Param.PTypes
for ap in self.ana.analysisparam_set.all():
pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams',
Expand Down Expand Up @@ -701,8 +706,8 @@ def test_existing_analysis(self):
'ISOQUANT': {},
},
'analysisname': 'Test existing analysis LF',
'frregex': {f'{self.oldds.pk}': ''},
'fnsetnames': {self.oldsf.pk: 'testsample'},
'dsetfields': {f'{self.oldds.pk}': {'__regex': ''}},
'fnfields': {self.oldsf.pk: {'__sample': 'testsample'}},
'params': params,
'singlefiles': {self.pfn2.pk: self.sflib.pk},
'multifiles': {self.pfn1.pk: [self.sfusr.pk]},
Expand All @@ -723,7 +728,7 @@ def test_existing_analysis(self):
self.assertFalse(hasattr(self.analf, 'analysissampletable'))
self.assertEqual(am.AnalysisDSInputFile.objects.filter(analysisset__analysis=self.analf).count(), 0)
for afs in self.analf.analysisfilevalue_set.all():
self.assertEqual(postdata['fnsetnames'][afs.sfile_id], afs.value)
self.assertEqual(postdata['fnfields'][afs.sfile_id]['__sample'], afs.value)
PT = am.Param.PTypes
for ap in self.analf.analysisparam_set.all():
pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams',
Expand Down Expand Up @@ -780,8 +785,8 @@ def test_failing(self):
'ISOQUANT': {},
},
'analysisname': 'Test existing analysis LF',
'frregex': {f'{newds.pk}': ''},
'fnsetnames': {},
'fnfields': {},
'dsetfields': {f'{newds.pk}': {'__regex': ''}},
'params': params,
'singlefiles': {self.pfn2.pk: self.sflib.pk},
'multifiles': {self.pfn1.pk: [self.sfusr.pk]},
Expand Down
Loading

0 comments on commit e10483a

Please sign in to comment.