diff --git a/src/backend/analysis/jobs.py b/src/backend/analysis/jobs.py index 6564847f..86a5a485 100644 --- a/src/backend/analysis/jobs.py +++ b/src/backend/analysis/jobs.py @@ -137,7 +137,7 @@ def recurse_nrdsets_baseanalysis(aba): old_mzmls, old_dsets = recurse_nrdsets_baseanalysis(older_aba) # First get stripnames of old ds strips = {} - for oldads in aba.base_analysis.analysisdatasetsetname_set.select_related('dataset__prefractionationdataset__hiriefdataset'): + for oldads in aba.base_analysis.analysisdatasetsetvalue_set.select_related('dataset__prefractionationdataset__hiriefdataset'): if hasattr(oldads.dataset, 'prefractionationdataset'): pfd = oldads.dataset.prefractionationdataset if hasattr(pfd, 'hiriefdataset'): @@ -152,26 +152,28 @@ def recurse_nrdsets_baseanalysis(aba): # This would in 3. give us all oldmzmls from 1. and 2., so setB would be double single_ana_oldmzml = {} single_ana_oldds = {} + regexes = {x.dataset_id: x.value for x in models.AnalysisDatasetSetValue.objects.filter( + analysis=aba.base_analysis, field='__regex')} for asf in models.AnalysisDSInputFile.objects.filter( - analysis=aba.base_analysis).select_related( - 'sfile__rawfile__producer', 'analysisdset__setname'): - if asf.analysisdset.regex: - frnr = re.match(asf.analysisdset.regex, asf.sfile.filename) or False + analysisset__analysis=aba.base_analysis).select_related( + 'sfile__rawfile__producer', 'analysisset', 'dsanalysis'): + if asf.dsanalysis.dataset_id in regexes: + frnr = re.match(regexes[asf.dsanalysis.dataset_id], asf.sfile.filename) or False frnr = frnr.group(1) if frnr else 'NA' else: frnr = 'NA' oldasf = {'fn': asf.sfile.filename, 'instrument': asf.sfile.rawfile.producer.name, - 'setname': asf.analysisdset.setname.setname, - 'plate': strips[asf.analysisdset.dataset_id], + 'setname': asf.analysisset.setname, + 'plate': strips[asf.dsanalysis.dataset_id], 'fraction': frnr, } try: - single_ana_oldmzml[asf.analysisdset.setname.setname].append(oldasf) - single_ana_oldds[asf.analysisdset.setname.setname].add(asf.analysisdset.dataset_id) + single_ana_oldmzml[asf.analysisset.setname].append(oldasf) + single_ana_oldds[asf.analysisset.setname].add(asf.dsanalysis.dataset_id) except KeyError: - single_ana_oldmzml[asf.analysisdset.setname.setname] = [oldasf] - single_ana_oldds[asf.analysisdset.setname.setname] = {asf.analysisdset.dataset_id} + single_ana_oldmzml[asf.analysisset.setname] = [oldasf] + single_ana_oldds[asf.analysisset.setname] = {asf.dsanalysis.dataset_id} old_mzmls.update(single_ana_oldmzml) old_dsets.update(single_ana_oldds) return old_mzmls, old_dsets @@ -236,8 +238,8 @@ def process(self, **kwargs): # Now remove obsolete deleted-from-dataset files from job (e.g.
corrupt, empty, etc) obsolete = sfiles_passed.exclude(rawfile__datasetrawfile__dataset__datasetanalysis__in=dsa) - analysis.analysisdsinputfile_set.filter(sfile__in=obsolete).delete() - analysis.analysisfilesample_set.filter(sfile__in=obsolete).delete() + models.AnalysisDSInputFile.objects.filter(analysisset__analysis=analysis, sfile__in=obsolete).delete() + analysis.analysisfilevalue_set.filter(sfile__in=obsolete).delete() rm.FileJob.objects.filter(job_id=job.pk, storedfile__in=obsolete).delete() for del_sf in obsolete: # FIXME setnames/frac is specific @@ -271,31 +273,24 @@ def process(self, **kwargs): for fn in sfiles_passed: infile = {'servershare': fn.servershare.name, 'path': fn.path, 'fn': fn.filename} if 'setname' in inputdef_fields: - infile['setname'] = kwargs['setnames'].get(str(fn.id), '') + infile['setname'] = kwargs['filesamples'].get(str(fn.id), '') if 'plate' in inputdef_fields: infile['plate'] = kwargs['platenames'].get(str(fn.rawfile.datasetrawfile.dataset_id), '') if 'sampleID' in inputdef_fields: # sampleID is for pgt / dbgenerator - infile['sampleID'] = fn.rawfile.datasetrawfile.quantsamplefile.projsample.sample + # No fallback, is required if in header + infile['sampleID'] = kwargs['filesamples'][str(fn.id)] if 'fraction' in inputdef_fields: infile['fraction'] = kwargs['infiles'].get(str(fn.id), {}).get('fr') if 'instrument' in inputdef_fields: + # No fallback, instrument in header cannot be '' infile['instrument'] = fn.rawfile.producer.msinstrument.instrumenttype.name if 'channel' in inputdef_fields: - # For non-pooled labelcheck + # For non-pooled labelcheck, cannot be '' infile['channel'] = fn.rawfile.datasetrawfile.quantfilechannel.channel.channel.name - if 'file_type' in inputdef_fields: - infile['file_type'] = fn.filetype.filetype - if 'pep_prefix' in inputdef_fields: - # FIXME needs to be able to change to none, mutalt (VCF), fusion_squid, etc - # We can probably use setname frontend code for that - infile['pep_prefix'] = 'none' - - - # FIXME add the pgt DB/other fields here - # expr_str expr_thresh sample_gtf_file pep_prefix + # Dynamic fields + infile.update(kwargs['filefields'][fn.pk]) infiles.append(infile) - # FIXME this in tasks and need to write header # FIXME bigrun not hardcode, probably need to remove when new infra shortname = models.UserWorkflow.WFTypeChoices(analysis.nextflowsearch.workflow.wftype).name bigrun = shortname == 'PISEP' or len(infiles) > 500 @@ -312,7 +307,7 @@ def process(self, **kwargs): run['infiles'] = infiles else: # SELECT prefrac with fraction regex to get fractionated datasets in old analysis - if ana_baserec.base_analysis.exclude(analysisdatasetsetname__regex='').count(): + if ana_baserec.base_analysis.filter(analysisdatasetsetvalue__field='__regex').count(): # rerun/complement runs with fractionated base analysis need --oldmzmldef parameter old_infiles, old_dsets = recurse_nrdsets_baseanalysis(ana_baserec) run['old_infiles'] = ['{}\t{}'.format(x['fn'], '\t'.join([x[key] for key in run['components']['INPUTDEF']])) diff --git a/src/backend/analysis/migrations/0048_auto_20240326_1509.py b/src/backend/analysis/migrations/0048_auto_20240326_1509.py new file mode 100644 index 00000000..f5cfeff6 --- /dev/null +++ b/src/backend/analysis/migrations/0048_auto_20240326_1509.py @@ -0,0 +1,49 @@ +# Generated by Django 3.2.13 on 2024-03-26 15:09 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('rawstatus', '0028_alter_storedfile_checked'), + 
('datasets', '0019_deletes'), + ('analysis', '0047_alter_param_ptype'), + ] + + operations = [ + migrations.RenameModel( + old_name='AnalysisDatasetSetname', + new_name='AnalysisDatasetSetValue', + ), + migrations.RenameModel( + old_name='AnalysisFileSample', + new_name='AnalysisFileValue', + ), + migrations.RenameField( + model_name='AnalysisFileValue', + old_name='sample', + new_name='value', + ), + migrations.RemoveConstraint( + model_name='analysisdsinputfile', + name='uni_anainfile', + ), + migrations.RemoveConstraint( + model_name='analysisfilevalue', + name='uni_anassamplefile', + ), + migrations.AddField( + model_name='analysisdsinputfile', + name='analysisset', + field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, to='analysis.analysissetname'), + preserve_default=False, + ), + migrations.AddField( + model_name='analysisdsinputfile', + name='dsanalysis', + field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, to='analysis.datasetanalysis'), + preserve_default=False, + ), + ] diff --git a/src/backend/analysis/migrations/0049_auto_20240327_1144.py b/src/backend/analysis/migrations/0049_auto_20240327_1144.py new file mode 100644 index 00000000..44d40d0c --- /dev/null +++ b/src/backend/analysis/migrations/0049_auto_20240327_1144.py @@ -0,0 +1,50 @@ +# Generated by Django 3.2.13 on 2024-03-27 11:44 + +from django.db import migrations, models +import django.db.models.deletion +from django.db.models import OuterRef, Subquery + + +def remove_dups(apps, s): + DSA = apps.get_model('analysis', 'DatasetAnalysis') + for dsa in DSA.objects.all()[::-1]: + if DSA.objects.filter(dataset=dsa.dataset, analysis=dsa.analysis).count() > 1: + dsa.delete() + + +def populate_analysisset(apps, s): + ADSI = apps.get_model('analysis', 'AnalysisDSInputFile') + ADSI.objects.update(analysisset=Subquery(ADSI.objects.filter(pk=OuterRef('pk')).values('analysisdset__setname')[:1])) + + +def moveback_analysisset(apps, s): + pass + + +def populate_dsanalysis(apps, s): + ADSI = apps.get_model('analysis', 'AnalysisDSInputFile') + ADSV = apps.get_model('analysis', 'AnalysisDatasetSetValue') + DSA = apps.get_model('analysis', 'DatasetAnalysis') + ADSI.objects.update(dsanalysis=Subquery(DSA.objects.filter( + dataset=Subquery(ADSV.objects.filter(pk=OuterRef(OuterRef('analysisdset'))).values('dataset')[:1]), + analysis=OuterRef('analysis')).values('pk')[:1])) + + +def moveback_dsanalysis(apps, s): + pass + +def fake(apps, s): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0048_auto_20240326_1509'), + ] + + operations = [ + migrations.RunPython(remove_dups, fake), + migrations.RunPython(populate_analysisset, moveback_analysisset), + migrations.RunPython(populate_dsanalysis, moveback_dsanalysis), + ] diff --git a/src/backend/analysis/migrations/0050_delete_old_fields_20240327_1350.py b/src/backend/analysis/migrations/0050_delete_old_fields_20240327_1350.py new file mode 100644 index 00000000..1ec84935 --- /dev/null +++ b/src/backend/analysis/migrations/0050_delete_old_fields_20240327_1350.py @@ -0,0 +1,29 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0049_auto_20240327_1144'), + ] + + operations = [ + migrations.AddConstraint( + model_name='datasetanalysis', + constraint=models.UniqueConstraint(fields=('analysis', 'dataset'), name='uni_dsa_anadsets'), + ), + + migrations.RemoveField( + model_name='analysisdsinputfile', + 
name='analysis', + ), + migrations.RemoveField( + model_name='analysisdsinputfile', + name='analysisdset', + ), + migrations.AddConstraint( + model_name='analysisdsinputfile', + constraint=models.UniqueConstraint(fields=('analysisset', 'sfile'), name='uni_anaset_infile'), + ), + ] diff --git a/src/backend/analysis/migrations/0051_auto_20240327_2033.py b/src/backend/analysis/migrations/0051_auto_20240327_2033.py new file mode 100644 index 00000000..88f47107 --- /dev/null +++ b/src/backend/analysis/migrations/0051_auto_20240327_2033.py @@ -0,0 +1,62 @@ +# Generated by Django 3.2.13 on 2024-03-27 20:33 + +from django.db import migrations, models + + +def sample_or_regex_to_field(apps, s): + ADSV = apps.get_model('analysis', 'AnalysisDatasetSetValue') + ADSV.objects.filter(regex='').delete() + ADSV.objects.update(field='__regex', value=models.F('regex')) + AFS = apps.get_model('analysis', 'AnalysisFileValue') + AFS.objects.update(field='__sample') + + +def fake(a, s): + pass + + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0050_delete_old_fields_20240327_1350'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='analysisdatasetsetvalue', + name='uni_anadsets', + ), + migrations.AddField( + model_name='analysisdatasetsetvalue', + name='field', + field=models.TextField(default=''), + preserve_default=False, + ), + migrations.AddField( + model_name='analysisdatasetsetvalue', + name='value', + field=models.TextField(default=''), + preserve_default=False, + ), + migrations.AddField( + model_name='analysisfilevalue', + name='field', + field=models.TextField(default=''), + preserve_default=False, + ), + migrations.RunPython(sample_or_regex_to_field, fake), + + migrations.AddConstraint( + model_name='analysisdatasetsetvalue', + constraint=models.UniqueConstraint(fields=('analysis', 'dataset', 'field'), name='uni_anadsetsfields'), + ), + migrations.AddConstraint( + model_name='analysisfilevalue', + constraint=models.UniqueConstraint(fields=('analysis', 'sfile', 'field'), name='uni_anassamplefile'), + ), + migrations.RemoveField( + model_name='analysisdatasetsetvalue', + name='regex', + ), + ] diff --git a/src/backend/analysis/models.py b/src/backend/analysis/models.py index 3d129369..81c82657 100644 --- a/src/backend/analysis/models.py +++ b/src/backend/analysis/models.py @@ -251,8 +251,7 @@ class AnalysisSampletable(models.Model): class AnalysisSetname(models.Model): - '''All set or sample names in an analysis that are per dataset, - which means prefractionated proteomics data''' + '''All set or sample names in an analysis that are per dataset''' analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) setname = models.TextField() @@ -260,7 +259,17 @@ class Meta: constraints = [models.UniqueConstraint(fields=['analysis', 'setname'], name='uni_anasets')] -class AnalysisDatasetSetname(models.Model): +class DatasetAnalysis(models.Model): + analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) + dataset = models.ForeignKey(dsmodels.Dataset, on_delete=models.CASCADE) + # cannot put setname here because of searches without dset/setname + # model used in reporting, and also for finding datasets for base analysis etc + + class Meta: + constraints = [models.UniqueConstraint(fields=['analysis', 'dataset'], name='uni_dsa_anadsets')] + + +class AnalysisDatasetSetValue(models.Model): '''Dataset mapping to setnames (multiple dataset can have the same setname)''' # Note that datasets can be deleted, or have their file contents changed # That means 
this is not to be trusted for future bookkeeping of what was in the analysis @@ -268,41 +277,42 @@ class AnalysisDatasetSetname(models.Model): analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) dataset = models.ForeignKey(dsmodels.Dataset, on_delete=models.CASCADE) setname = models.ForeignKey(AnalysisSetname, on_delete=models.CASCADE, null=True) - regex = models.TextField() # optional + field = models.TextField() + value = models.TextField() class Meta: - constraints = [models.UniqueConstraint(fields=['analysis', 'dataset'], name='uni_anadsets')] + constraints = [models.UniqueConstraint(fields=['analysis', 'dataset', 'field'], name='uni_anadsetsfields')] # FIXME how should we do with pgt DBGEN input? Are those sets, or are they something else? # they def have sample names, and can be multiple per sample (BAMs merged, VCFs indel/snv etc) class AnalysisDSInputFile(models.Model): '''Input files for set-based analysis (isobaric and prefraction-datasets)''' - analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) + dsanalysis = models.ForeignKey(DatasetAnalysis, on_delete=models.CASCADE) sfile = models.ForeignKey(filemodels.StoredFile, on_delete=models.CASCADE) - analysisdset = models.ForeignKey(AnalysisDatasetSetname, on_delete=models.CASCADE) + analysisset = models.ForeignKey(AnalysisSetname, on_delete=models.CASCADE) class Meta: - constraints = [models.UniqueConstraint(fields=['analysis', 'sfile'], name='uni_anainfile')] + constraints = [models.UniqueConstraint(fields=['analysisset', 'sfile'], name='uni_anaset_infile')] -class AnalysisFileSample(models.Model): +class AnalysisFileValue(models.Model): '''If one sample per file is used in labelfree analyses, the samples are stored here''' + # this assumes at least one entry of this model per file/analysis + # (for non-set data), so samplename is a field. This is the only mapping of + # file/analysis we have currently for non-set data. If there's ever need + # of mapping files WITHOUT field/value for an analysis, we can break out + # to an extra model, alternatively null the fields + analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) - sample = models.TextField() + field = models.TextField() + value = models.TextField() sfile = models.ForeignKey(filemodels.StoredFile, on_delete=models.CASCADE) # FIXME this should maybe FK to infile above here? 
class Meta: - constraints = [models.UniqueConstraint(fields=['analysis', 'sfile'], name='uni_anassamplefile')] - - -class DatasetAnalysis(models.Model): - analysis = models.ForeignKey(Analysis, on_delete=models.CASCADE) - dataset = models.ForeignKey(dsmodels.Dataset, on_delete=models.CASCADE) - # cannot put setname here because of searches without dset/setname - # model used in reporting, and also for finding datasets for base analysis etc + constraints = [models.UniqueConstraint(fields=['analysis', 'sfile', 'field'], name='uni_anassamplefile')] class AnalysisIsoquant(models.Model): diff --git a/src/backend/analysis/tasks.py b/src/backend/analysis/tasks.py index c76b32a3..1e61d59d 100644 --- a/src/backend/analysis/tasks.py +++ b/src/backend/analysis/tasks.py @@ -161,7 +161,7 @@ def run_nextflow_workflow(self, run, params, stagefiles, profiles, nf_version): else: fndir = os.path.join(settings.SHAREMAP[fn['servershare']], fn['path']) fnpath = os.path.join(fndir, fn['fn']) - fn_metadata = '\t'.join(fn[x] for x in run['components']['INPUTDEF'][1:] if fn[x]) + fn_metadata = '\t'.join(fn[x] or '' for x in run['components']['INPUTDEF'][1:]) fp.write(f'\n{fnpath}\t{fn_metadata}') params.extend(['--input', os.path.join(rundir, 'inputdef.txt')]) diff --git a/src/backend/analysis/tests.py b/src/backend/analysis/tests.py index bdc8b7ed..d814978e 100644 --- a/src/backend/analysis/tests.py +++ b/src/backend/analysis/tests.py @@ -36,7 +36,7 @@ def setUp(self): path='', checked=True, filetype=self.ft2) c_ch = am.PsetComponent.ComponentChoices - am.PsetComponent.objects.create(pset=self.pset, component=c_ch.INPUTDEF, value=['plate', 2, 3]) + self.inputdef = am.PsetComponent.objects.create(pset=self.pset, component=c_ch.INPUTDEF, value=['file_path', 'plate', 'fake']) am.PsetComponent.objects.create(pset=self.pset, component=c_ch.ISOQUANT) am.PsetComponent.objects.create(pset=self.pset, component=c_ch.ISOQUANT_SAMPLETABLE) am.PsetComponent.objects.create(pset=self.pset, component=c_ch.PREFRAC, value='.*fr([0-9]+).*mzML$') @@ -55,7 +55,7 @@ def setUp(self): self.wf.nfwfversionparamsets.add(self.nfwf) # Create analysis for isoquant: self.ana = am.Analysis.objects.create(user=self.user, name='testana_iso', storage_dir='testdir_iso') - am.DatasetAnalysis.objects.create(analysis=self.ana, dataset=self.ds) + self.dsa = am.DatasetAnalysis.objects.create(analysis=self.ana, dataset=self.ds) self.anajob = jm.Job.objects.create(funcname='testjob', kwargs={}, state=jj.Jobstates.WAITING, timestamp=timezone.now()) self.nfs = am.NextflowSearch.objects.create(analysis=self.ana, nfwfversionparamset=self.nfwf, @@ -76,7 +76,7 @@ def setUp(self): # Create analysis for LF self.analf = am.Analysis.objects.create(user=self.user, name='testana_lf', storage_dir='testdirlf') - am.DatasetAnalysis.objects.create(analysis=self.analf, dataset=self.oldds) + self.dsalf = am.DatasetAnalysis.objects.create(analysis=self.analf, dataset=self.oldds) self.anajoblf = jm.Job.objects.create(funcname='testjob', kwargs={}, state=jj.Jobstates.WAITING, timestamp=timezone.now()) self.nfslf = am.NextflowSearch.objects.create(analysis=self.analf, nfwfversionparamset=self.nfwf, @@ -97,9 +97,10 @@ def setUp(self): sfile=self.anasfile2) self.anaset = am.AnalysisSetname.objects.create(analysis=self.ana, setname='set1') - self.ads1 = am.AnalysisDatasetSetname.objects.create(analysis=self.ana, - dataset=self.ds, setname=self.anaset, regex='hej') - self.adsif = am.AnalysisDSInputFile.objects.create(analysis=self.ana, sfile=self.f3sfmz, analysisdset=self.ads1) + 
self.ads1 = am.AnalysisDatasetSetValue.objects.create(analysis=self.ana, + dataset=self.ds, setname=self.anaset, field='__regex', value='hej') + self.adsif = am.AnalysisDSInputFile.objects.create(sfile=self.f3sfmz, analysisset=self.anaset, + dsanalysis=self.dsa) self.isoqvals = {'denoms': {self.qch.pk: True}, 'sweep': False, 'report_intensity': False} am.AnalysisIsoquant.objects.create(analysis=self.ana, setname=self.anaset, value=self.isoqvals) @@ -112,7 +113,8 @@ class AnalysisLabelfreeSamples(AnalysisTest): def setUp(self): super().setUp() - self.afs2, _ = am.AnalysisFileSample.objects.get_or_create(analysis=self.analf, sample='newname2', sfile=self.oldsf) + self.afs2, _ = am.AnalysisFileValue.objects.get_or_create(analysis=self.analf, + value='newname2', field='__sample', sfile=self.oldsf) class TestNewAnalysis(BaseTest): @@ -153,8 +155,8 @@ def test_diff_dsets(self): 'resultfiles': [{'id': self.resultfn.sfile.pk, 'fn': self.resultfnlf.sfile.filename, 'ana': f'{self.wftype.name}_{self.ana.name}', 'date': datetime.strftime(self.ana.date, '%Y-%m-%d')}], - 'datasets': {f'{self.ds.pk}': {'frregex': f'{self.ads1.regex}', - 'setname': f'{self.ads1.setname.setname}', 'filesaresets': False, + 'datasets': {f'{self.ds.pk}': {'fields': {'__regex': f'{self.ads1.value}'}, + 'setname': f'{self.ads1.setname.setname}', 'allfilessamesample': True, 'files': {}, 'picked_ftype': f'mzML (pwiz {self.f3sfmz.mzmlfile.pwiz.version_description})'}}, } self.assertJSONEqual(resp.content.decode('utf-8'), json.dumps(checkjson)) @@ -181,10 +183,10 @@ def test_same_dsets(self): 'samplegroups': {self.samples.samples[0][0]: self.samples.samples[0][3]}}}, }, 'resultfiles': [], - 'datasets': {f'{self.ds.pk}': {'frregex': f'{self.ads1.regex}', + 'datasets': {f'{self.ds.pk}': {'fields': {'__regex': f'{self.ads1.value}'}, 'setname': f'{self.ads1.setname.setname}', 'picked_ftype': f'mzML (pwiz {self.f3sfmz.mzmlfile.pwiz.version_description})', - 'filesaresets': False, 'files': {}}, + 'allfilessamesample': True, 'files': {}}, } } self.assertJSONEqual(resp.content.decode('utf-8'), json.dumps(checkjson)) @@ -220,10 +222,10 @@ def test_diff_dsets_no_mzmlfile(self): 'resultfiles': [{'id': self.resultfnlf.sfile.pk, 'fn': self.resultfnlf.sfile.filename, 'ana': f'{self.wftype.name}_{self.analf.name}', 'date': datetime.strftime(self.ana.date, '%Y-%m-%d')}], - 'datasets': {f'{self.oldds.pk}': {'filesaresets': True, + 'datasets': {f'{self.oldds.pk}': {'allfilessamesample': False, 'fields': {}, 'picked_ftype': self.afs2.sfile.filetype.name, 'files': {f'{self.afs2.sfile_id}': {'id': self.afs2.sfile_id, - 'setname': self.afs2.sample}}}, + 'fields': {'__sample': self.afs2.value}}}}, }, } self.assertJSONEqual(resp.content.decode('utf-8'), json.dumps(checkjson)) @@ -349,8 +351,8 @@ def test_new_ok(self): 'prefrac': False, 'hr': False, 'setname': '', - 'frregex': am.PsetComponent.objects.get(pset=self.pset, - component=am.PsetComponent.ComponentChoices.PREFRAC).value, + 'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset, + component=am.PsetComponent.ComponentChoices.PREFRAC).value}, 'instruments': [self.prod.name], 'instrument_types': [self.prod.shortname], 'qtype': {'name': self.ds.quantdataset.quanttype.name, @@ -358,13 +360,14 @@ def test_new_ok(self): 'is_isobaric': True}, 'nrstoredfiles': [1, self.ft.name], 'channels': {self.qch.name: [self.projsam1.sample, self.qch.pk]}, - 'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'setname': '', 'sample': ''}], - 
self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'setname': '', 'sample': ''}], + 'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}], + self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}], }, 'incomplete_files': [], 'picked_ftype': mztype, - 'filesaresets': False, + 'allfilessamesample': True, }}, + 'field_order': self.inputdef.value[-1:], 'error': False, 'errmsg': [], } @@ -384,7 +387,7 @@ def test_with_saved_analysis(self): 'prefrac': False, 'hr': False, 'setname': self.ads1.setname.setname, - 'frregex': self.ads1.regex, + 'fields': {'fake': '', '__regex': self.ads1.value}, 'instruments': [self.prod.name], 'instrument_types': [self.prod.shortname], 'qtype': {'name': self.ds.quantdataset.quanttype.name, @@ -392,13 +395,14 @@ def test_with_saved_analysis(self): 'is_isobaric': True}, 'nrstoredfiles': [1, self.ft.name], 'channels': {self.qch.name: [self.projsam1.sample, self.qch.pk]}, - 'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'setname': '', 'sample': ''}], - self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'setname': '', 'sample': ''}], + 'ft_files': {mztype: [{'ft_name': mztype, 'id': self.f3sfmz.pk, 'name': self.f3sfmz.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}], + self.ft.name: [{'ft_name': self.ft.name, 'id': self.f3sf.pk, 'name': self.f3sf.filename, 'fr': '', 'dsetsample': '', 'fields': {'__sample': '', 'fake': ''}}], }, 'incomplete_files': [], 'picked_ftype': mztype, - 'filesaresets': False, + 'allfilessamesample': True, }}, + 'field_order': self.inputdef.value[-1:], 'error': False, 'errmsg': [], } @@ -422,8 +426,8 @@ def test_new_ok(self): 'prefrac': False, 'hr': False, 'setname': '', - 'frregex': am.PsetComponent.objects.get(pset=self.pset, - component=am.PsetComponent.ComponentChoices.PREFRAC).value, + 'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset, + component=am.PsetComponent.ComponentChoices.PREFRAC).value}, 'instruments': [self.prod.name], 'instrument_types': [self.prod.shortname], 'qtype': {'name': self.oldds.quantdataset.quanttype.name, @@ -431,12 +435,13 @@ def test_new_ok(self): 'is_isobaric': False}, 'nrstoredfiles': [1, self.ft.name], 'channels': False, - 'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'setname': self.oldqsf.projsample.sample, 'sample': self.oldqsf.projsample.sample}], + 'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.oldqsf.projsample.sample, 'fake': ''}, 'dsetsample': self.oldqsf.projsample.sample}], }, 'incomplete_files': [], 'picked_ftype': self.ft.name, - 'filesaresets': False, + 'allfilessamesample': True, }}, + 'field_order': self.inputdef.value[-1:], 'error': False, 'errmsg': [], } @@ -457,8 +462,8 @@ def test_with_saved_analysis(self): 'prefrac': False, 'hr': False, 'setname': '', - 'frregex': am.PsetComponent.objects.get(pset=self.pset, - component=am.PsetComponent.ComponentChoices.PREFRAC).value, + 'fields': {'fake': '', '__regex': am.PsetComponent.objects.get(pset=self.pset, + component=am.PsetComponent.ComponentChoices.PREFRAC).value}, 
'instruments': [self.prod.name], 'instrument_types': [self.prod.shortname], 'qtype': {'name': self.oldds.quantdataset.quanttype.name, @@ -466,12 +471,13 @@ def test_with_saved_analysis(self): 'is_isobaric': False}, 'nrstoredfiles': [1, self.ft.name], 'channels': False, - 'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'setname': self.afs2.sample, 'sample': self.oldqsf.projsample.sample}], + 'ft_files': {self.ft.name: [{'ft_name': self.ft.name, 'id': self.oldsf.pk, 'name': self.oldsf.filename, 'fr': '', 'fields': {'__sample': self.afs2.value, 'fake': ''}, 'dsetsample': self.oldqsf.projsample.sample}], }, 'incomplete_files': [], 'picked_ftype': self.ft.name, - 'filesaresets': True, + 'allfilessamesample': False, }}, + 'field_order': self.inputdef.value[-1:], 'error': False, 'errmsg': [], } @@ -578,8 +584,9 @@ def test_new_analysis(self): }}, }, 'analysisname': 'Test new analysis', - 'frregex': {f'{self.ds.pk}': 'fr_find'}, - 'fnsetnames': {}, + # FIXME add some fields + 'fnfields': {}, + 'dsetfields': {f'{self.ds.pk}': {'__regex': 'fr_find'}}, 'params': params, 'singlefiles': {self.pfn2.pk: self.sflib.pk}, 'multifiles': {self.pfn1.pk: [self.sfusr.pk]}, @@ -598,10 +605,12 @@ def test_new_analysis(self): self.assertEqual(resp.status_code, 200) ana = am.Analysis.objects.last() self.assertEqual(ana.analysissampletable.samples, {'hello': 'yes'}) - for adsif in ana.analysisdsinputfile_set.all(): - self.assertEqual(adsif.analysisdset.dataset_id, self.ds.pk) - self.assertEqual(adsif.analysisdset.setname.setname, postdata['dssetnames'][self.ds.pk]) - self.assertEqual(adsif.analysisdset.regex, postdata['frregex'][f'{self.ds.pk}']) + regexes = {x.dataset_id: x.value for x in am.AnalysisDatasetSetValue.objects.filter( + analysis=ana, field='__regex')} + for adsif in am.AnalysisDSInputFile.objects.filter(analysisset__analysis=ana): + self.assertEqual(adsif.dsanalysis.dataset_id, self.ds.pk) + self.assertEqual(adsif.analysisset.setname, postdata['dssetnames'][self.ds.pk]) + self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['dsetfields'][f'{self.ds.pk}']['__regex']) PT = am.Param.PTypes for ap in ana.analysisparam_set.all(): pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams', @@ -637,8 +646,8 @@ def test_existing_analysis(self): }}, }, 'analysisname': 'Test existing analysis', - 'frregex': {f'{self.ds.pk}': 'fr_find'}, - 'fnsetnames': {}, + 'fnfields': {}, + 'dsetfields': {f'{self.ds.pk}': {'__regex': 'fr_find'}}, 'params': params, 'singlefiles': {self.pfn2.pk: self.sflib.pk}, 'multifiles': {self.pfn1.pk: [self.sfusr.pk]}, @@ -656,10 +665,12 @@ def test_existing_analysis(self): self.assertEqual(resp.status_code, 200) self.ana.refresh_from_db() self.assertEqual(self.ana.analysissampletable.samples, {'hello': 'yes'}) - for adsif in self.ana.analysisdsinputfile_set.all(): - self.assertEqual(adsif.analysisdset.dataset_id, self.ds.pk) - self.assertEqual(adsif.analysisdset.setname.setname, postdata['dssetnames'][self.ds.pk]) - self.assertEqual(adsif.analysisdset.regex, postdata['frregex'][f'{self.ds.pk}']) + regexes = {x.dataset_id: x.value for x in am.AnalysisDatasetSetValue.objects.filter( + analysis=self.ana, field='__regex')} + for adsif in am.AnalysisDSInputFile.objects.filter(analysisset__analysis=self.ana): + self.assertEqual(adsif.dsanalysis.dataset_id, self.ds.pk) + self.assertEqual(adsif.analysisset.setname, postdata['dssetnames'][self.ds.pk]) + 
self.assertEqual(regexes[adsif.dsanalysis.dataset_id], postdata['dsetfields'][f'{self.ds.pk}']['__regex']) PT = am.Param.PTypes for ap in self.ana.analysisparam_set.all(): pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams', @@ -695,8 +706,8 @@ def test_existing_analysis(self): 'ISOQUANT': {}, }, 'analysisname': 'Test existing analysis LF', - 'frregex': {f'{self.oldds.pk}': ''}, - 'fnsetnames': {self.oldsf.pk: 'testsample'}, + 'dsetfields': {f'{self.oldds.pk}': {'__regex': ''}}, + 'fnfields': {self.oldsf.pk: {'__sample': 'testsample'}}, 'params': params, 'singlefiles': {self.pfn2.pk: self.sflib.pk}, 'multifiles': {self.pfn1.pk: [self.sfusr.pk]}, @@ -715,9 +726,9 @@ def test_existing_analysis(self): self.assertEqual(resp.status_code, 200) self.analf.refresh_from_db() self.assertFalse(hasattr(self.analf, 'analysissampletable')) - self.assertEqual(self.analf.analysisdsinputfile_set.count(), 0) - for afs in self.analf.analysisfilesample_set.all(): - self.assertEqual(postdata['fnsetnames'][afs.sfile_id], afs.sample) + self.assertEqual(am.AnalysisDSInputFile.objects.filter(analysisset__analysis=self.analf).count(), 0) + for afs in self.analf.analysisfilevalue_set.all(): + self.assertEqual(postdata['fnfields'][afs.sfile_id]['__sample'], afs.value) PT = am.Param.PTypes for ap in self.analf.analysisparam_set.all(): pt = {PT.MULTI: 'multicheck', PT.TEXT: 'inputparams', PT.NUMBER: 'inputparams', @@ -744,7 +755,7 @@ def test_existing_analysis(self): 'samplegroups': {self.samples.samples[0][0]: self.samples.samples[0][3]}, }})) self.assertJSONEqual(json.dumps(ba.shadow_dssetnames), json.dumps({ - self.ds.pk: {'setname': self.ads1.setname.setname, 'regex': self.ads1.regex}})) + self.ds.pk: {'setname': self.ads1.setname.setname, 'fields': {'__regex': self.ads1.value}}})) def test_failing(self): # no sample annotations @@ -774,8 +785,8 @@ def test_failing(self): 'ISOQUANT': {}, }, 'analysisname': 'Test existing analysis LF', - 'frregex': {f'{newds.pk}': ''}, - 'fnsetnames': {}, + 'fnfields': {}, + 'dsetfields': {f'{newds.pk}': {'__regex': ''}}, 'params': params, 'singlefiles': {self.pfn2.pk: self.sflib.pk}, 'multifiles': {self.pfn1.pk: [self.sfusr.pk]}, diff --git a/src/backend/analysis/views.py b/src/backend/analysis/views.py index f4ec1b2a..1895d18e 100644 --- a/src/backend/analysis/views.py +++ b/src/backend/analysis/views.py @@ -126,26 +126,29 @@ def load_base_analysis(request, wfversion_id, baseanid): analysis['fileparams'][afp.param_id] = afp.sfile_id # Get datasets from base analysis for their setnames/filesamples etc - # Only overlapping datasets are fetched here - dsets = {x: {} for x in new_ana_dsids} + # Only overlapping datasets are fetched here (empty dsets are popped at the end) + dsets = {x: defaultdict(dict) for x in new_ana_dsids} analysis_dsfiles = defaultdict(set) - for ads in ana.analysisdatasetsetname_set.filter(dataset_id__in=new_ana_dsids): - dsets[ads.dataset_id] = {'setname': ads.setname.setname, 'frregex': ads.regex, - 'files': {}} - analysis_dsfiles[ads.dataset_id] = {x.sfile_id for x in am.AnalysisDSInputFile.objects.filter(analysis_id=ads.analysis_id, analysisdset=ads)} + for ads in ana.analysisdatasetsetvalue_set.filter(dataset_id__in=new_ana_dsids): + dsets[ads.dataset_id]['fields'][ads.field] = ads.value + dsets[ads.dataset_id]['setname'] = ads.setname.setname + dsets[ads.dataset_id]['files'] = {} + analysis_dsfiles[ads.dataset_id] = {x.sfile_id for x in + am.AnalysisDSInputFile.objects.filter(analysisset=ads.setname)} for dsid in new_ana_dsids: - 
for fn in am.AnalysisFileSample.objects.filter(analysis=ana, + for fn in am.AnalysisFileValue.objects.filter(analysis=ana, sfile__rawfile__datasetrawfile__dataset_id=dsid): analysis_dsfiles[dsid].add(fn.sfile_id) # FIXME files should maybe be called filesamples -> less confusion try: - dsets[dsid]['files'][fn.sfile_id] = {'id': fn.sfile_id, 'setname': fn.sample} + dsets[dsid]['files'][fn.sfile_id]['fields'][fn.field] = fn.value except KeyError: - dsets[dsid]['files'] = {fn.sfile_id: {'id': fn.sfile_id, 'setname': fn.sample}} + dsets[dsid]['files'][fn.sfile_id] = {'id': fn.sfile_id, 'fields': {fn.field: fn.value}} + dsets[dsid]['fields'] = {} if 'files' in dsets[dsid]: - # Must check if dset is actually in the overlap before setting filesaresets, else it errors - dsets[dsid]['filesaresets'] = any((x['setname'] != '' for x in dsets[dsid]['files'].values())) + # Must check if dset is actually in the overlap before setting allfilessamesample, else it errors + dsets[dsid]['allfilessamesample'] = all(not x['fields']['__sample'] for x in dsets[dsid]['files'].values()) # Clean dsets to only contain dsets from base analysis [dsets.pop(x) for x in new_ana_dsids if not dsets[x]] @@ -388,13 +391,17 @@ def get_datasets(request, wfversion_id): allcomponents = {x.value: x for x in am.PsetComponent.ComponentChoices} wfcomponents = {allcomponents[x.component].name: x.value for x in am.PsetComponent.objects.filter(pset__nextflowwfversionparamset=wfversion_id)} + inputcomps = wfcomponents.get('INPUTDEF', []) + non_fields = {'setname', 'sampleID', 'instrument', 'channel', 'plate', 'fraction'} + fields = {x: '' for x in inputcomps[1:] if not x in non_fields} + field_order = [x for x in fields.keys()] # Get analysis filesamples for later use - has_filesamples, analysis_dsfiles = {}, set() + has_filesamples, analysis_dsfiles = defaultdict(dict), set() if anid: - if afss := am.AnalysisFileSample.objects.filter(analysis_id=anid): - has_filesamples = {x.sfile_id: x.sample for x in afss} - analysis_dsfiles = {x for x in has_filesamples} + for afv in am.AnalysisFileValue.objects.filter(analysis_id=anid): + has_filesamples[afv.sfile_id][afv.field] = afv.value + analysis_dsfiles = {x for x in has_filesamples} # FIXME accumulate errors across data sets and show all, but do not report other stuff if error for dset in dbdsets.select_related('runname__experiment__project', 'prefractionationdataset', @@ -411,15 +418,22 @@ def get_datasets(request, wfversion_id): hr = f'HiRIEF {str(pf.hiriefdataset.hirief)}' frregex = wfcomponents['PREFRAC'] - # Sample(set) names and previously used files + # Sample(set) names, adsv fields and previously used files setname = '' if anid: - if adsn := am.AnalysisDatasetSetname.objects.filter(analysis_id=anid, dataset=dset): - adsn = adsn.get(analysis_id=anid) - setname = adsn.setname.setname - analysis_dsfiles.update({x.sfile_id for x in am.AnalysisDSInputFile.objects.filter(analysis_id=anid, analysisdset=adsn)}) - # PREFRAC component: - frregex = adsn.regex + if adsis := am.AnalysisDSInputFile.objects.filter(dsanalysis__analysis_id=anid, + dsanalysis__dataset=dset): + anasetname = adsis.select_related('analysisset').first().analysisset + setname = anasetname.setname + analysis_dsfiles.update({x.sfile_id for x in adsis}) + # PREFRAC component: + if adsvs := am.AnalysisDatasetSetValue.objects.filter(analysis_id=anid, + dataset=dset, field='__regex'): + frregex = adsvs.get().value + # Get other existing fields if any + fields.update({x.field: x.value for x in + 
am.AnalysisDatasetSetValue.objects.filter(analysis_id=anid, + dataset=dset).exclude(field__startswith='__')}) # Get dataset files dssfiles = rm.StoredFile.objects.select_related('rawfile__producer', 'servershare', @@ -483,7 +497,8 @@ def get_datasets(request, wfversion_id): incomplete_files.append(ft) resp_files = {x.id: {'ft_name': ft_name, 'id': x.id, 'name': x.filename, 'fr': '', - 'setname': '', 'sample': ''} for ft_name, dsf in usefiles.items() for x in dsf} + 'dsetsample': '', 'fields': {'__sample': '', **fields}} + for ft_name, dsf in usefiles.items() for x in dsf} # Fill channels with quant data channels = {} @@ -499,26 +514,25 @@ def get_datasets(request, wfversion_id): for ft_name, dsfiles in usefiles.items(): for fn in dsfiles.filter(rawfile__datasetrawfile__quantsamplefile__isnull=False).select_related( 'rawfile__datasetrawfile__quantsamplefile__projsample'): - resp_files[fn.id]['sample'] = fn.rawfile.datasetrawfile.quantsamplefile.projsample.sample + resp_files[fn.id]['dsetsample'] = fn.rawfile.datasetrawfile.quantsamplefile.projsample.sample if fn.id in has_filesamples: - resp_files[fn.id]['setname'] = has_filesamples[fn.id] + resp_files[fn.id]['fields'].update(has_filesamples[fn.id]) # Files with samples (non-MS, IP, non-isobaric, etc) - # FIXME rename this key to samplename instead of setname! - filesaresets = False if anid and is_msdata: - filesaresets = any((x['setname'] != '' for x in resp_files.values())) + allfilessamesample = all((x['fields']['__sample'] == '' for x in resp_files.values())) elif not is_msdata: # sequencing data etcetera, always have sample-per-file since we dont # expect multiplexing or fractionation here # Add possible already stored analysis file samplenames - filesaresets = True + allfilessamesample= False else: # New analysis, set names for files can be there quantsamplefile values # initially - [x.update({'setname': x['sample']}) for x in resp_files.values()] + allfilessamesample = True + [x['fields'].update({'__sample': x['dsetsample']}) for x in resp_files.values()] # Finalize response grouped_resp_files = defaultdict(list) @@ -540,7 +554,7 @@ def get_datasets(request, wfversion_id): 'prefrac': prefrac, 'hr': hr, 'setname': setname, - 'frregex': frregex, + 'fields': {'__regex': frregex, **fields}, 'instruments': [x.rawfile.producer.name for x in producers], 'instrument_types': [x.rawfile.producer.shortname for x in producers], 'qtype': qtype, @@ -549,12 +563,12 @@ def get_datasets(request, wfversion_id): 'ft_files': grouped_resp_files, 'incomplete_files': incomplete_files, 'picked_ftype': picked_ft, - 'filesaresets': filesaresets, + 'allfilessamesample': allfilessamesample, } if len(response['errmsg']): return JsonResponse({**response, 'error': True}, status=400) else: - response['dsets'] = dsetinfo + response.update({'dsets': dsetinfo, 'field_order': field_order}) return JsonResponse(response) @@ -666,8 +680,8 @@ def store_analysis(request): req['dssetnames'] req['picked_ftypes'] req['analysisname'] - req['frregex'] - req['fnsetnames'] + req['dsetfields'] + req['fnfields'] req['params'] req['singlefiles'] req['multifiles'] @@ -803,13 +817,16 @@ def parse_isoquant(quants): dss = am.DatasetAnalysis.objects.filter(analysis=analysis) excess_dss = {x.dataset_id for x in dss}.difference(req['dsids']) dss.filter(dataset_id__in=excess_dss).delete() - am.DatasetAnalysis.objects.bulk_create([am.DatasetAnalysis(dataset_id=dsid, analysis=analysis) + newdss = am.DatasetAnalysis.objects.bulk_create([am.DatasetAnalysis(dataset_id=dsid, analysis=analysis) for 
dsid in set(req['dsids']).difference({x.dataset_id for x in dss})]) wfshortname = am.UserWorkflow.WFTypeChoices(analysis.nextflowsearch.workflow.wftype).name + dss_map = {x.dataset_id: x.pk for x in [*dss, *newdss]} else: analysis = am.Analysis.objects.create(name=req['analysisname'], user_id=request.user.id) wfshortname = am.UserWorkflow.WFTypeChoices(am.UserWorkflow.objects.get(pk=req['wfid']).wftype).name - am.DatasetAnalysis.objects.bulk_create([am.DatasetAnalysis(dataset_id=dsid, analysis=analysis) for dsid in req['dsids']]) + dss = am.DatasetAnalysis.objects.bulk_create([am.DatasetAnalysis(dataset_id=dsid, + analysis=analysis) for dsid in req['dsids']]) + dss_map = {x.dataset_id: x.pk for x in dss} ana_storpathname = (f'{analysis.pk}_{wfshortname}_{analysis.name}_' f'{datetime.strftime(analysis.date, "%Y%m%d_%H.%M")}') analysis.storage_dir = f'{analysis.user.username}/{ana_storpathname}' @@ -817,7 +834,7 @@ def parse_isoquant(quants): in_components = {k: v for k, v in req['components'].items() if v} jobinputs = {'components': wf_components, 'singlefiles': {}, 'multifiles': {}, 'params': {}} - data_args = {'setnames': {}, 'platenames': {}} + data_args = {'filesamples': {}, 'platenames': {}, 'filefields': defaultdict(dict)} data_args['infiles'] = req['infiles'] # Input file definition @@ -827,7 +844,6 @@ def parse_isoquant(quants): jobinputs['components']['INPUTDEF'] = False # Store setnames - # FIXME component? setname_ids = {} am.AnalysisSetname.objects.filter(analysis=analysis).exclude(setname__in=req['dssetnames'].values()).delete() for setname in set(req['dssetnames'].values()): @@ -835,19 +851,18 @@ def parse_isoquant(quants): setname_ids[setname] = anaset.pk # setnames for datasets, optionally fractions and strips new_ads = {} - am.AnalysisDSInputFile.objects.filter(analysis=analysis).exclude(sfile_id__in=req['infiles']).delete() + am.AnalysisDSInputFile.objects.filter(analysisset__analysis=analysis).exclude(sfile_id__in=req['infiles']).delete() for dsid, setname in req['dssetnames'].items(): if 'PREFRAC' in wf_components: - regex = req['frregex'][dsid] - else: - regex = '' - ads, created = am.AnalysisDatasetSetname.objects.update_or_create( - defaults={'setname_id': setname_ids[setname], 'regex': regex}, - analysis=analysis, dataset_id=dsid) + regex = req['dsetfields'][dsid]['__regex'] + ads, created = am.AnalysisDatasetSetValue.objects.update_or_create( + defaults={'setname_id': setname_ids[setname], 'value': regex}, + analysis=analysis, field='__regex', dataset_id=dsid) new_ads[ads.pk] = created for sf in dsfiles[dsid]: - am.AnalysisDSInputFile.objects.update_or_create(sfile=sf, analysis=analysis, analysisdset=ads) - data_args['setnames'][sf.pk] = setname + am.AnalysisDSInputFile.objects.get_or_create(sfile=sf, analysisset_id=setname_ids[setname], + dsanalysis_id=dss_map[dsid]) + data_args['filesamples'][sf.pk] = setname dset = dsets[dsid] if 'PREFRAC' in wf_components and hasattr(dset, 'prefractionationdataset'): # get platenames @@ -857,14 +872,15 @@ def parse_isoquant(quants): data_args['platenames'][dsid] = strip else: data_args['platenames'][dsid] = pfd.prefractionation.name - am.AnalysisDatasetSetname.objects.filter(analysis=analysis).exclude(pk__in=new_ads).delete() + am.AnalysisDatasetSetValue.objects.filter(analysis=analysis).exclude(pk__in=new_ads).delete() # store samples if non-prefrac labelfree files are sets - am.AnalysisFileSample.objects.filter(analysis=analysis).exclude(sfile_id__in=req['fnsetnames']).delete() - for sfid, sample in req['fnsetnames'].items(): 
- am.AnalysisFileSample.objects.update_or_create(defaults={'sample': sample}, - analysis=analysis, sfile_id=sfid) - data_args['setnames'].update({sfid: sample for sfid, sample in req['fnsetnames'].items()}) + am.AnalysisFileValue.objects.filter(analysis=analysis).exclude(sfile_id__in=req['fnfields']).delete() + for sfid, sample in req['fnfields'].items(): + for fieldname, value in sample.items(): + am.AnalysisFileValue.objects.update_or_create(defaults={'value': value}, + field=fieldname, analysis=analysis, sfile_id=sfid) + data_args['filesamples'].update({sfid: sample for sfid, sample in req['fnfields'].items()}) # Store params passedparams_exdelete = {**req['params']['flags'], **req['params']['inputparams'], **req['params']['multicheck']} @@ -910,8 +926,12 @@ def parse_isoquant(quants): sampletables = base_ana.analysissampletable.samples else: sampletables = {} - shadow_dss = {x.dataset_id: {'setname': x.setname.setname, 'regex': x.regex} - for x in base_ana.analysisdatasetsetname_set.all()} + shadow_dss = {} + for x in base_ana.analysisdatasetsetvalue_set.all(): + try: + shadow_dss[x.dataset_id]['fields'][x.field] = x.value + except KeyError: + shadow_dss[x.dataset_id] = {'setname': x.setname.setname, 'fields': {x.field: x.value}} shadow_isoquants = get_isoquants(base_ana, sampletables) # Add the base analysis' own base analysis shadow isquants/dss is any try: @@ -1003,7 +1023,7 @@ def get_isoquants(analysis, sampletables): """For analysis passed, return its analysisisoquants from DB in nice format for frontend""" isoquants = {} for aiq in am.AnalysisIsoquant.objects.select_related('setname').filter(analysis=analysis): - set_dsets = aiq.setname.analysisdatasetsetname_set.all() + set_dsets = am.DatasetAnalysis.objects.filter(analysisdsinputfile__analysisset=aiq.setname) qtypename = set_dsets.values('dataset__quantdataset__quanttype__shortname').distinct().get()['dataset__quantdataset__quanttype__shortname'] qcsamples = {qcs.channel.channel_id: qcs.projsample.sample for qcs in dm.QuantChannelSample.objects.filter(dataset_id__in=set_dsets.values('dataset'))} channels = {qtc.channel.name: qtc.channel_id for anasds in set_dsets.distinct('dataset__quantdataset__quanttype') for qtc in anasds.dataset.quantdataset.quanttype.quanttypechannel_set.all()} diff --git a/src/backend/home/views.py b/src/backend/home/views.py index 9a21c9c6..ad454690 100644 --- a/src/backend/home/views.py +++ b/src/backend/home/views.py @@ -556,8 +556,8 @@ def get_analysis_invocation(ana): iqparams = [] for aiq in anmodels.AnalysisIsoquant.objects.select_related('setname').filter(analysis=ana): - set_dsets = aiq.setname.analysisdatasetsetname_set.all() - qtypename = set_dsets.values('dataset__quantdataset__quanttype__shortname').distinct().get()['dataset__quantdataset__quanttype__shortname'] + set_dsas = aiq.setname.analysisdsinputfile_set.distinct('dsanalysis').values('dsanalysis') + qtypename = set_dsas.values('dsanalysis__dataset__quantdataset__quanttype__shortname').distinct().get()['dsanalysis__dataset__quantdataset__quanttype__shortname'] if aiq.value['sweep']: calc_psm = 'sweep' elif aiq.value['report_intensity']: @@ -601,6 +601,8 @@ def get_analysis_info(request, nfs_id): # This means we have to check for taskerror__isnull here if task.taskerror.message: errors.append(task.taskerror.message) + dsicount = anmodels.AnalysisDSInputFile.objects.filter(analysisset__analysis=ana).count() + afscount = ana.analysisfilevalue_set.count() resp = {'name': aj.get_ana_fullname(ana), 'wf': {'fn': 
nfs.nfwfversionparamset.filename, 'name': nfs.nfwfversionparamset.nfworkflow.description, @@ -608,7 +610,7 @@ def get_analysis_info(request, nfs_id): 'repo': nfs.nfwfversionparamset.nfworkflow.repo}, ## 'proj': [{'name': x.name, 'id': x.id} for x in projs], 'nrdsets': len(dsets), - 'nrfiles': ana.analysisdsinputfile_set.count(), + 'nrfiles': dsicount + afscount, 'storage_locs': [{'server': x.servershare.server.uri, 'share': x.servershare.name, 'path': x.path} for x in storeloc], 'log': logentry, diff --git a/src/frontend/analysis/src/App.svelte b/src/frontend/analysis/src/App.svelte index cb044910..4f9aa3b2 100644 --- a/src/frontend/analysis/src/App.svelte +++ b/src/frontend/analysis/src/App.svelte @@ -26,6 +26,7 @@ let prev_resultfiles = []; let resfn_arr = []; let resultfiles = {} let resultfnorder = []; +let field_order = []; let base_analysis = { isComplement: false, @@ -88,11 +89,11 @@ function validate() { notif.errors['No datasets are in this analysis, maybe they need some editing'] = 1; } Object.values(dsets).forEach(ds => { - if (!('LABELCHECK_ISO' in wf.components) && !ds.filesaresets && !ds.setname) { + if (!('LABELCHECK_ISO' in wf.components) && ds.allfilessamesample && !ds.setname) { notif.errors[`Dataset ${ds.proj} - ${ds.exp} - ${ds.run} needs to have a set name`] = 1; - } else if (ds.filesaresets) { - if (ds.ft_files[ds.picked_ftype].some(fn => !fn.setname)) { - notif.errors[`File ${fn.name} needs to have a setname`] = 1; + } else if (!ds.allfilessamesample) { + if (ds.ft_files[ds.picked_ftype].some(fn => !fn.fields.__sample)) { + notif.errors[`File ${fn.name} needs to have a sample name`] = 1; } } else if (ds.setname && !charRe.test(ds.setname)) { notif.errors[`Dataset ${ds.proj} - ${ds.exp} - ${ds.run} needs to have another set name: only a-z 0-9 _ are allowed`] = 1; @@ -135,19 +136,20 @@ async function storeAnalysis() { base_analysis: base_analysis, dsids: Object.keys(dsets), dssetnames: Object.fromEntries(Object.entries(dsets) - .filter(([x,ds]) => !ds.filesaresets) + .filter(([x,ds]) => ds.allfilessamesample) .map(([dsid, ds]) => [dsid, ds.setname])), infiles: Object.fromEntries(Object.values(dsets) .flatMap(ds => ds.ft_files[ds.picked_ftype] .map(fn => [fn.id, {fr: fn.fr}]))), - fnsetnames: Object.fromEntries(Object.entries(dsets) - .filter(([x,ds]) => ds.filesaresets) + fnfields: Object.fromEntries(Object.entries(dsets) + .filter(([x,ds]) => !ds.allfilessamesample) .map(([dsid, ds]) => ds.ft_files[ds.picked_ftype] - .map(fn => [fn.id, fn.setname])) + .map(fn => [fn.id, fn.fields])) .flat()), picked_ftypes: Object.fromEntries(Object.entries(dsets) .map(([dsid, ds]) => [dsid, ds.picked_ftype])), - frregex: Object.fromEntries(Object.entries(dsets).map(([dsid, ds]) => [dsid, ds.frregex])), + dsetfields: Object.fromEntries(Object.entries(dsets) + .map(([dsid, ds]) => [dsid, ds.fields])), singlefiles: fns, multifiles: multifns, components: { @@ -292,6 +294,7 @@ async function fetchDatasetDetails(fetchdsids) { dsets[x].changed = false; }) Object.entries(dsets).filter(x=>x[1].prefrac).forEach(x=>matchFractions(dsets[x[0]])); + field_order = result.field_order; } } @@ -363,13 +366,15 @@ async function loadBaseAnalysis() { const resds = result.datasets[dsid]; dsets[dsid].setname = resds.setname; overlapping_setnames.add(dsets[dsid].setname); - dsets[dsid].frregex = resds.frregex; - dsets[dsid].filesaresets = resds.filesaresets; + Object.keys(resds.fields).forEach(f => { + dsets[dsid].fields[f] = resds.fields[f]; + }); + dsets[dsid].allfilessamesample = 
resds.allfilessamesample; dsets[dsid].picked_ftype = resds.picked_ftype; dsets[dsid].ft_files[resds.picked_ftype] .filter(x => x.id in resds.files) .forEach(x => { - x.setname = resds.files[x.id].setname; + x.fields = resds.files[x.id].fields; }); if (dsets[dsid].prefrac) { matchFractions(dsets[dsid]); @@ -426,7 +431,7 @@ function getIntextFileName(fnid, files) { function matchFractions(ds) { let allfrs = new Set(); for (let fn of ds.ft_files[ds.picked_ftype]) { - const match = fn.name.match(RegExp(ds.frregex)); + const match = fn.name.match(RegExp(ds.fields.__regex)); if (match) { fn.fr = match[1]; allfrs.add(match[1]); @@ -648,10 +653,10 @@ onMount(async() => {
{#if !ds.prefrac && !ds.qtype.is_isobaric} - - + + {/if} - {#if !ds.filesaresets} + {#if ds.allfilessamesample}
updateIsoquant(ds.id)}>
@@ -659,9 +664,20 @@ onMount(async() => { {#if wf && ds.prefrac && 'PREFRAC' in wf.components}
- matchFractions(ds)} bind:value={ds.frregex}> + matchFractions(ds)} bind:value={ds.fields.__regex}>
{matchedFr[ds.id]} fractions matched + {/if} + + {#if ds.allfilessamesample && field_order.filter(x => !x.startsWith('__')).length} +
+
Workflow specific fields:
+ {#each field_order.filter(x => !x.startsWith('__')) as field} +
+ + +
+ {/each} {/if}
@@ -687,15 +703,33 @@ onMount(async() => {
- {#if ds.filesaresets} - {#each ds.ft_files[ds.picked_ftype] as fn} -
-
{fn.name}
-
- -
-
- {/each} + {#if !ds.allfilessamesample} + + + + + {#each field_order.filter(x => !x.startsWith('__')) as field} + + {/each} + + + {#each ds.ft_files[ds.picked_ftype] as fn} + + + + {#each field_order.filter(x => !x.startsWith('__')) as field} + + {/each} + + {/each} + +
File nameSample{field}
{fn.name} + + +
+ +
+
{/if} {/if}
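
For reference, a minimal sketch of how the renamed models are meant to be queried together after migrations 0048-0051, assuming only the model definitions shown in analysis/models.py above; the helper name collect_analysis_fields and the import alias are illustrative, not part of this change:

from collections import defaultdict

from analysis import models as am


def collect_analysis_fields(analysis):
    # Dataset-level field/value pairs (e.g. {'__regex': '.*fr([0-9]+).*mzML$'}),
    # one AnalysisDatasetSetValue row per (analysis, dataset, field)
    dset_fields = defaultdict(dict)
    for adsv in am.AnalysisDatasetSetValue.objects.filter(analysis=analysis):
        dset_fields[adsv.dataset_id][adsv.field] = adsv.value

    # File-level field/value pairs (e.g. {'__sample': 'sample1'}),
    # one AnalysisFileValue row per (analysis, sfile, field)
    file_fields = defaultdict(dict)
    for afv in am.AnalysisFileValue.objects.filter(analysis=analysis):
        file_fields[afv.sfile_id][afv.field] = afv.value

    # Input files now hang off AnalysisSetname / DatasetAnalysis instead of
    # Analysis / AnalysisDatasetSetname, so the set name is a plain text field
    setnames = {
        adsif.sfile_id: adsif.analysisset.setname
        for adsif in am.AnalysisDSInputFile.objects.filter(
            analysisset__analysis=analysis).select_related('analysisset')
    }
    return dset_fields, file_fields, setnames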