main.nf
#! /usr/bin/env nextflow
nextflow.enable.dsl=2
def summary = [:]
if (workflow.revision) summary['Pipeline Release'] = workflow.revision
summary['Launch dir'] = workflow.launchDir
summary['Working dir'] = workflow.workDir
summary['Script dir'] = workflow.projectDir
summary['User'] = workflow.userName
summary['Output dir'] = params.outdir
log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
log.info "-\033[2m--------------------------------------------------\033[0m-"
/* --------------------
   |   Help Message   |
   -------------------- */
def helpMessage() {
log.info """
Usage:
The typical command for running the pipeline is as follows:
    nextflow run main.nf --inputlist input1.csv
Mandatory:
--inputlist Input file
Resource Options:
--max_cpus Maximum number of CPUs (int)
(default: $params.max_cpus)
--max_memory Maximum memory (memory unit)
(default: $params.max_memory)
--max_time Maximum time (time unit)
(default: $params.max_time)
""".stripIndent()
}
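// helpMessage() is defined above but never invoked; a minimal sketch of the usual
// wiring, assuming a boolean --help flag (params.help is not set elsewhere in this script):
params.help = false
if (params.help) {
    helpMessage()
    exit 0
}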
project_dir = workflow.projectDir  // also available implicitly as 'projectDir' below
run_date    = new java.text.SimpleDateFormat("yyyy_MM_dd").format(new Date())  // yyyy_MM_dd date stamp for this run
params.inputlist = 'input1.csv'  // default; override with --inputlist
ch_input = Channel
    .fromPath(params.inputlist)
    .ifEmpty {
        error "Cannot find input file: ${params.inputlist}"
    }
    .splitCsv(skip: 1) // skip the header row
    .map { row ->
        tuple(row[0], file(row[1]), file(row[2])) // (sample ID, mutations file, regions file)
    }
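// Expected layout of the input list (header row is skipped above); paths are illustrative:
//   sample,mutations,regions
//   sample_001,/data/sample_001_mutations.vcf,/data/coding_regions.bed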
// TODO: add an extract-VAF process upstream: 1) read in the VCF file list,
// 2) process each VCF through the R script, 3) output per-sample
// extract_coding_mut_001_no_qual_rep_asmd_clpm_for_drivers.csv, 4) build
// inputlist.csv from those outputs with columns sample,mutations,regions,
// 5) pass that file into the process below.

// Run the script to make the MTR input for the file paths above.
// This step is the most computationally intensive and needs better parallelisation here.
process extract_coding_mutations {
    label 'cloudos_cli'
    tag { sample }
    publishDir "${params.outdir}/extract/", mode: 'copy'

    input:
    tuple val(sample), path(mutations), path(regions)  // declared as path so the files are staged into the work dir

    output:
    tuple val(sample), path('*_noncoding_mutations.csv')

    script:
    """
    coding_mutations_nf.py -sample '${sample}' -mutations '${mutations}' -regions '${regions}'
    """
}
process prepare_vep {
    label 'cloudos_cli'
    tag { sample }
    publishDir "${params.outdir}/vep_prep/", mode: 'copy'

    input:
    tuple val(sample), path(noncoding_mutations)

    output:
    path '*input.txt'

    script:
    """
    make_vep_input.py -output_dir "/mnt/session_data/results/extract/"
    """
}
// Before running on CloudOS, change the call above to: make_vep_input.py -output_dir "${params.outdir}/extract/"
process VEP_run {
    label 'vep'
    tag { vep_input.baseName }  // no sample value reaches this process, so tag on the input file
    publishDir "${params.outdir}/vep_results/", mode: 'copy'

    input:
    path(vep_input)

    output:
    path '*vep_output.txt'

    script:
    """
    vep -i ${vep_input} \\
        -o scanb_brca_vep_output.txt \\
        --cache --offline \\
        --dir_cache /mnt/session_data/vep_cache_GRCh38_v105 \\
        --custom '${projectDir}/data/clinvar.vcf.gz,ClinVar,vcf,exact,0,CLNDN,CLNDNINCL,CLNDISDB,CLNDISDBINCL,CLNHGVS,CLNREVSTAT,CLNSIG,CLNSIGCONF,CLNSIGINCL,CLNVC,CLNVCSO,CLNVI'
    """
}
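// The --custom spec above follows VEP's positional custom-annotation format:
// file,short_name,format,type,force_report_coordinates (0 here), then the
// ClinVar VCF INFO fields to report in the output.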
// Before running on CloudOS: point --dir_cache at the S3 cache path (e.g. vep -i '${vep_input}' --dir_cache '${cache_input}')
// rather than the local /mnt/session_data test path used above.
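// Pipeline wiring: per-sample mutation extraction -> VEP input preparation -> VEP annotation.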
workflow {
    extract_coding_mutations(ch_input) | prepare_vep | VEP_run
}