forked from jasteen/nextflow-workflows
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflairPipe.nf
178 lines (139 loc) · 4.63 KB
/
flairPipe.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env nextflow
// ---------------------------------------------------------------------------
// Required inputs
// ---------------------------------------------------------------------------
project_name   = "PALB2_isoform_analysis"   // prefix used for the merged FASTQ and the alignment outputs
refFolder      = file("/projects/vh83/reference/genomes/b37/bwa_0.7.12_index/")
inputDirectory = file('./fastqs')           // every *.fastq directly inside this folder is picked up
tmp_dir        = file('/scratch/vh83/tmp/')
chromsizes     = file('/scratch/vh83/projects/small_projects/palb2_nanopore/chrom_sizes.txt')
humanGTF       = file('/scratch/vh83/projects/small_projects/palb2_nanopore/Homo_sapiens.GRCh37.87.chr.gtf')

// ---------------------------------------------------------------------------
// Reference files, all derived from the same base name inside refFolder
// ---------------------------------------------------------------------------
refBase = "$refFolder/human_g1k_v37_decoy"
ref     = file("${refBase}.fasta")
refDict = file("${refBase}.dict")
refFai  = file("${refBase}.fasta.fai")

// ---------------------------------------------------------------------------
// Environment modules loaded by the processes
// ---------------------------------------------------------------------------
condaModule    = 'miniconda3/4.1.11-python3.5'
samtoolsModule = 'samtools/1.9-gcc5'

// ---------------------------------------------------------------------------
// Global resource configuration options
// (only some of these are referenced by the processes in this file)
// ---------------------------------------------------------------------------
globalExecutor    = 'slurm'
globalStageInMode = 'symlink'
globalCores       = 1
bwaCores          = 12
globalMemoryS     = '6 GB'
globalMemoryM     = '32 GB'
globalMemoryL     = '64 GB'
globalTimeS       = '8m'
globalTimeM       = '1h'
globalTimeL       = '24h'
globalQueueS      = 'short'
globalQueueL      = 'comp'

// All input FASTQ files, gathered into a single list emission so that one
// task receives every file at once.
ch_fastqs = Channel
    .fromPath("${inputDirectory}/*.fastq")
    .collect()
// Concatenate every input FASTQ into a single "<project_name>.all.fastq".
//
// Input : the full list of FASTQ files emitted by ch_fastqs.
// Output: the merged FASTQ, sent to both ch_catFastq and ch_catFastq2.
process catFastqs {
    // Directives are declared first, before input/output, as the Nextflow
    // documentation requires. publishDir is declared once (it was duplicated).
    publishDir path: './output', mode: 'copy'
    cache 'lenient'
    executor globalExecutor
    stageInMode globalStageInMode
    cpus 1
    memory globalMemoryM
    time '1h'
    queue globalQueueL

    input:
    // Stage the inputs under a fixed "reads_" prefix. With the previous
    // '*.fastq' pattern a single input file is staged with the wildcard
    // removed (i.e. as the hidden file '.fastq'), which the shell glob below
    // would not match; the prefix also guarantees the glob can never match
    // the output file written by this task.
    file 'reads_*.fastq' from ch_fastqs

    output:
    file "${project_name}.all.fastq" into ch_catFastq, ch_catFastq2

    script:
    """
    cat reads_*.fastq > "${project_name}.all.fastq"
    """
}
// Run `flair.py align` on the merged FASTQ against the reference genome.
//
// Input : the merged FASTQ from ch_catFastq.
// Output: the pair (<project_name>.flair_aligned.sam,
//         <project_name>.flair_aligned.bed), sent to both ch_align and
//         ch_align2.
process flairAlign {
    // Directives are declared first, before input/output, as the Nextflow
    // documentation requires. publishDir is declared once (it was duplicated).
    publishDir path: './output', mode: 'copy'
    cache 'lenient'
    executor globalExecutor
    stageInMode globalStageInMode
    cpus 8
    // The module directive is repeated once per environment module to load.
    module condaModule
    module samtoolsModule
    conda '/home/jste0021/.conda/envs/py3.5/'
    memory globalMemoryM
    time '6h'
    queue globalQueueL

    input:
    file(fastq) from ch_catFastq

    output:
    set file("${project_name}.flair_aligned.sam"), file("${project_name}.flair_aligned.bed") into ch_align, ch_align2

    script:
    // The thread count follows the `cpus` directive via task.cpus.
    """
    python ~/scripts/git_controlled/flair/flair.py align -v1.3 -g ${ref} -r ${fastq} -t ${task.cpus} \
    -o "${project_name}.flair_aligned"
    """
}
// Convert the SAM produced by flairAlign into a coordinate-sorted, indexed BAM.
//
// Input : the (sam, bed) pair from ch_align2; only the SAM is used here.
// Output: the pair (*.bam, *.bai), sent to ch_bams.
process makeBam {
    // Directives are declared first, before input/output, as the Nextflow
    // documentation requires. publishDir is declared once (it was duplicated).
    publishDir path: './output', mode: 'copy'
    cache 'lenient'
    executor globalExecutor
    stageInMode globalStageInMode
    cpus 1
    module samtoolsModule
    memory globalMemoryM
    time '6h'
    queue globalQueueL
    // NOTE(review): failures of this task are ignored, so a broken BAM step
    // does not stop the pipeline. Kept as in the original; consider removing
    // it now that the sort command is fixed.
    errorStrategy 'ignore'

    input:
    set file(sam), file(bed) from ch_align2

    output:
    set file("*.bam"), file("*.bai") into ch_bams

    script:
    // The BAM stream must be sorted with `samtools sort`; the previous command
    // piped it into the coreutils `sort` text utility, which cannot produce a
    // valid BAM and made the subsequent `samtools index` fail.
    // pipefail makes a failure in `samtools view` fail the whole pipeline
    // instead of being masked by the exit status of the last command.
    """
    set -o pipefail
    samtools view -bS ${sam} | samtools sort -o "${sam}.bam" -
    samtools index "${sam}.bam"
    """
}
// Run `flair.py correct` on the aligned BED using the genome, the GTF
// annotation and the chromosome sizes file.
//
// Input : the (sam, bed) pair from ch_align; only the BED is passed to flair.
// Output: the triple (*_all_corrected.bed, *_all_inconsistent.bed, *.psl),
//         sent to ch_correct. No output prefix is passed to flair, so the
//         file names are whatever flair chooses by default; they are captured
//         by glob.
process flairCorrect {
    // Directives are declared first, before input/output, as the Nextflow
    // documentation requires. publishDir is declared once (it was duplicated).
    publishDir path: './output', mode: 'copy'
    cache 'lenient'
    executor globalExecutor
    stageInMode globalStageInMode
    cpus 8
    // The module directive is repeated once per environment module to load.
    module condaModule
    module samtoolsModule
    conda '/home/jste0021/.conda/envs/py3.5/'
    memory globalMemoryM
    time '6h'
    queue globalQueueL

    input:
    set file(sam), file(bed) from ch_align

    output:
    set file("*_all_corrected.bed"), file("*_all_inconsistent.bed"), file("*.psl") into ch_correct

    script:
    // The thread count follows the `cpus` directive via task.cpus.
    """
    python ~/scripts/git_controlled/flair/flair.py correct -g ${ref} -q ${bed} -f $humanGTF -c $chromsizes -t ${task.cpus}
    """
}
// Run `flair.py collapse` on the corrected PSL together with the merged reads.
//
// Input : the (corrected, inconsistent, psl) triple from ch_correct (only the
//         PSL is passed to flair) and the merged FASTQ from ch_catFastq2.
// Output: the triple (*isoforms.psl, *isoforms.gtf, *isoforms.fa), sent to
//         ch_collapse.
process flairCollapse {
    // Directives are declared first, before input/output, as the Nextflow
    // documentation requires. publishDir is declared once (it was duplicated).
    publishDir path: './output', mode: 'copy'
    cache 'lenient'
    executor globalExecutor
    stageInMode globalStageInMode
    cpus 8
    // The module directive is repeated once per environment module to load.
    module condaModule
    module samtoolsModule
    conda '/home/jste0021/.conda/envs/py3.5/'
    memory globalMemoryM
    time '6h'
    queue globalQueueL

    input:
    set file(corrected), file(inconsistent), file(psl) from ch_correct
    file(fastq) from ch_catFastq2

    output:
    // NOTE(review): no annotation (-f) is passed to collapse below; confirm
    // that the installed flair version still writes *isoforms.gtf without it,
    // otherwise this output declaration will fail the task.
    set file("*isoforms.psl"), file("*isoforms.gtf"), file("*isoforms.fa") into ch_collapse

    script:
    // The thread count follows the `cpus` directive via task.cpus.
    """
    python ~/scripts/git_controlled/flair/flair.py collapse --keep_intermediate -g ${ref} -r ${fastq} -q ${psl} -t ${task.cpus}
    """
}