-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
53 lines (45 loc) · 1.34 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Default target rule: for each assembly (grch38, grch37) and each release
# number 77..99, request both the downloaded GTF and the CSV derived from it.
# NOTE(review): assumes every release in this range is published for both
# assemblies on the Ensembl FTP site — confirm.
rule all:
    input:
        # Keyword order matters only for the order of the generated list:
        # the last keyword varies fastest, so this yields all grch38 GTFs,
        # then grch38 CSVs, then grch37 GTFs, then grch37 CSVs.
        expand(
            "{assembly}-{build}/genes.{ext}",
            assembly=["grch38", "grch37"],
            ext=["gtf.gz", "csv"],
            build=range(77, 100)
        )
# Reduce a gzipped GTF to a header-less two-column CSV: gene_id,gene_name.
#
# Only records whose third column (feature type) is "gene" are kept. The two
# attributes are looked up by key inside column 9 rather than by position, so
# a record that lacks gene_name (or has its attributes in another order)
# yields an empty name instead of silently emitting an unrelated attribute.
rule convert_gtf:
    input:
        "{assembly}-{build}/genes.gtf.gz"
    output:
        "{assembly}-{build}/genes.csv"
    # The slurm__* values are not referenced by the shell command below;
    # presumably they are read by a cluster-submission wrapper (units are not
    # visible in this file) — TODO confirm.
    params:
        slurm__hours=1,
        slurm__cores=4,
        slurm__mem=5
    # Offsets in substr(): 'gene_id "' is 9 characters and 'gene_name "' is 11,
    # and RLENGTH additionally counts the closing quote.
    # Braces are doubled because Snakemake formats the command string.
    shell:
        """
        zcat {input} \
            | awk -F"\\t" '
                $3 == "gene" {{
                    id = ""; name = ""
                    if (match($9, /gene_id "[^"]*"/)) {{ id = substr($9, RSTART + 9, RLENGTH - 10) }}
                    if (match($9, /gene_name "[^"]*"/)) {{ name = substr($9, RSTART + 11, RLENGTH - 12) }}
                    print id "," name
                }}
              ' > {output}
        """
# Download the Ensembl GRCh37 gene annotation (GTF) for one release into
# grch37-{build}/genes.gtf.gz.
#
# The GRCh37 gene set was last rebuilt for Ensembl 87; later GRCh37 release
# directories still serve the file under the ".87." name. The release
# directory therefore follows {build}, while the version embedded in the file
# name is capped at 87. For builds <= 87 the URL is unchanged.
rule get_gtf37:
    output:
        "grch37-{build}/genes.gtf.gz"
    # slurm__skip is not used by the command itself; presumably it tells a
    # cluster-submission wrapper to run this rule locally — TODO confirm.
    params:
        slurm__skip=True,
        gtf_version=lambda wildcards: min(int(wildcards.build), 87)
    shell:
        """
        wget \
        ftp://ftp.ensembl.org/pub/grch37/release-{wildcards.build}/gtf/homo_sapiens/Homo_sapiens.GRCh37.{params.gtf_version}.gtf.gz \
        -O {output}
        """
# Download the Ensembl GRCh38 gene annotation (GTF) for release {build} and
# store it as grch38-{build}/genes.gtf.gz.
rule get_gtf:
    output:
        "grch38-{build}/genes.gtf.gz"
    # slurm__skip is not referenced by the command; presumably it is read by a
    # cluster-submission wrapper — TODO confirm.
    params:
        slurm__skip=True
    # The release number appears twice in the URL: once in the directory name
    # and once in the file name.
    shell:
        """
        wget \
        ftp://ftp.ensembl.org/pub/release-{wildcards.build}/gtf/homo_sapiens/Homo_sapiens.GRCh38.{wildcards.build}.gtf.gz \
        -O {output}
        """