-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy path02_get_genome.sh
53 lines (42 loc) · 1.74 KB
/
02_get_genome.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash
#SBATCH --job-name=get_genome
#SBATCH -n 1
#SBATCH -N 1
#SBATCH -c 4
#SBATCH --mem=2G
#SBATCH --partition=general
#SBATCH --qos=general
#SBATCH --mail-type=ALL
# NOTE(review): the address on the next line was obfuscated in the copy this
# script was recovered from; replace the placeholder with your own e-mail.
#SBATCH --mail-user=first.last@example.com
#SBATCH -o %x_%j.out
#SBATCH -e %x_%j.err

#################################################################
# Download genome and annotation from ENSEMBL
#
# Fetches the Fundulus heteroclitus (Ensembl release 105) genome
# fasta, GTF annotation and CDS fasta, decompresses them, and builds
# samtools .fai indexes for the two fasta files. Everything is
# written directly into $GENOMEDIR.
# https://useast.ensembl.org/Fundulus_heteroclitus/Info/Index
#################################################################

# Print a message to stderr and abort the job with a failing status.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Download one gzipped file into $GENOMEDIR and decompress it in place.
# Arguments: $1 - URL of a .gz file
# -O / -f make the step re-runnable: a stale partial archive or an
# already-decompressed file from a previous attempt is overwritten.
fetch_and_gunzip() {
  local url=$1
  local archive="${GENOMEDIR}/${url##*/}"
  wget -O "$archive" "$url" || die "download failed: $url"
  gunzip -f "$archive" || die "decompression failed: $archive"
}

# record where and when the job ran
hostname
date

# load software (done before enabling strict mode, because the site-provided
# `module` function is not guaranteed to be clean under `set -u`)
module load samtools/1.12 || die "could not load samtools/1.12"

# from here on, stop at the first failing command or unset variable
set -euo pipefail

# output directory (relative to the directory the job is started from)
GENOMEDIR=../genome
mkdir -p "$GENOMEDIR"

# base name shared by all Ensembl files for this assembly
PREFIX=Fundulus_heteroclitus.Fundulus_heteroclitus-3.0.2

# download and decompress the genome
fetch_and_gunzip "ftp://ftp.ensembl.org/pub/release-105/fasta/fundulus_heteroclitus/dna/${PREFIX}.dna.toplevel.fa.gz"

# download and decompress the GTF annotation
fetch_and_gunzip "ftp://ftp.ensembl.org/pub/release-105/gtf/fundulus_heteroclitus/${PREFIX}.105.gtf.gz"

# download and decompress the transcript (CDS) fasta
# NOTE(review): this URL uses http while the two above use ftp; kept as-is.
fetch_and_gunzip "http://ftp.ensembl.org/pub/release-105/fasta/fundulus_heteroclitus/cds/${PREFIX}.cds.all.fa.gz"

# generate simple samtools fai indexes (written next to each fasta)
samtools faidx "${GENOMEDIR}/${PREFIX}.dna.toplevel.fa"
samtools faidx "${GENOMEDIR}/${PREFIX}.cds.all.fa"