-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathhisat2.temp.script
72 lines (61 loc) · 2.17 KB
/
hisat2.temp.script
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
#PBS -k o
#PBS -l nodes=1:ppn=8:dc,vmem=32gb,walltime=48:00:00
#PBS -M [email protected]
#PBS -m abe
#PBS -N HISAT2
#PBS -j oe
#PBS -e /home/zhenyisong/data/cardiodata/SRP029464
#PBS -o /home/zhenyisong/data/cardiodata/SRP029464
##---
## discarded!!!
## #python -m HTSeq.scripts.count -f bam -r name -s no $base.sorted.bam $gtf > $base.txt
##---
#---
# unique mapping read
# see the hisat2 manual:
# https://ccb.jhu.edu/software/hisat2/manual.shtml
# SAM output
# see the discussion
# https://www.researchgate.net/post/How_can_I_get_uniquely_mapped_reads_from_Tophatv2012
#---
source /etc/profile
##----------------------------------------------------------
## old index file was broken, I do not know why.
## I used the command below to re-build the genome
##----------------------------------------------------------
hisat2='/data/software/bin/hisat2-2.0.4'
stringtie='/home/zhenyisong/bin/stringtie'
preDE='/usr/bin/prepDE.py'
mm10_genome='/home/zhenyisong/data/genome/mm10_Bowtie1Index/mm10.fa'
mm10_index='/home/zhenyisong/data/genome/hisat2/mm10'
mergelist='mergelist.txt'
merge_gtf='stringtie_merged.gtf'
cd $mm10_index
## discard to build the index.
## $hisat2/hisat2-build -f -p 4 $mm10_genome genome
gtf='/home/zhenyisong/data/bringback/igenome/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf'
fastq='/home/zhenyisong/data/cardiodata/SRP029464'
## if unpiared the data, -U parameter will be used
##shopt -s nullglob
cd /home/zhenyisong/data/cardiodata/SRP029464
files1=(*_1.fastq)
files2=(*_2.fastq)
len=${#files1[@]}
cd /home/zhenyisong/data/cardiodata/SRP029464/hisat2
#---
# this is dangerous!!!!
#---
#rm -rf *
for (( i=0; i<${len}; i++ ));
do
forward=${files1[$i]}
backward=${files2[$i]}
base=${forward%_1.fastq}
$hisat2/hisat2 -p 4 --dta --fr --rna-strandness FR -x $mm10_index/genome -1 $fastq/$forward -2 $fastq/$backward -S $base.sam
samtools view -H $base.sam > header.temp.sam
samtools view $base.sam | grep -w "NH:i:1" | cat header.temp.sam - | samtools view -Sb - |samtools sort -@ 8 - $base
$stringtie -e -B -p 4 -G $gtf -o ballgown/$base/$base.gtf $base.bam
done
$preDE -i ballgown
rm header.temp.sam