-
Notifications
You must be signed in to change notification settings - Fork 0
/
surpi_blast_summary.sh~
executable file
·49 lines (39 loc) · 1.97 KB
/
surpi_blast_summary.sh~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
#DATASETS_B226-RAB2/genus.bar.SL148717.B226-RAB2.plotting/SnRa.Nairovirus.bar.SL148717.B226-RAB2.223019527.Nairovirus.1e-15.Blastn - identity
# bar.SL148717.Nairovirus.B226-RAB2.78191716.1e-15.Report - name #reads
# OUTPUT genus.bar.SL148717.B226-RAB2.Blastn.fasta/SnRa.Nairovirus.bar.SL148717.B226-RAB2.78191716.Nairovirus.1e-15.Blastn.uniq.ex.fa - gi of winner
outfile=${1:-surpi_blast_summary.tsv}
persum=0
ls *.config || exit 1
root=$(ls *.config|cut -f1 -d'.')
tempdir=$(mktemp -d)
filter=1
filterfile=$HOME/surpi_tmp/genusfilterlist.txt
for sampledir in OUTPUT_$root/genus.bar*; do
sampleid=$(echo $sampledir|sed "s/.*genus.bar.\(.*\).$root.Blastn.fasta/\1/")
for aln in $sampledir/*.fa; do
persum=0; reads=0;
aln2=${aln##*/}
gi=$(echo $aln2|cut -f6 -d'.')
genus=$(echo $aln2|cut -f7 -d'.')
blastfile=DATASETS_$root/genus.bar.$sampleid.$root.plotting/${aln2%.uniq.ex.fa}
reportfile=DATASETS_$root/genus.bar.$sampleid.$root.plotting/bar.$sampleid.$genus.$root.$gi.1e-15.Report
reads=$(wc -l < $blastfile)
giplus=$(head -n 3 $reportfile|tail -n 1)
covfrac="$(grep "Coverage in bp" $reportfile|awk '{print$5}')/$(grep "Reference sequence length" $reportfile|awk '{print$5}')"
covperc=$(grep \%Coverage $reportfile|awk '{print$3}')
covdepth=$(grep "Average depth of coverage" $reportfile|awk '{print$6}')
average=$(awk '{sum += $3} END {print sum/NR}' $blastfile)
echo -e "$sampleid\t$genus\t$reads\t$covfrac\t$covperc\t$covdepth\t$average\t$giplus" >> $tempdir/$sampleid
done
sort -k5 -nr $tempdir/$sampleid > $tempdir/$sampleid.sorted
rm $tempdir/$sampleid
done
echo -e "SampleID\tGenus\tNum_reads\tCov_frac\tCov_perc\tCov_depth\tAvg_BLAST_ID\tGI" > $outfile
if [ $filter -eq 1 ] ; then
terms=$(tr '\n' '|' < $filterfile|sed 's;||;;g'|sed 's;|$;;g')
for i in $tempdir/*; do grep -vE "$terms" $i; echo -en "\n" ; done >> $outfile
else
cat $tempdir/* >> $outfile
fi
rm -rf $tempdir