-
Notifications
You must be signed in to change notification settings - Fork 0
/
dcc_starter.pl
executable file
·98 lines (70 loc) · 4.71 KB
/
dcc_starter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#/usr/bin/perl -w
use strict;
# starting vars
my$currdir=`pwd`;
chdir "../";
my$starttime= time;
open(ER,'>>',"/home/daniel/logfile_auto.log")||die "$!"; # global logfile
`rm Chimeric.out.junction`;
`rm fusion_junction.txt`;
# first get reference place for hg19
my$bowtwiindexplace="/media/daniel/NGS1/RNASeq/find_circ/dcc/";
# change to reference place for execution
if(!($currdir eq $bowtwiindexplace)){
chdir $bowtwiindexplace;
}
my$infile1=$ARGV[0];
chomp $infile1;
my$infile2=$ARGV[1];
chomp $infile2;
my$samplename=$ARGV[2];
chomp $samplename;
print ER "-\nsample $samplename DCC processing:\n";
#### start of dcc for this sample
mkdir "run_$samplename";
# go there
chdir "run_$samplename";
print ER "doing mate STAR alignment...\n";
# start DCC in find_circ/dcc/
# STAR --runThreadN 10 --genomeDir . --outSAMtype BAM SortedByCoordinate --readFilesIn HAL01_R1_trimmed.fastq HAL01_R2_trimmed.fastq --outFileNamePrefix halo1_r --outReadsUnmapped Fastx --outSJfilterOverhangMin 15 15 15 15 --alignSJoverhangMin 15 --alignSJDBoverhangMin 15 --outFilterMultimapNmax 20 --outFilterScoreMin 1 --outFilterMatchNmin 1 --outFilterMismatchNmax 2 --chimSegmentMin 15 --chimScoreMin 15 --chimScoreSeparation 10 --chimJunctionOverhangMin 15
my$tophatout=`STAR --runThreadN 10 --genomeDir ../ --outSAMtype BAM SortedByCoordinate --readFilesIn ../../$infile1 ../../$infile2 --outFileNamePrefix $samplename. --outReadsUnmapped Fastx --outSJfilterOverhangMin 15 15 15 15 --alignSJoverhangMin 15 --alignSJDBoverhangMin 15 --outFilterMultimapNmax 20 --outFilterScoreMin 1 --outFilterMatchNmin 1 --outFilterMismatchNmax 2 --chimSegmentMin 15 --chimScoreMin 15 --chimScoreSeparation 10 --chimJunctionOverhangMin 15 --limitBAMsortRAM 512000000000`;
# creates auto_$samplename dir in test/
print ER "errors during STAR alignment:\n $tophatout\n";
# mate1 and
# mate2 need to get remaned files aswell
my$laneonename="lane_1$samplename";
my$lanetwoname="lane_2$samplename";
# alignment first read .fastq file
print ER "doing line 1 STAR alignment...\n";
my$err_star_line1=`STAR --runThreadN 10 --genomeDir ../ --outSAMtype None --readFilesIn ../../$infile1 --outFileNamePrefix $laneonename. --outReadsUnmapped Fastx --outSJfilterOverhangMin 15 15 15 15 --alignSJoverhangMin 15 --alignSJDBoverhangMin 15 --seedSearchStartLmax 30 --outFilterMultimapNmax 20 --outFilterScoreMin 1 --outFilterMatchNmin 1 --outFilterMismatchNmax 2 --chimSegmentMin 15 --chimScoreMin 15 --chimScoreSeparation 10 --chimJunctionOverhangMin 15 --limitBAMsortRAM 512000000000`;
print ER "errors during STAR lane 1 alignment:\n $err_star_line1\n";
print ER "doing line 2 STAR alignment...\n";
my$err_star_line2=`STAR --runThreadN 10 --genomeDir ../ --outSAMtype None --readFilesIn ../../$infile2 --outFileNamePrefix $lanetwoname. --outReadsUnmapped Fastx --outSJfilterOverhangMin 15 15 15 15 --alignSJoverhangMin 15 --alignSJDBoverhangMin 15 --seedSearchStartLmax 30 --outFilterMultimapNmax 20 --outFilterScoreMin 1 --outFilterMatchNmin 1 --outFilterMismatchNmax 2 --chimSegmentMin 15 --chimScoreMin 15 --chimScoreSeparation 10 --chimJunctionOverhangMin 15 --limitBAMsortRAM 512000000000`;
print ER "errors during STAR lane 2 alignment:\n $err_star_line2\n";
#now we have
#run_$samplename/$samplenameChimeric.out.junction for sample1
#run_$samplename/lane_1$samplenameChimeric.out.junction for lane 1
#run_$samplename/lane_2$samplenameChimeric.out.junction for lane 2
##
# run dcc
my$bothlanesname="$samplename.Chimeric.out.junction";
##
print ER "running dcc..\n";
my$dcc_err=`DCC $bothlanesname -mt1 $laneonename.Chimeric.out.junction -mt2 $lanetwoname.Chimeric.out.junction -D -fg -an ../hg19_ens.gtf -Pi -M -Nr 2 1 -A ../hg19.fa -N -T 10 -an ../all_ref.gtf`;
print ER "errors running dcc: $dcc_err\n";
# seding, removing headers
print ER "removing header from CircRNACount...\n";
#my$line=qq{"sed -i '1d' CircRNACount"};
my$sed=`sed -i '1d' CircRNACount`;
print "errors removing headers:\n$sed\n";
# bedtools annotations
print ER "using bedtools for annotation of CircRNACount file for $samplename...\n";
my$betout=`bedtools window -a CircRNACount -b /media/daniel/NGS1/RNASeq/find_circ/bed_files/Genes_RefSeq_hg19_09.20.2013.bed -w 1 >CircRNACount_annotated.tsv`;
print ER "errors annotating $samplename :\n$betout\n";
# now parsing the output
print ER "parsing in run_$samplename ...\n";
my$err_running_dcc_outreader=`perl ../automate_DCC/dcc_outreader.pl CircRNACount_annotated.tsv CircCoordinates processed_run_$samplename.tsv $samplename`;
print ER "errors parsing in run_$samplename/ : \n$err_running_dcc_outreader\n";
print ER "############################################################\nsample $samplename done :\n";
my$total_time=((time)-$starttime)/60;
print ER "done. \n\n took $total_time minutes for sample $samplename\n";