Skip to content

Latest commit

 

History

History
87 lines (61 loc) · 2.32 KB

README.md

File metadata and controls

87 lines (61 loc) · 2.32 KB

RNA_SEQ_Analysis Pipeline

1 Quality Control Check on raw sequencing data

1.1 Install the Java Development Kit followed by FASTQC

1.2 Run FASTQC for all fastq files

# Run FastQC on every FASTQ file in the current directory.
# FastQC does not create its output directory, so make sure it exists first.
mkdir -p fastqc_results
for fq in *.fastq; do
	# With no matching files the glob stays literal; skip it instead of
	# handing "*.fastq" to FastQC.
	[ -e "$fq" ] || continue
	printf '%s\n' "$fq"
	./fastqc "$fq" -o fastqc_results/
done

1.3 Check for quality plots in fastqc_results

2 Alignment and Read Counts

2.1 Download the reference genome and the Gene Transfer Format (GTF) annotation file

2.2 Install Bowtie2 and RSEM, then build the RSEM reference

	```
	# Fetch the tutorial repository; the Bowtie2 and RSEM source archives unpacked
	# below are expected under its software/ directory.
	# (HTTPS equivalent: https://github.com/bli25ucb/RSEM_tutorial.git)
	git clone git@github.com:bli25ucb/RSEM_tutorial.git
	# git clone places the checkout in RSEM_tutorial/; every path below is
	# relative to that directory.
	cd RSEM_tutorial
	cd software

	# Build Bowtie2 from source.
	unzip bowtie2-2.2.6-source.zip
	cd bowtie2-2.2.6
	make -j 8
	cd ..

	# Build RSEM and its bundled EBSeq component.
	tar -xzf RSEM-1.2.25.tar.gz
	cd RSEM-1.2.25
	make -j 8
	make ebseq

	# Back to the repository root (out of software/RSEM-1.2.25).
	cd ../..

	# Decompress the genome FASTA and GTF annotation from step 2.1.
	# NOTE(review): assumes both files were downloaded into ref/ under the
	# repository root - confirm the download location.
	gunzip ref/Homo_sapiens.GRCh38.dna_sm.toplevel.fa.gz
	gunzip ref/Homo_sapiens.GRCh38.95.chr.gtf.gz

	# Build the RSEM reference (with Bowtie2 indices) under the name ref/1_hg38.
	software/RSEM-1.2.25/rsem-prepare-reference \
		--gtf ref/Homo_sapiens.GRCh38.95.chr.gtf \
		--bowtie2 --bowtie2-path software/bowtie2-2.2.6 \
		ref/Homo_sapiens.GRCh38.dna_sm.toplevel.fa \
		ref/1_hg38
	```

2.3 Submit the jobs to Cypress (Tulane HPC) for alignment and counts

```
#!/bin/bash
#
# Align one paired-end sample with STAR and quantify it with RSEM.
# Usage: sbatch rsem_star_pe.sh <sample>_1.fastq
#   $1  either mate's FASTQ file name (<sample>_1.fastq or <sample>_2.fastq);
#       both mates must exist next to each other.
# Output: RSEM results written with the prefix <sample>_hg38.
#
#SBATCH --qos=normal
#SBATCH --job-name=rsem_star_pe
# %j expands to the job ID, so samples submitted together do not overwrite
# each other's log files.
#SBATCH -o rsem_star_pe_Output_log_%j.txt
#SBATCH -e rsem_star_pe_Error_log_%j.txt
# Replace the placeholder with the address that should receive notifications.
#SBATCH --mail-user=YOUR_EMAIL_ADDRESS
#SBATCH --mail-type=ALL
#SBATCH --time=12:00:00
#SBATCH --nodes=1 #nodes
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=20
#SBATCH --mem=128000

module load rsem/1.2.31
module load star
module load samtools
module load bzip2

# Abort with a usage message when no FASTQ file name was supplied.
: "${1:?usage: sbatch rsem_star_pe.sh <sample>_1.fastq}"

# Strip the mate suffix (_1.fastq or _2.fastq) to get the sample prefix.
prefix=${1%_[1-2].fastq}

# Fail early, before the aligner starts, if either mate file is missing.
for mate in 1 2; do
	if [[ ! -r "${prefix}_${mate}.fastq" ]]; then
		printf 'error: cannot read %s\n' "${prefix}_${mate}.fastq" >&2
		exit 1
	fi
done

# Follow the CPU count granted by Slurm; fall back to 20 (the value requested
# above) when the script is run outside of a Slurm allocation.
threads=${SLURM_CPUS_PER_TASK:-20}

# --forward-prob=0 tells RSEM that reads come from the reverse strand
# (NOTE(review): confirm this matches the library preparation protocol).
rsem-calculate-expression --star -p "$threads" --paired-end \
	--output-genome-bam --sort-bam-by-coordinate --append-names \
	--forward-prob=0 \
	"${prefix}_1.fastq" "${prefix}_2.fastq" \
	/lustre/project/lgragert/peptide-binding/rna_seq/1_hg38/hg38_chr_labels \
	"${prefix}_hg38"
	```

	# Submit one alignment job per paired-end sample, keyed on its mate-1 file.
	for mate1 in *_1.fastq; do
		# With no matching files the glob stays literal; do not submit a job for it.
		[ -e "$mate1" ] || continue
		sbatch rsem_star_pe.sh "$mate1"
	done


module load rsem

# Gene-level RSEM results, grouped by condition: the two Can samples first,
# then the two CTR samples.
gene_results=(
	Can_5_hg38.genes.results
	Can_6_hg38.genes.results
	CTR_1_hg38.genes.results
	CTR_2_hg38.genes.results
)
count_matrix=canvsctrl.genes.matrix.txt

# Merge the per-sample results into one matrix, one column per sample.
rsem-generate-data-matrix "${gene_results[@]}" > "$count_matrix"

# Run EBSeq on the matrix; "2,2" gives the number of replicates in each
# condition, in column order.
rsem-run-ebseq "$count_matrix" 2,2 canvsctrl.genes.results.txt