-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodule1-16S.sh
58 lines (49 loc) · 1.71 KB
/
module1-16S.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
{
###make folders
mkdir -p raw_data
mkdir -p processed/1.trimmed_primers
mkdir -p processed/2.filtered
mkdir -p reports/quality
mkdir -p temp
###quality check of the rawdata
#fastqc ./raw_data/*.gz -o ./raw_data/quality/
#multiqc -f ./raw_data/quality/ -o ./raw_data/quality/
###trimming
##read primers
primF=`awk 'NR==2' primers.fasta`
primR=`awk 'NR==4' primers.fasta`
seqkit seq -p -r -t dna primers.fasta > temp/primers_RC.fasta
primFrc=`awk 'NR==2' temp/primers_RC.fasta`
primRrc=`awk 'NR==4' temp/primers_RC.fasta`
##trim primers
cd raw_data/
for F in *_1.fq.gz; do
echo "trimming " $primF " and " $primR " in " $F " and " ${F//_1.fq.gz/_2.fq.gz}
cutadapt -g $primF -G $primR -a $primFrc -A $primRrc -n 1 \
-o ../processed/1.trimmed_primers/${F//raw_1.fq.gz/trim_1.fq.gz} \
-p ../processed/1.trimmed_primers/${F//raw_1.fq.gz/trim_2.fq.gz} \
--minimum-length 30 --cores 0 --report=minimal $F ${F//_1.fq.gz/_2.fq.gz} \
1>> ../reports/cutadapt_report.txt
done
cd ..
rm temp/primers_RC.fasta
## quality filtering with fastp
cd processed/1.trimmed_primers/
for F in *_1.fq.gz; do
echo "working on " $F " and " ${F//_1.fq.gz/_2.fq.gz}
fastp -i $F -I ${F//_1.fq.gz/_2.fq.gz} \
-o ../2.filtered/${F//trim_1.fq.gz/filt_1.fq.gz} \
-O ../2.filtered/${F//trim_1.fq.gz/filt_2.fq.gz} \
-h ../../reports/quality/${F//_trim_1.fq.gz/}.html -R ${F//_trim_1.fq.gz/} \
-j ../../reports/quality/${F//_trim_1.fq.gz/}.json \
-y -l 30 -r --cut_window_size 4 --cut_mean_quality 23 \
--n_base_limit 0
done
cd ../..
## potionally you can use the inbuild dada2 quality filtering tool
## run an R script to filter and trim
#Rscript --vanilla src/filter.R
##
echo "Check the quality before going further !"
} 2>&1 | tee ./processed/module1.out