blast_nt lablog: rename BLAST outputs from .tab to .tsv and fix scaffold path

- Drop a duplicated "<sample>.scaffolds.fa.gz/" path component in the zcat call.
- Write per-sample BLAST results as <sample>_blast.tsv / <sample>_blast_filt.tsv.
- Make the generated _03_gather_results.sh glob the renamed *blast_filt.tsv
  files; the merged output file name is intentionally left unchanged.

NOTE(review): line breaks and context-line indentation were reconstructed from
the hunk headers; confirm against the target file (or apply with
--ignore-whitespace). The post-image hash on the "index" line predates the
_03_gather_results.sh change.

diff --git a/bu_isciii/templates/blast_nt/ANALYSIS/ANALYSIS02_BLAST/lablog b/bu_isciii/templates/blast_nt/ANALYSIS/ANALYSIS02_BLAST/lablog
index b955a424..a6974298 100644
--- a/bu_isciii/templates/blast_nt/ANALYSIS/ANALYSIS02_BLAST/lablog
+++ b/bu_isciii/templates/blast_nt/ANALYSIS/ANALYSIS02_BLAST/lablog
@@ -1,6 +1,5 @@
 # module load BLAST+/2.11.0-gompi-2020b
 
-
 scratch_dir=$(echo $PWD | sed "s/\/data\/bi\/scratch_tmp/\/scratch/g")
 
 mkdir logs
@@ -16,7 +15,7 @@ cat ../samples_id.txt | while read in; do
     # NOTE: change extension and location at will
     # NOTE2: zcat is only used in case of gzipped files, use a cp or ln -s if needed
     if [ $(ls ${LOCATION}/${in}.scaffolds.fa.gz | wc -l) != 0 ]; then
-        zcat ${LOCATION}/${in}.scaffolds.fa.gz/${in}.scaffolds.fa.gz > ${in}/${in}.scaffolds.fa
+        zcat ${LOCATION}/${in}.scaffolds.fa.gz > ${in}/${in}.scaffolds.fa
     else
         # Note assemblies that did not make a scaffold
         zcat ${LOCATION}/${in}.contigs.fa.gz > ${in}/${in}.scaffolds.fa
@@ -25,9 +24,8 @@ cat ../samples_id.txt | while read in; do
 done
 
 # NOTE3: change the -query flag to meet your requirements
-
-cat ../samples_id.txt | xargs -I %% echo "srun --chdir ${scratch_dir} --partition middle_idx --mem 376530M --time 48:00:00 --cpus-per-task 10 --output logs/BLASTN_%%_%j.log --job-name BLASTN_%% blastn -num_threads 10 -db /data/bi/references/BLAST_dbs/nt_20211025/nt -query %%/%%.scaffolds.fa -out %%/%%_blast.tab -outfmt '6 qseqid stitle std slen qlen qcovs' &" > _01_blast.sh
-cat ../samples_id.txt | xargs -I %% echo "awk 'BEGIN{OFS=\"\t\";FS=\"\t\"}{print \$0,\$6/\$16,\$6/\$15}' %%/%%_blast.tab | awk -v \"samplename=%%\" 'BEGIN{OFS=\"\t\";FS=\"\t\"} \$15 > 200 && \$17 > 0.7 && \$1 !~ /phage/ {print samplename,\$0}' > %%/%%_blast_filt.tab" > _02_filter_blast.sh
+cat ../samples_id.txt | xargs -I %% echo "srun --chdir ${scratch_dir} --partition middle_idx --mem 376530M --time 48:00:00 --cpus-per-task 10 --output logs/BLASTN_%%_%j.log --job-name BLASTN_%% blastn -num_threads 10 -db /data/bi/references/BLAST_dbs/nt_20211025/nt -query %%/%%.scaffolds.fa -out %%/%%_blast.tsv -outfmt '6 qseqid stitle std slen qlen qcovs' &" > _01_blast.sh
+cat ../samples_id.txt | xargs -I %% echo "awk 'BEGIN{OFS=\"\t\";FS=\"\t\"}{print \$0,\$6/\$16,\$6/\$15}' %%/%%_blast.tsv | awk -v \"samplename=%%\" 'BEGIN{OFS=\"\t\";FS=\"\t\"} \$15 > 200 && \$17 > 0.7 && \$1 !~ /phage/ {print samplename,\$0}' > %%/%%_blast_filt.tsv" > _02_filter_blast.sh
 echo -e "echo \"samplename\tcontigname\tstitle\tqaccver\tsaccver\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tslen\tqlen\tqcovs\t%cgAligned\t%refCovered\" > header" > _03_gather_results.sh
-echo "cat header */*blast_filt.tab > all_samples_filtered_BLAST_results.tab" >> _03_gather_results.sh
+echo "cat header */*blast_filt.tsv > all_samples_filtered_BLAST_results.tab" >> _03_gather_results.sh
 echo "rm header" >> _03_gather_results.sh