-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path1_GenomicFeatureDistribution.sh
39 lines (31 loc) · 1.56 KB
/
1_GenomicFeatureDistribution.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
# Calculate the Tn5 cut-site distribution across genomic features.
#
# For every *filtered.dedup.shifted.insertSites.bed file in the current
# directory this script:
#   1. removes cut sites that overlap the blacklist        -> <base>_rb.bed
#   2. collapses identical (chrom, start, end) cut sites   -> <base>_rb_uq.bed
#   3. runs Genomic_feature_occupancy_significance_v2.py on both files, once
#      with "-r 0" and once with "-r 0.05".
#
# Configuration_info comes from the sourced 0.utilities.sh; it is expected to
# set GENOME_NAME, GENOME_SIZE_NUM, FEATURE_PATH and blacklist for the species
# id passed to it (those variables are not defined anywhere in this script).

source /public/home/zhy/Tn5_bias/scripts/0.utilities.sh || {
  echo "ERROR: cannot source /public/home/zhy/Tn5_bias/scripts/0.utilities.sh" >&2
  exit 1
}

# Plain assignment so the leading ~ is still tilde-expanded.
feature_script=~/Zhang_Scripts/Zhang/Genomic_feature_occupancy_significance_v2.py

for i in *filtered.dedup.shifted.insertSites.bed; do
  # Without nullglob an unmatched pattern is passed through literally.
  [[ -e "${i}" ]] || continue

  base=${i%.bed}

  # Get species information for current sample: the species id is everything
  # before the first underscore of the file name.
  id=${base%%_*}
  Configuration_info "${id}"
  echo "Processing ${i} for ${GENOME_NAME} ..."

  # Arguments shared by every invocation for this sample; rebuilt per sample
  # because Configuration_info may change the genome settings.
  feature_cmd=(python "${feature_script}" -g "${GENOME_SIZE_NUM}" -f "${FEATURE_PATH}")

  # Get blacklist removed cut sites.
  # The result is written to a temporary file and renamed only on success, so
  # an interrupted or failed run never leaves a truncated *_rb.bed behind that
  # a later run would silently reuse. pipefail is enabled in a subshell only,
  # to avoid changing the behaviour of the sourced helper functions.
  if [[ ! -f "${base}_rb.bed" ]]; then
    if (
      set -o pipefail
      sort -k1,1 -k2,2n -- "${i}" \
        | bedtools intersect -nonamecheck -a - -b "${blacklist}" -v -nobuf \
          > "${base}_rb.bed.tmp"
    ); then
      mv -- "${base}_rb.bed.tmp" "${base}_rb.bed"
    else
      echo "ERROR: blacklist filtering failed for ${i}; skipping sample" >&2
      rm -f -- "${base}_rb.bed.tmp"
      continue
    fi
  fi

  # All cut sites for calculation
  "${feature_cmd[@]}" -s "${base}_rb.bed" -o "${base}.feature.out" -r 0

  # Remove outliers cut sites for calculation
  "${feature_cmd[@]}" -s "${base}_rb.bed" -o "${base}_r0.05.feature.out" -r 0.05

  # Get unique cut sites.
  # The awk key uses the comma form ($1,$2,$3), which joins the fields with
  # SUBSEP. Plain concatenation ($1$2$3) can merge distinct sites, e.g.
  # "chr1 1 23" and "chr11 2 3" both become "chr1123".
  if [[ ! -f "${base}_rb_uq.bed" ]]; then
    if (
      set -o pipefail
      sort -k1,1 -k2,2n -- "${base}_rb.bed" \
        | awk '!seen[$1,$2,$3]++' > "${base}_rb_uq.bed.tmp"
    ); then
      mv -- "${base}_rb_uq.bed.tmp" "${base}_rb_uq.bed"
    else
      echo "ERROR: de-duplication failed for ${base}_rb.bed; skipping sample" >&2
      rm -f -- "${base}_rb_uq.bed.tmp"
      continue
    fi
  fi

  # Unique cut sites for calculation
  "${feature_cmd[@]}" -s "${base}_rb_uq.bed" -o "${base}_uq.feature.out" -r 0

  # Remove outliers unique cut sites for calculation.
  # As in the original, this last run is detached with nohup and put in the
  # background, so the loop moves on to the next sample without waiting for it.
  nohup "${feature_cmd[@]}" -s "${base}_rb_uq.bed" \
    -o "${base}_uq_r0.05.feature.out" -r 0.05 &
done