#!/bin/bash
#*************************************************************************
#
# Program: AbDBpipeline
# File: AbDBpipeline.sh
#
# Version: V1.1
# Date: 22.04.14
# Function: Automatic pipeline to process the data for AbDb. It generates
#           the datasets for every numbering scheme (Martin, Kabat and Chothia)
# Usage: ./AbDBpipeline.sh <list of antibody PDB codes>
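#        e.g. ./AbDBpipeline.sh antibody_pdb_codes.list  (file name is
#        illustrative; the list is passed on to processAntibodyPDBs.pl)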
# Note: the abdb.cfg file must be in the current working directory
#
#
# Copyright: (c) UCL, Saba Ferdous, 2014
# Author: Miss Saba Ferdous
# Address: Institute of Structural and Molecular Biology
# Division of Biosciences
# University College
# Gower Street
# London
# WC1E 6BT
# EMail: [email protected]
#
#*************************************************************************
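# abdb.cfg is sourced below and must define at least the path variables used in
# this script. An illustrative sketch (values are assumptions, adapt as needed):
#   abdb_HOME=/home/abdb/AbDb           # installation root providing bin/ and lib/
#   dataprep_dest=/data/abdb/dataprep   # archive area for dated data-prep folders
#   webdata_dest=/data/abdb/webdata     # destination for the processed Data directory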
. ./abdb.cfg
lib=$abdb_HOME/lib
bin=$abdb_HOME/bin
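##############################
# process: for one dataset label (LH, L or H) and numbering scheme, merge the
# Free, Protein-antigen and NonProtein-antigen sets into the Combined directory,
# compute redundant clusters for each of the four datasets, derive the
# non-redundant (NR_*) sets and move the results into the Data directory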
function process
{
# parameters
free=$1
proAntigen=$2
npAntigen=$3
Combined=$4
scheme=$5
label=$6
data="Data"
mkdir -p $data
Redundant="Redundant_files"
mkdir -p $Combined
mkdir -p $Redundant
# copy all the complexes into the Combined directory
cp ./$free/* ./$Combined
cp ./$proAntigen/* ./$Combined
cp ./$npAntigen/* ./$Combined
# Preparing redundant clusters for all 4 datasets
for f in `ls -d $label"_"*`;
do
cd $f
echo "Calculating Redundant Clusters for $free";
perl $bin/getRedundantClustersAntibody.pl
mv *.txt ../$Redundant
# Preparing non-redundant data for each dataset
mkdir -p "NR_"$f
echo "Preparing Non-Redundant Data";
perl $bin/prepareNRAbData.pl ../$Redundant/"Redundant_"$f".txt" "NR_"$f
if [ $f = $Combined ] ; then
`ls "NR_"$f/*.pdb | grep "_" | cut -f1 -d. | awk -F "/" '{print \$2}' >../"NR_"$label"_Combined.txt"`
else
`ls "NR_"$f/*.pdb | grep "_" | cut -f1 -d. | awk -F "/" '{print \$2}' >>../"NR_"$label"_Merged.txt"`
fi
mv "NR_"$f ../
cd ..
done
# comm -13 keeps the lines unique to the Merged list, i.e. non-redundant entries
# that are missing from the Combined non-redundant set (the difference list)
comm -13 <(sort NR_"$label"_Combined.txt) <(sort NR_"$label"_Merged.txt) >$label"_difference.list"
mv $free ./$data
mv $proAntigen ./$data
mv $npAntigen ./$data
mv $Combined ./$data
mv "NR_"* ./$data
#cd $data
#combineData $scheme
#cd ..
} # Function ends
##############################
# This function puts together all the data (Complete Antibody, Light and
# Heavy chains) for each numbering scheme
function combineData
{
scheme=$1;
mkdir -p "ALL_"$scheme
cp ./"LH_Combined_"$scheme/* ./ALL_$scheme
cp ./"L_Combined_"$scheme/* ./ALL_$scheme
cp ./"H_Combined_"$scheme/* ./ALL_$scheme
cd "ALL_"$scheme
cd ..
}
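# compress: pack every top-level directory of the current directory into a
# .tar.bz2 archive of the same name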
function compress
{
array=(*/)
for directory in "${array[@]}"
do
dirname=$(basename "$directory")
tar -jcvf $dirname.tar.bz2 $dirname
done
}
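# runProg: run processAntibodyPDBs.pl for one numbering scheme (arguments:
# scheme name, scheme flag, input list of PDB codes), then build the redundant
# and non-redundant datasets for the complete antibody (LH), light chain (L)
# and heavy chain (H) sets and gather them with combineData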
function runProg
{
scheme=$1
schemeFlag=$2
# Run the processAntibodyPDBs program for the requested numbering scheme
free="LH_Free_"$scheme
proAntigen="LH_Protein_"$scheme
npAntigen="LH_NonProtein_"$scheme
Combined="LH_Combined_"$scheme
label="LH"
echo "processAntibodyPDBs program is running for $scheme numbering";
perl $bin/processAntibodyPDBs.pl $schemeFlag $3 # $3 is input file
process $free $proAntigen $npAntigen $Combined $scheme $label
perl $bin/FreeComplexedAntibody.pl -$label ./LH_difference.list ./Redundant_files/"Redundant_"$Combined".txt"
free="L_Free_"$scheme
proAntigen="L_Protein_"$scheme
npAntigen="L_NonProtein_"$scheme
Combined="L_Combined_"$scheme
label="L"
process $free $proAntigen $npAntigen $Combined $scheme $label
perl $bin/FreeComplexedAntibody.pl -$label ./L_difference.list ./Redundant_files/"Redundant_"$Combined".txt"
free="H_Free_"$scheme
proAntigen="H_Protein_"$scheme
npAntigen="H_NonProtein_"$scheme
Combined="H_Combined_"$scheme
label="H"
process $free $proAntigen $npAntigen $Combined $scheme $label
perl $bin/FreeComplexedAntibody.pl -$label ./H_difference.list ./Redundant_files/"Redundant_"$Combined".txt"
# Combine the LH, L and H redundant clusters into the Redundant_ALL file
cat ./Redundant_files/"Redundant_LH_Combined_"$scheme".txt" ./Redundant_files/"Redundant_L_Combined_"$scheme".txt" ./Redundant_files/"Redundant_H_Combined_"$scheme".txt" >./Redundant_files/"Redundant_ALL_"$scheme".txt"
# Combining data from LH, L and H datasets into ALL_scheme directory
cd Data
combineData $scheme
cd ..
}
############################################
echo "Main program running";
############################################
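# For each numbering scheme: run the full pipeline and then park the *.list and
# *.dat log files in the <scheme>_logs directory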
scheme="Martin"
schemeFlag="-a"
runProg $scheme $schemeFlag $1
mkdir -p $scheme"_logs"
mv *.list *.dat ./$scheme"_logs"
#***** To skip the Kabat and Chothia schemes, temporarily comment out the block below ******#
# ****************************
scheme="Kabat"
schemeFlag="-k"
runProg $scheme $schemeFlag $1
mkdir -p $scheme"_logs"
mv *.list *.dat ./$scheme"_logs"
scheme="Chothia"
schemeFlag="-c"
runProg $scheme $schemeFlag $1
mkdir -p $scheme"_logs"
mv *.list *.dat ./$scheme"_logs"
# ******************************
mv Redundant_files Data
cd Data/Redundant_files
# Sort every file in the directory in place
shopt -s nullglob
filearray=( * ) # Reading directory files into an array
for i in "${filearray[@]}"
do
sort $i -o $i
done
cd ..
# Run Chain Mapping script on the ALL_Martin directory
cd ALL_Martin
perl $bin/chainMapping.pl >AbDb_chainMapping.dat
mv AbDb_chainMapping.dat ../
cd ..
# Stats for processed data
perl $bin/getprocessedDataStats.pl
mv *.tt ../
compress
cd ..
# Stats for unprocessed data
cd Martin_logs
grep -r "multi-chain" ../Dataprep*Martin/ | awk -F "/" '{print $3}' | sort | uniq >multi-chain.list
grep -r "scFV" ../Dataprep*Martin/ | awk -F "/" '{print $3}' | sort | uniq >scFV.list
kabatError=`awk 'END {print NR}' Kabat_Error.list`;
Fc=`awk 'END {print NR}' FC.list`;
superceded=`awk 'END {print NR}' Superceded.list`;
scFV=`awk 'END {print NR}' scFV.list`;
multichains=`awk 'END {print NR}' multi-chain.list`;
realKabatError=$(($kabatError-$scFV));
bash $bin/statsUnprocessed.sh $Fc $realKabatError $superceded $scFV >../stats_unprocessed.tt
# Merge every two consecutive lines of header.dat into one line
awk 'NR%2{printf $0" ";next;}1' header.dat >headerProcessed.dat
cd ..
data="Data"
mv Martin_logs $data
datasrc=`pwd`
echo "Moving Data to destination"
cd $dataprep_dest
# Move processed data (Data) into Web directory
mv $datasrc/$data/ $webdata_dest
mv $datasrc/*.tt $webdata_dest
# Make a directory named with today's date (format %d-%b-%Y, e.g. 22-Apr-2014)
mkdir -p $(date '+%d-%b-%Y') && dateDir=$(date '+%d-%b-%Y')
# Move the data-prep folders into the date directory
mv `ls -d $datasrc/*/` $dataprep_dest/$dateDir