-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlociq.script
executable file
·51 lines (43 loc) · 1.38 KB
/
lociq.script
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!
#########################################################
# Help #
#########################################################
Help()
{
  # Print usage information for this script to stdout.
  # Takes no arguments. The options listed here are the ones the
  # script accepts on its command line: -H (help) and -a (analysis path).
  echo "Extract the sequences listed in the 'batcher' file from the BLAST database and write them as tab-separated header/sequence pairs to seqs.4.R."
  echo
  echo "Syntax: lociq.script [-H] [-a path]"
  echo "options:"
  echo "H Print this Help."
  echo "a Path containing analysis files"
}
#########################################################
# PROGRAM #
#########################################################
# Parse command-line options:
#   -H        print usage and exit
#   -a PATH   directory containing the analysis files (stored in PATHWAYMR)
# The leading ':' in the optstring selects getopts' silent mode, so missing
# arguments and unknown options are reported by the ':' and '?' arms below.
while getopts ":Ha:" opt; do
  case "$opt" in
    H) # Help info
      Help
      exit 0;;
    a) # Path to analysis files
      PATHWAYMR="$OPTARG";;
    :) # Option given without its required argument
      echo "Option -$OPTARG requires an argument" >&2
      exit 1;;
    \?) # Invalid option
      echo "Option does not exist" >&2
      exit 1;;
  esac
done

# Refuse to run without a target directory: an unquoted, empty 'cd' argument
# would switch to $HOME and the pipeline (including the cleanup 'rm') would
# then run there.
if [ -z "${PATHWAYMR:-}" ]; then
  echo "No analysis path given (use -a)" >&2
  exit 1
fi

# Navigate to appropriate directory
cd -- "$PATHWAYMR" || { echo "Cannot change directory to $PATHWAYMR" >&2; exit 1; }

# Cleanup: remove the intermediate files on every exit path, including
# the early exits taken when a pipeline step fails.
trap 'rm -f -- seqs.1 seqs.2 seqs.3' EXIT

# Extract sequences from the PLSDB database using ID and range info stored in the 'batcher' file
blastdbcmd -db db.indexed -entry_batch batcher > seqs.1 \
  || { echo "blastdbcmd failed" >&2; exit 1; }

# Remove range info from headers (everything from the first ':' onwards)
sed 's/:.*//' seqs.1 > seqs.2 \
  || { echo "Failed to strip range info from headers" >&2; exit 1; }

# Convert each sequence into sets of 2 lines, first being header second being sequence
awk '/^>/ {printf("\n%s\n",$0);next; } { printf("%s",$0);} END {printf("\n");}' < seqs.2 > seqs.3 \
  || { echo "Failed to join sequence lines" >&2; exit 1; }

# Convert from multifasta sequence to col1 col2 format.
# With RS=">" the text before the first '>' forms an empty leading record;
# 'tail -n +2' drops the line printed for it.
awk 'BEGIN{RS=">"}{print $1"\t"$2;}' seqs.3 | tail -n +2 > seqs.4.R \
  || { echo "Failed to write seqs.4.R" >&2; exit 1; }