forked from unipept/unipept-database
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreateEcNumbers.sh.in
36 lines (32 loc) · 990 Bytes
/
createEcNumbers.sh.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Parses the EC numbers and names out of 2 source files.
#
# Every row written to stdout has the form:
#   <row id> TAB <EC number> TAB <name>
#
# This first stage handles the class file ($classes); the number of rows it
# wrote is kept in $lines so the second stage can continue the numbering.
#
# NOTE(review): <<<CMD_SED>>> is a template placeholder, presumably replaced
# by the sed executable when this .in file is instantiated - confirm against
# the build scripts.

set -o pipefail -e

# Duplicate the real stdout onto fd 3. Inside the command substitution below
# stdout is captured (it becomes the row count), so the rows themselves are
# written to fd 3 instead.
exec 3>&1

classes="http://ftp.ebi.ac.uk/pub/databases/enzyme/enzclass.txt"
numbers="http://ftp.ebi.ac.uk/pub/databases/enzyme/enzyme.dat"

# Pipeline stages, in order:
#   grep  keep only the lines that start with a digit 1-9
#   sed   drop the blanks that follow a dot inside the number
#         (e.g. "1. 1. 1.-" becomes "1.1.1.-")
#   sed   turn the first run of ONE OR MORE spaces into a tab, separating the
#         number from its description (a bare " *" would match the empty
#         string at the start of the line and merely prepend a tab)
#   nl    prefix every row with its running number, tab separated like the
#         other columns
#   sed   strip the left padding nl puts in front of the number
#   sed   strip the period that ends the description
#   tee   copy the finished rows to fd 3 through the already open descriptor
#   wc    count the rows; this count is what ends up in $lines
lines="$(wget -q "$classes" -O - \
    | grep "^[1-9]" \
    | <<<CMD_SED>>> "s/\. *\([0-9\-]\)/.\1/g" \
    | <<<CMD_SED>>> "s/  */\t/" \
    | nl -s $'\t' \
    | <<<CMD_SED>>> 's/^ *//' \
    | <<<CMD_SED>>> 's/\.$//' \
    | tee >(cat >&3) \
    | wc -l \
    )"
# Second stage: download $numbers and print one row per entry in the form
#   <row id> TAB <EC number> TAB <name>
# Row ids continue after $lines, the count of rows already written.
#
# Only two line types are used:
#   ID   starts a new entry; its second field is the EC number
#   DE   carries (part of) the name; consecutive DE lines are concatenated
#
# NOTE(review): <<<CMD_AWK>>> is a template placeholder, presumably replaced
# by the awk executable when this .in file is instantiated - confirm against
# the build scripts.
wget -q "$numbers" -O - | <<<CMD_AWK>>> -v lines="$lines" '
# Print the entry collected so far, if there is one.
function emit() {
    if (id != "") {
        # Strip only a terminating period, and only once per complete name,
        # so that continuation lines keep their last character.
        sub(/\.$/, "", name)
        printf("%d\t%s\t%s\n", lines++, id, name)
    }
}
BEGIN {
    # lines holds the number of rows already written; move to the next id.
    lines++
}
/^ID/ {
    emit()
    name = ""
    id = $2
}
/^DE/ {
    # Take everything after the line code instead of relying on FS, so that
    # names made of several words are kept whole.
    part = $0
    sub(/^DE +/, "", part)
    name = name part
}
END {
    # Flush the last entry; emit() prints nothing when the input was empty.
    emit()
}'