-
Notifications
You must be signed in to change notification settings - Fork 114
/
pdb_fetch.py
executable file
·121 lines (98 loc) · 3.33 KB
/
pdb_fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
"""
Fetches a PDB file (optionally the biological unit) from the RCSB database.
usage: python pdb_fetch.py [-biounit] <pdb id>
example: python pdb_fetch.py 1CTF
Author: {0} ({1})
This program is part of the PDB tools distributed with HADDOCK
or with the HADDOCK tutorial. The utilities in this package
can be used to quickly manipulate PDB files, with the benefit
of 'piping' several different commands. This is a rewrite of old
FORTRAN77 code that was taking too much effort to compile. RIP.
"""
from __future__ import print_function
import gzip
import re
import sys
# Python 3 vs Python 2
if sys.version_info[0] < 3:
from cStringIO import StringIO as IO
from urllib2 import Request, build_opener
from urllib2 import HTTPError
else:
from io import BytesIO as IO
from urllib.request import Request, build_opener
from urllib.error import HTTPError
# Author metadata; both values are substituted into the usage text below.
__author__ = "Joao Rodrigues"
__email__ = "[email protected]"
# Usage/help text written to stderr on invalid input: the module docstring
# with its {0} and {1} placeholders filled with the author name and e-mail.
USAGE = __doc__.format(__author__, __email__)
def check_input(args):
    """
    Validates the command-line arguments and extracts the requested PDB id.

    Two invocations are accepted: ``<pdb id>`` and ``-biounit <pdb id>``.
    The PDB id must consist of four alphanumeric characters.

    Args:
        args: the command-line arguments without the program name
              (the caller passes sys.argv[1:]).

    Returns:
        A tuple (pdb_id, biounit). biounit is True only when the
        -biounit option was given.

    On invalid input a message and/or the usage text is written to stderr
    and the program exits with status 1 (SystemExit).
    """
    # Raw strings keep regex syntax away from Python's own string-escape
    # processing (the former '\-' was an invalid escape sequence).
    option_pattern = r'-biounit$'
    pdb_id_pattern = r'[0-9a-zA-Z]{4}$'

    if len(args) == 1:
        # PDB id only
        biounit = False
        pdb_id = args[0]
    elif len(args) == 2:
        # Option & PDB id; the option is validated first so that its error
        # message takes precedence over a bad PDB id.
        if not re.match(option_pattern, args[0]):
            sys.stderr.write('Invalid option: ' + args[0] + '\n')
            sys.stderr.write(USAGE)
            sys.exit(1)
        biounit = True
        pdb_id = args[1]
    else:
        sys.stderr.write(USAGE)
        sys.exit(1)

    if not re.match(pdb_id_pattern, pdb_id):
        sys.stderr.write('Invalid PDB code: ' + pdb_id + '\n')
        sys.stderr.write(USAGE)
        sys.exit(1)

    return (pdb_id, biounit)
def _fetch_structure(pdbid, biounit=False,
                     base_url='https://files.rcsb.org/download/'):
    """
    Downloads a gzipped PDB entry and yields its text one line at a time.

    Args:
        pdbid: the PDB code; it is lower-cased to build the file name.
        biounit: when True the '.pdb1' file is requested instead of '.pdb'.
        base_url: URL prefix the file name is appended to. Defaults to the
                  RCSB download location.

    Yields:
        The decompressed lines of the file, decoded as UTF-8.

    This is a generator, so nothing is fetched until it is first iterated.
    HTTP errors and decompression errors are reported on stderr and end the
    iteration (without raising); other errors raised while opening the URL
    propagate to the caller.
    """
    pdb_type = '.pdb1' if biounit else '.pdb'
    pdb_url = base_url + pdbid.lower() + pdb_type + '.gz'

    try:
        response = build_opener().open(Request(pdb_url))
        try:
            url_data = response.read()
        finally:
            # Release the connection as soon as the payload is in memory.
            response.close()
    except HTTPError as e:
        print('[!] Error fetching structure: ({0}) {1}'.format(e.code, e.msg),
              file=sys.stderr)
        return

    try:
        # The context manager closes the handle on every exit path and
        # cannot trip over an unbound name if construction fails.
        with gzip.GzipFile(fileobj=IO(url_data), mode='rb') as gz_handle:
            for line in gz_handle:
                yield line.decode('utf-8')
    except (IOError, EOFError) as e:
        # These exceptions carry no 'msg' attribute, so format the exception
        # itself. EOFError is what gzip raises for a truncated stream.
        print('[!] Error fetching structure: {0}'.format(e),
              file=sys.stderr)
if __name__ == '__main__':
    # Check Input
    pdb_id, biounit = check_input(sys.argv[1:])

    # Do the job
    # _fetch_structure is a generator, and a generator object is always
    # truthy, so the lines are joined first and the resulting text is what
    # gets tested: no output at all means the fetch failed.
    pdb_text = ''.join(_fetch_structure(pdb_id, biounit))
    if not pdb_text:
        sys.exit(1)

    try:
        sys.stdout.write(pdb_text)
        sys.stdout.flush()
    except IOError:
        # This is here to catch Broken Pipes
        # for example to use 'head' or 'tail' without
        # the error message showing up
        pass

    # last line of the script
    sys.exit(0)