Skip to content

Commit

Permalink
Considers isalnum in pdb_selchain (#40)
Browse files Browse the repository at this point in the history
* Solves #39

* added tests

* added test lines for solvent atoms
  • Loading branch information
joaomcteixeira authored and JoaoRodrigues committed Sep 26, 2019
1 parent eb1baf7 commit 4f6c4b9
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pdbtools/pdb_selchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def check_input(args):
sys.exit(1)

# Validate option
option_set = set([o.upper().strip() for o in option.split(',') if o.strip()])
option_set = set([o.strip() for o in option.split(',') if o.strip().isalnum()])
if not option_set:
emsg = 'ERROR!! You must provide at least one chain identifier\n'
sys.stderr.write(emsg)
Expand Down
204 changes: 204 additions & 0 deletions tests/data/dummy_az09.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
HEADER THIS A DUMMY PDB FOR PDB-TOOLS TESTING
TITLE A RANDOM PDB
COMPND MOL_ID: 1;
SOURCE MOL_ID: 1;
KEYWDS PDB-TOOLS, TEST-PDB
REMARK THIS STRUCTURE CONTAINS 4 CHAINS
REMARK ALL ATOM SERIAL NUMBERS ARE 1 FOR ATOM
REMARK CHAIN A HAS SEGID Z
REMARK DOUBLE OCCUPANCIES ON THE FIRST RESIDUE OF CHAIN A
REMARK CHAIN B HAS NON-CONTINUOUS RESIDUES (SHOULD HAVE A TER STATEMENT)
REMARK CHAIN C DOES NOT HAVE ELEMENTS FOR LAST RESIDUE
REMARK CHAIN C CONTAINS RESIDUES OUT OF ORDER
REMARK CHAIN D IS NUCLEIC ACID AND LACKS A TER STATEMENT
ATOM 1 N ARG b 4 36.898 42.175 -2.688 1.00 0.00 N
ATOM 2 H ARG b 4 37.673 41.570 -2.916 1.00 0.00 H
ATOM 3 H2 ARG b 4 35.929 41.470 -2.758 1.00 0.00 H
ATOM 3 H3 ARG b 4 37.099 42.392 -1.524 1.00 0.00 H
ATOM 3 CA ARG b 4 37.080 43.455 -3.421 1.00 0.00 C
ATOM 3 HA ARG b 4 38.102 43.960 -3.065 1.00 0.00 H
ATOM 3 CB ARG b 4 37.064 43.172 -4.926 1.00 0.00 C
ATOM 3 HB2 ARG b 4 36.937 44.238 -5.450 1.00 0.00 H
ATOM 3 HB3 ARG b 4 36.146 42.550 -5.381 1.00 0.00 H
ATOM 3 CG ARG b 4 38.366 42.367 -5.167 1.00 0.00 C
ATOM 3 HG2 ARG b 4 38.044 41.263 -5.501 1.00 0.00 H
ATOM 3 HG3 ARG b 4 39.214 42.156 -4.350 1.00 0.00 H
ATOM 3 CD ARG b 4 39.191 43.008 -6.234 1.00 0.00 C
ATOM 3 HD2 ARG b 4 39.652 44.053 -5.873 1.00 0.00 H
ATOM 3 HD3 ARG b 4 40.211 42.425 -6.479 1.00 0.00 H
ATOM 3 NE ARG b 4 38.557 43.081 -7.486 1.00 0.00 N
ATOM 3 HE ARG b 4 38.421 41.980 -7.920 1.00 0.00 H
ATOM 3 CZ ARG b 4 38.266 43.951 -8.427 1.00 0.00 C
ATOM 3 NH1 ARG b 4 38.554 45.234 -8.396 1.00 0.00 N
ATOM 3 HH11 ARG b 4 39.085 45.950 -7.607 1.00 0.00 H
ATOM 3 HH12 ARG b 4 38.392 45.917 -9.359 1.00 0.00 H
ATOM 3 NH2 ARG b 4 37.609 43.458 -9.493 1.00 0.00 N
ATOM 3 HH21 ARG b 4 36.480 43.750 -9.739 1.00 0.00 H
ATOM 3 HH22 ARG b 4 38.086 42.813 -10.373 1.00 0.00 H
ATOM 3 C ARG b 4 36.102 44.524 -2.998 1.00 0.00 C
ATOM 3 O ARG b 4 36.577 45.677 -2.879 1.00 0.00 O
ATOM 3 N GLU b 6 34.849 44.167 -2.710 1.00 0.00 N
ATOM 3 H GLU b 6 34.444 43.155 -3.175 1.00 0.00 H
ATOM 3 CA GLU b 6 33.861 45.127 -2.233 1.00 0.00 C
ATOM 3 HA GLU b 6 33.871 46.090 -2.939 1.00 0.00 H
ATOM 3 CB GLU b 6 32.480 44.538 -2.041 1.00 0.00 C
ATOM 3 HB2 GLU b 6 32.305 43.781 -1.133 1.00 0.00 H
ATOM 3 HB3 GLU b 6 31.793 45.504 -1.851 1.00 0.00 H
ATOM 3 CG GLU b 6 31.968 43.831 -3.261 1.00 0.00 C
ATOM 3 HG2 GLU b 6 32.228 42.682 -3.461 1.00 0.00 H
ATOM 3 HG3 GLU b 6 32.059 44.448 -4.281 1.00 0.00 H
ATOM 3 CD GLU b 6 30.440 43.676 -3.190 1.00 0.00 C
ATOM 3 OE1 GLU b 6 29.816 44.208 -2.210 1.00 0.00 O
ATOM 3 OE2 GLU b 6 30.005 43.060 -4.204 1.00 0.00 O
ATOM 3 C GLU b 6 34.180 45.629 -0.820 1.00 0.00 C
ATOM 3 O GLU b 6 33.914 46.775 -0.464 1.00 0.00 O
ATOM 3 N ALA b 7 34.725 44.679 -0.087 1.00 0.00 N
ATOM 3 H ALA b 7 34.813 43.714 -0.371 1.00 0.00 H
ATOM 3 CA ALA b 7 35.081 45.036 1.305 1.00 0.00 C
ATOM 3 HA ALA b 7 34.200 45.604 1.876 1.00 0.00 H
ATOM 3 CB ALA b 7 35.381 43.810 2.140 1.00 0.00 C
ATOM 3 HB1 ALA b 7 34.382 43.229 2.456 1.00 0.00 H
ATOM 3 HB2 ALA b 7 36.167 42.973 1.802 1.00 0.00 H
ATOM 3 HB3 ALA b 7 35.847 44.180 3.181 1.00 0.00 H
ATOM 3 C ALA b 7 36.213 46.067 1.258 1.00 0.00 C
ATOM 3 O ALA b 7 36.287 47.046 2.028 1.00 0.00 O
TER 3 ALA b 7
ATOM 3 N ASN A 1 22.066 40.557 0.420 1.00 0.00 N
ATOM 3 H ASN A 1 21.629 41.305 -0.098 1.00 0.00 H
ATOM 3 H2 ASN A 1 23.236 40.798 0.369 1.00 0.00 H
ATOM 3 H3 ASN A 1 21.866 40.736 1.590 1.00 0.00 H
ATOM 3 CA BASN A 1 20.000 30.000 0.005 0.60 0.00 C
ATOM 3 CA AASN A 1 21.411 39.311 0.054 0.40 0.00 C
ATOM 3 HA ASN A 1 21.274 38.560 0.973 1.00 0.00 H
ATOM 3 CB ASN A 1 19.994 39.763 -0.371 1.00 0.00 C
ATOM 3 HB2 ASN A 1 19.390 40.329 0.493 1.00 0.00 H
ATOM 3 HB3 ASN A 1 19.951 40.426 -1.365 1.00 0.00 H
ATOM 3 CG ASN A 1 18.956 38.712 -0.677 1.00 0.00 C
ATOM 3 OD1 ASN A 1 19.093 37.685 0.011 1.00 0.00 O
ATOM 3 ND2 ASN A 1 17.958 38.797 -1.531 1.00 0.00 N
ATOM 3 HD21 ASN A 1 17.073 39.590 -1.490 1.00 0.00 H
ATOM 3 HD22 ASN A 1 17.746 37.948 -2.337 1.00 0.00 H
ATOM 3 C ASN A 1 22.143 38.629 -1.102 1.00 0.00 C
ATOM 3 O ASN A 1 21.581 38.297 -2.176 1.00 0.00 O
ATOM 3 N ARG A 2 23.408 38.395 -0.829 1.00 0.00 N
ATOM 3 H ARG A 2 23.683 38.199 0.313 1.00 0.00 H
ATOM 3 CA ARG A 2 24.384 37.823 -1.757 1.00 0.00 C
ATOM 3 HA ARG A 2 24.282 38.422 -2.786 1.00 0.00 H
ATOM 3 CB ARG A 2 25.810 37.966 -1.211 1.00 0.00 C
ATOM 3 HB2 ARG A 2 26.567 37.565 -2.049 1.00 0.00 H
ATOM 3 HB3 ARG A 2 26.001 37.209 -0.306 1.00 0.00 H
ATOM 3 CG ARG A 2 26.164 39.436 -0.931 1.00 0.00 C
ATOM 3 HG2 ARG A 2 26.305 39.921 -2.015 1.00 0.00 H
ATOM 3 HG3 ARG A 2 25.398 40.159 -0.373 1.00 0.00 H
ATOM 3 CD ARG A 2 27.434 39.527 -0.075 1.00 0.00 C
ATOM 3 HD2 ARG A 2 27.330 38.901 0.945 1.00 0.00 H
ATOM 3 HD3 ARG A 2 28.442 39.042 -0.507 1.00 0.00 H
ATOM 3 NE ARG A 2 27.571 40.926 0.442 1.00 0.00 N
ATOM 3 HE ARG A 2 27.306 41.176 1.574 1.00 0.00 H
ATOM 3 CZ ARG A 2 28.171 41.856 -0.298 1.00 0.00 C
ATOM 3 NH1 ARG A 2 28.694 41.561 -1.484 1.00 0.00 N
ATOM 3 HH11 ARG A 2 28.089 41.809 -2.480 1.00 0.00 H
ATOM 3 HH12 ARG A 2 29.766 41.066 -1.634 1.00 0.00 H
ATOM 3 NH2 ARG A 2 28.285 43.099 0.095 1.00 0.00 N
ATOM 3 HH21 ARG A 2 27.536 43.967 -0.232 1.00 0.00 H
ATOM 3 HH22 ARG A 2 28.955 43.437 1.019 1.00 0.00 H
ATOM 3 C ARG A 2 24.061 36.421 -2.189 1.00 0.00 C
ATOM 3 O ARG A 2 24.411 36.106 -3.325 1.00 0.00 O
ATOM 3 N GLU A 3 23.456 35.570 -1.408 1.00 0.00 N
ATOM 3 H GLU A 3 23.028 35.844 -0.334 1.00 0.00 H
ATOM 3 CA GLU A 3 23.064 34.219 -1.780 1.00 0.00 C
ATOM 3 HA GLU A 3 23.679 33.794 -2.710 1.00 0.00 H
ATOM 3 CB GLU A 3 23.067 33.293 -0.568 1.00 0.00 C
ATOM 3 HB2 GLU A 3 22.647 32.197 -0.811 1.00 0.00 H
ATOM 3 HB3 GLU A 3 22.272 33.605 0.271 1.00 0.00 H
ATOM 3 CG GLU A 3 24.431 33.215 0.088 1.00 0.00 C
ATOM 3 HG2 GLU A 3 24.298 32.589 1.101 1.00 0.00 H
ATOM 3 HG3 GLU A 3 25.122 34.127 0.432 1.00 0.00 H
ATOM 3 CD GLU A 3 25.340 32.327 -0.714 1.00 0.00 C
ATOM 3 OE1 GLU A 3 24.891 31.745 -1.724 1.00 0.00 O
ATOM 3 OE2 GLU A 3 26.527 32.135 -0.455 1.00 0.00 O
ATOM 3 C GLU A 3 21.682 34.241 -2.380 1.00 0.00 C
ATOM 3 O GLU A 3 21.239 33.190 -2.786 1.00 0.00 O
TER 3 GLU A 3
ATOM 3 N ARG 1 5 36.898 42.175 -2.688 1.00 0.00 N
ATOM 3 H ARG 1 5 37.673 41.570 -2.916 1.00 0.00 H
ATOM 3 H2 ARG 1 5 35.929 41.470 -2.758 1.00 0.00 H
ATOM 3 H3 ARG 1 5 37.099 42.392 -1.524 1.00 0.00 H
ATOM 3 CA ARG 1 5 37.080 43.455 -3.421 1.00 0.00 C
ATOM 3 HA ARG 1 5 38.102 43.960 -3.065 1.00 0.00 H
ATOM 3 CB ARG 1 5 37.064 43.172 -4.926 1.00 0.00 C
ATOM 3 HB2 ARG 1 5 36.937 44.238 -5.450 1.00 0.00 H
ATOM 3 HB3 ARG 1 5 36.146 42.550 -5.381 1.00 0.00 H
ATOM 3 CG ARG 1 5 38.366 42.367 -5.167 1.00 0.00 C
ATOM 3 HG2 ARG 1 5 38.044 41.263 -5.501 1.00 0.00 H
ATOM 3 HG3 ARG 1 5 39.214 42.156 -4.350 1.00 0.00 H
ATOM 3 CD ARG 1 5 39.191 43.008 -6.234 1.00 0.00 C
ATOM 3 HD2 ARG 1 5 39.652 44.053 -5.873 1.00 0.00 H
ATOM 3 HD3 ARG 1 5 40.211 42.425 -6.479 1.00 0.00 H
ATOM 3 NE ARG 1 5 38.557 43.081 -7.486 1.00 0.00 N
ATOM 3 HE ARG 1 5 38.421 41.980 -7.920 1.00 0.00 H
ATOM 3 CZ ARG 1 5 38.266 43.951 -8.427 1.00 0.00 C
ATOM 3 NH1 ARG 1 5 38.554 45.234 -8.396 1.00 0.00 N
ATOM 3 HH11 ARG 1 5 39.085 45.950 -7.607 1.00 0.00 H
ATOM 3 HH12 ARG 1 5 38.392 45.917 -9.359 1.00 0.00 H
ATOM 3 NH2 ARG 1 5 37.609 43.458 -9.493 1.00 0.00 N
ATOM 3 HH21 ARG 1 5 36.480 43.750 -9.739 1.00 0.00 H
ATOM 3 HH22 ARG 1 5 38.086 42.813 -10.373 1.00 0.00 H
ATOM 3 C ARG 1 5 36.102 44.524 -2.998 1.00 0.00 C
ATOM 3 O ARG 1 5 36.577 45.677 -2.879 1.00 0.00 O
ATOM 3 N GLU 1 2 34.849 44.167 -2.710 1.00 0.00 N
ATOM 3 H GLU 1 2 34.444 43.155 -3.175 1.00 0.00 H
ATOM 3 CA GLU 1 2 33.861 45.127 -2.233 1.00 0.00 C
ATOM 3 HA GLU 1 2 33.871 46.090 -2.939 1.00 0.00 H
ATOM 3 CB GLU 1 2 32.480 44.538 -2.041 1.00 0.00 C
ATOM 3 HB2 GLU 1 2 32.305 43.781 -1.133 1.00 0.00 H
ATOM 3 HB3 GLU 1 2 31.793 45.504 -1.851 1.00 0.00 H
ATOM 3 CG GLU 1 2 31.968 43.831 -3.261 1.00 0.00 C
ATOM 3 HG2 GLU 1 2 32.228 42.682 -3.461 1.00 0.00 H
ATOM 3 HG3 GLU 1 2 32.059 44.448 -4.281 1.00 0.00 H
ATOM 3 CD GLU 1 2 30.440 43.676 -3.190 1.00 0.00 C
ATOM 3 OE1 GLU 1 2 29.816 44.208 -2.210 1.00 0.00 O
ATOM 3 OE2 GLU 1 2 30.005 43.060 -4.204 1.00 0.00 O
ATOM 3 C GLU 1 2 34.180 45.629 -0.820 1.00 0.00 C
ATOM 3 O GLU 1 2 33.914 46.775 -0.464 1.00 0.00 O
ATOM 3 N MET 1 -1 43.010 -16.998 71.911 1.00 54.34
ATOM 3 CA MET 1 -1 42.850 -16.494 70.506 1.00 52.98
ATOM 3 C MET 1 -1 41.752 -17.205 69.684 1.00 52.05
ATOM 3 O MET 1 -1 41.560 -18.418 69.777 1.00 54.00
ATOM 3 CB MET 1 -1 44.170 -16.675 69.746 1.00 54.15
ATOM 3 CG MET 1 -1 44.178 -16.031 68.376 0.70 53.45
ATOM 3 SD MET 1 -1 45.459 -16.671 67.274 0.70 54.81
ATOM 3 CE MET 1 -1 46.644 -15.335 67.447 0.50 56.04 ABCD
TER 3 MET 1 -1
ATOM 3 P DT D 2 36.556 19.296 31.761 1.00247.39 Z
ATOM 3 OP1 DT D 2 37.512 20.431 31.873 1.00246.92 Z
ATOM 3 OP2 DT D 2 36.121 18.841 30.413 1.00249.27 Z
ATOM 3 O5' DT D 2 37.156 18.071 32.587 1.00240.73 Z
ATOM 3 C5' DT D 2 37.291 18.167 34.022 1.00234.60 Z
ATOM 3 C4' DT D 2 36.894 16.884 34.720 1.00230.04 Z
ATOM 3 O4' DT D 2 35.483 16.586 34.583 1.00223.51 Z
ATOM 3 C3' DT D 2 37.637 15.617 34.271 1.00231.80 Z
ATOM 3 O3' DT D 2 38.514 15.208 35.326 1.00243.58 Z
ATOM 3 C2' DT D 2 36.528 14.599 34.046 1.00225.69 Z
ATOM 3 C1' DT D 2 35.377 15.199 34.824 1.00221.02 Z
ATOM 3 N1 DT D 2 34.026 14.737 34.425 1.00216.83 Z
ATOM 3 C2 DT D 2 33.171 14.323 35.424 1.00212.97 Z
ATOM 3 O2 DT D 2 33.456 14.374 36.611 1.00208.83 Z
ATOM 3 N3 DT D 2 31.953 13.865 34.982 1.00211.37 Z
ATOM 3 C4 DT D 2 31.522 13.771 33.670 1.00212.96 Z
ATOM 3 O4 DT D 2 30.395 13.347 33.427 1.00214.43 Z
ATOM 3 C5 DT D 2 32.479 14.201 32.672 1.00213.13 Z
ATOM 3 C7 DT D 2 32.100 14.139 31.225 1.00210.53 Z
ATOM 3 C6 DT D 2 33.671 14.646 33.094 1.00215.41 Z
HETATM 4 CA CA A 301 44.698 -0.753 65.490 1.00 57.81
HETATM 5 O HOH A 302 61.179 -8.803 36.085 1.00 51.61
HETATM 6 O HOH A 303 64.052 21.644 20.397 1.00 65.18
HETATM 7 O HOH b 301 11.052 -12.419 29.700 1.00 73.70
HETATM 8 O HOH 1 301 -8.172 -22.003 57.197 1.00 70.53
HETATM 9 O HOH 1 302 36.020 -23.583 73.186 1.00 24.82
HETATM 10 O HOH 1 303 41.203 -28.852 57.698 1.00 53.16
HETATM 11 O HOH 1 304 -4.491 -9.687 56.752 1.00 55.08
HETATM 12 O HOH 1 305 24.561 0.532 70.565 1.00 44.77
CONECT 10 11
CONECT 1 2 4 5

45 changes: 45 additions & 0 deletions tests/test_pdb_selchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,51 @@ def test_one_option(self):
self.assertEqual(len(self.stdout), 76) # selected c.A
self.assertEqual(len(self.stderr), 0) # no errors

def test_one_option_CAPS_lowercase(self):
    """$ pdb_selchain -A data/dummy_az09.pdb"""

    # Command line being simulated:
    #   pdb_selchain -A dummy_az09.pdb
    pdb_path = os.path.join(data_dir, 'dummy_az09.pdb')
    sys.argv = ['', '-A', pdb_path]

    # Run the script against the simulated arguments
    self.exec_module()

    # Check the outcome: clean exit, the expected amount of output for
    # the upper-case chain A selection, and nothing reported on stderr.
    self.assertEqual(self.retcode, 0)
    self.assertEqual(len(self.stdout), 76)
    self.assertEqual(len(self.stderr), 0)

def test_one_option_lowercase(self):
    """$ pdb_selchain -b data/dummy_az09.pdb"""

    # Command line being simulated:
    #   pdb_selchain -b dummy_az09.pdb
    pdb_path = os.path.join(data_dir, 'dummy_az09.pdb')
    sys.argv = ['', '-b', pdb_path]

    # Run the script against the simulated arguments
    self.exec_module()

    # Check the outcome: clean exit, the expected amount of output for
    # the lower-case chain b selection, and nothing reported on stderr.
    self.assertEqual(self.retcode, 0)
    self.assertEqual(len(self.stdout), 69)
    self.assertEqual(len(self.stderr), 0)

def test_one_option_digit(self):
    """$ pdb_selchain -1 data/dummy_az09.pdb"""

    # Command line being simulated:
    #   pdb_selchain -1 dummy_az09.pdb
    pdb_path = os.path.join(data_dir, 'dummy_az09.pdb')
    sys.argv = ['', '-1', pdb_path]

    # Run the script against the simulated arguments
    self.exec_module()

    # Check the outcome: clean exit, the expected amount of output for
    # the numeric chain 1 selection, and nothing reported on stderr.
    self.assertEqual(self.retcode, 0)
    self.assertEqual(len(self.stdout), 71)
    self.assertEqual(len(self.stderr), 0)

def test_multiple(self):
"""
$ pdb_selchain -A,B data/dummy.pdb
Expand Down

0 comments on commit 4f6c4b9

Please sign in to comment.