diff --git a/README.md b/README.md index 3f9f0a8..32da5b1 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ In addition to providing a complete interface between Python and PaDEL-Descripto ### SMILES to Descriptors/Fingerprints -The "from_smiles" function accepts a SMILES string as an argument, and returns a Python dictionary with descriptor/fingerprint names/values as keys/values respectively. +The "from_smiles" function accepts a SMILES string or a list of SMILES strings as an argument. For a single SMILES string, it returns a Python dictionary with descriptor/fingerprint names as keys and their values as values; if a list of SMILES strings is supplied, "from_smiles" returns a list of such dictionaries, one per SMILES string. ```python from padelpy import from_smiles @@ -41,6 +41,9 @@ from padelpy import from_smiles # calculate molecular descriptors for propane descriptors = from_smiles('CCC') +# calculate molecular descriptors for propane and butane +descriptors = from_smiles(['CCC', 'CCCC']) + # in addition to descriptors, calculate PubChem fingerprints desc_fp = from_smiles('CCC', fingerprints=True) diff --git a/padelpy/__init__.py b/padelpy/__init__.py index ef2e9a9..fe3e8f6 100644 --- a/padelpy/__init__.py +++ b/padelpy/__init__.py @@ -1,3 +1,3 @@ from padelpy.wrapper import padeldescriptor from padelpy.functions import from_mdl, from_smiles -__version__ = '0.1.7' +__version__ = '0.1.8' diff --git a/padelpy/functions.py b/padelpy/functions.py index 4448bd7..cacfb51 100644 --- a/padelpy/functions.py +++ b/padelpy/functions.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # padelpy/functions.py -# v.0.1.7 +# v.0.1.8 # Developed in 2019 by Travis Kessler # # Contains various functions commonly used with PaDEL-Descriptor @@ -15,30 +15,42 @@ from os import remove from re import compile, IGNORECASE from time import sleep +import warnings # PaDELPy imports from padelpy import padeldescriptor -def from_smiles(smiles: str, output_csv: str = None, descriptors: bool = True, - fingerprints: bool = False, timeout: int = 
12) -> OrderedDict: +def from_smiles(smiles, output_csv: str = None, descriptors: bool = True, + fingerprints: bool = False, timeout: int = 60) -> OrderedDict: ''' from_smiles: converts SMILES string to QSPR descriptors/fingerprints Args: - smiles (str): SMILES string for a given molecule + smiles (str, list): SMILES string for a given molecule, or a list of + SMILES strings output_csv (str): if supplied, saves descriptors to this CSV file descriptors (bool): if `True`, calculates descriptors fingerprints (bool): if `True`, calculates fingerprints timeout (int): maximum time, in seconds, for conversion Returns: - OrderedDict: descriptors/fingerprint labels and values + list or OrderedDict: if multiple SMILES strings provided, returns a + list of OrderedDicts, else single OrderedDict; each OrderedDict + contains labels and values for each descriptor generated for each + supplied molecule ''' timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3] with open('{}.smi'.format(timestamp), 'w') as smi_file: - smi_file.write(smiles) + if type(smiles) == str: + smi_file.write(smiles) + elif type(smiles) == list: + smi_file.write('\n'.join(smiles)) + else: + raise RuntimeError('Unknown input format for `smiles`: {}'.format( + type(smiles) + )) smi_file.close() save_csv = True @@ -64,7 +76,10 @@ def from_smiles(smiles: str, output_csv: str = None, descriptors: bool = True, remove('{}.smi'.format(timestamp)) if not save_csv: sleep(0.5) - remove(output_csv) + try: + remove(output_csv) + except FileNotFoundError as e: + warnings.warn(e, RuntimeWarning) raise RuntimeError(exception) else: continue @@ -78,16 +93,32 @@ def from_smiles(smiles: str, output_csv: str = None, descriptors: bool = True, if not save_csv: remove(output_csv) - if len(rows) == 0: - raise RuntimeError('PaDEL-Descriptor returned no calculated values.' 
+ - ' Ensure the input structure is correct.') + if type(smiles) == list and len(rows) != len(smiles): + raise RuntimeError('PaDEL-Descriptor failed on one or more mols.' + + ' Ensure the input structures are correct.') + elif type(smiles) == str and len(rows) == 0: + raise RuntimeError( + 'PaDEL-Descriptor failed on {}.'.format(smiles) + + ' Ensure input structure is correct.' + ) - del rows[0]['Name'] - return rows[0] + for idx, r in enumerate(rows): + if len(r) == 0: + raise RuntimeError( + 'PaDEL-Descriptor failed on {}.'.format(smiles[idx]) + + ' Ensure input structure is correct.' + ) + + for idx in range(len(rows)): + del rows[idx]['Name'] + + if type(smiles) == str: + return rows[0] + return rows def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, - fingerprints: bool = False, timeout: int = 12) -> list: + fingerprints: bool = False, timeout: int = 60) -> list: ''' from_mdl: converts MDL file into QSPR descriptors/fingerprints; multiple molecules may be represented in the MDL file @@ -134,7 +165,10 @@ def from_mdl(mdl_file: str, output_csv: str = None, descriptors: bool = True, if attempt == 2: if not save_csv: sleep(0.5) - remove(output_csv) + try: + remove(output_csv) + except FileNotFoundError as e: + warnings.warn(e, RuntimeWarning) raise RuntimeError(exception) else: continue diff --git a/padelpy/wrapper.py b/padelpy/wrapper.py index c11390e..fffe6e1 100644 --- a/padelpy/wrapper.py +++ b/padelpy/wrapper.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # padelpy/wrapper.py -# v.0.1.7 +# v.0.1.8 # Developed in 2019 by Travis Kessler # # Contains the `padeldescriptor` function, a wrapper for PaDEL-Descriptor diff --git a/setup.py b/setup.py index 263f783..28e15b4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='padelpy', - version='0.1.7', + version='0.1.8', description='A Python wrapper for PaDEL-Descriptor', url='https://github.com/ecrl/padelpy', author='Travis Kessler', diff --git a/tests/test.py b/tests/test.py 
index 472d619..357ec65 100644 --- a/tests/test.py +++ b/tests/test.py @@ -13,6 +13,20 @@ def test_from_smiles(self): self.assertAlmostEqual(float(descriptors['MW']), 44.0626, 4) self.assertEqual(int(descriptors['nC']), 3) + def test_multiple_smiles(self): + + smiles = ['CCC', 'CCCC'] + descriptors = from_smiles(smiles) + self.assertEqual(len(descriptors), 2) + self.assertEqual(len(descriptors[0]), 1875) + + def test_errors(self): + + bad_smiles = 'SJLDFGSJ' + self.assertRaises(RuntimeError, from_smiles, bad_smiles) + bad_smiles = ['SJLDFGSJ', 'CCC'] + self.assertRaises(RuntimeError, from_smiles, bad_smiles) + if __name__ == '__main__':