From 4b355525a5b505cb69cac21ba2a19b763b742937 Mon Sep 17 00:00:00 2001
From: Juanvi Alegre-Requena
Date: Mon, 24 Jan 2022 09:22:13 -0700
Subject: [PATCH] 1. LGTM fixes

---
 Example_workflows.zip   |  Bin 483188 -> 483068 bytes
 README.md               |   10 +-
 aqme/aqme.py            |    6 +-
 aqme/cheshire_lookup.py |  213 -------
 aqme/cmin.py            |    8 +-
 aqme/crest.py           |   14 +-
 aqme/csearch.py         |    2 -
 aqme/dbstep_conf.py     |  125 -----
 aqme/energy.py          |   72 ---
 aqme/filter.py          | 1172 +++++++++++++++++++--------------------
 aqme/grapher.py         |    1 -
 aqme/mainf.py           |   11 -
 aqme/nmr.py             |  388 -------
 aqme/qcorr.py           |   34 +-
 aqme/qdesc.py           |  151 -----
 aqme/qprep.py           |    1 -
 aqme/turbomole.py       | 1164 --------------------------------
 aqme/utils.py           |    4 -
 18 files changed, 618 insertions(+), 2758 deletions(-)
 delete mode 100644 aqme/cheshire_lookup.py
 delete mode 100644 aqme/dbstep_conf.py
 delete mode 100644 aqme/energy.py
 delete mode 100644 aqme/nmr.py
 delete mode 100644 aqme/qdesc.py
 delete mode 100644 aqme/turbomole.py

diff --git a/Example_workflows.zip b/Example_workflows.zip
index 9951b5f7e0222425878d7894713fbd94cef1020e..174b0a6f54446e3408d5e9bc76a3355d6ea37679 100644
GIT binary patch
[binary delta data omitted]
diff --git a/README.md b/README.md
index 53a220b2..63309f08 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,13 @@
 [![Build Status](https://img.shields.io/travis/com/jvalegre/aqme?label=Linux%20CI&logo=Travis)](https://travis-ci.com/github/jvalegre/aqme)
 [![Tests](https://img.shields.io/static/v1?label=Tests&message=104&color=green&logo=Travis)](https://travis-ci.com/github/jvalegre/aqme)
 [![Codecov](https://img.shields.io/codecov/c/github/jvalegre/aqme?label=Codecov&logo=codecov)](https://codecov.io/gh/jvalegre/aqme)
+
 [![CodeFactor](https://img.shields.io/codefactor/grade/github/jvalegre/aqme?label=Codefactor%20grade&logo=codefactor)](https://www.codefactor.io/repository/github/jvalegre/aqme/overview/master)
-[![Codacy](https://img.shields.io/codacy/grade/047e9c6001a84713a82e180669e14c98?label=Codacy%20grade&logo=codacy)](https://www.codacy.com/manual/jvalegre/aqme?utm_source=github.com&utm_medium=referral&utm_content=jvalegre/aqme&utm_campaign=Badge_Grade)
+[![Codacy](https://img.shields.io/codacy/grade/047e9c6001a84713a82e180669e14c98?label=Codacy%20grade&logo=codacy)](https://www.codacy.com/gh/jvalegre/aqme/dashboard?utm_source=github.com&utm_medium=referral&utm_content=jvalegre/aqme&utm_campaign=Badge_Grade)
 [![lgtm](https://img.shields.io/lgtm/grade/python/github/jvalegre/aqme?label=LGTM%20grade&logo=lgtm)](https://lgtm.com/projects/g/jvalegre/aqme/context:python)
-![Size](https://img.shields.io/github/repo-size/jvalegre/aqme?label=Size)
-![PyPI](https://img.shields.io/pypi/v/aqme?label=PyPI&logo=pypi)
-![PyPI Downloads](https://img.shields.io/pypi/dm/aqme?label=PyPI%20downloads&logo=pypi&style=social)
+![Size](https://img.shields.io/github/languages/code-size/jvalegre/aqme)
+[![PyPI](https://img.shields.io/pypi/v/aqme?color=blue&label=PyPI&logo=pypi)](https://pypi.org/project/aqme)
 
 # Automated Quantum Mechanical Environments (AQME)
 
@@ -68,7 +68,7 @@ Descriptor generator from multiple input types such as SMILES, log files, xyz, e
 ## Quickstart
 ### Using AQME in Jupyter Notebooks
-There are multiple ready-to-use workflows presented as jupyter notebooks in the 'Example_workflows' folder. Some examples are:
+There are multiple ready-to-use workflows presented as Jupyter notebooks in the 'Example_workflows.zip' file. Some examples are:
 * QCORR_workflows.ipynb (QCORR analysis of Gaussian output files):
   1) Analyze optimized QM ground and transition states and create json files of normally terminated files with no errors, extra imaginary frequencies, duplicates, etc.
   2) Use json files to generate single-point energy corrections with multiple levels of theory, charge and multiplicity through for loops:
diff --git a/aqme/aqme.py b/aqme/aqme.py
index 9e607a37..c7e25b0d 100644
--- a/aqme/aqme.py
+++ b/aqme/aqme.py
@@ -26,8 +26,6 @@
 
 import os
 import time
-import json
-import pandas as pd
 from pathlib import Path
 
 from aqme.argument_parser import parser_args
@@ -35,7 +33,6 @@
     csearch_main,
     geom_rules_main,
     qprep_main,
-    move_sdf_main,
     graph_main,
     geom_par_main,
     nmr_main,
@@ -46,9 +43,8 @@
     cclib_main,
     cmin_main,
 )
-from aqme.utils import Logger, get_filenames
+from aqme.utils import Logger
 from aqme.qcorr import qcorr, json2input
-from aqme.qprep import qprep
 
 
 def main():
diff --git a/aqme/cheshire_lookup.py b/aqme/cheshire_lookup.py
deleted file mode 100644
index 1674e9cc..00000000
--- a/aqme/cheshire_lookup.py
+++ /dev/null
@@ -1,213 +0,0 @@
-import datetime
-import sys
-import pandas as pd
-
-## convert HTML table to Pandas dataframe
-def parse_html_table(table):
-    """
-    takes in an html table from the cheshire database and transforms it into
-    a pandas.Dataframe
-
-    Parameters
-    ----------
-    table : bs4.BeautifulSoup
-        A data structure obtained from parsing an html file containing a table
-        of scaling factors.
-
-    Returns
-    -------
-    pandas.Dataframe
-        A Dataframe version of the table contained in the html
-    """
-    n_rows = 0
-    n_columns = 0
-    column_names = []
-
-    ## the tables contain different numbers of columns; get their names
-    rows = table.find_all('tr')
-    tds = rows[1].find_all('td')
-
-    # avoid having two column names the same
-
-    for i in range(0,len(tds)):
-        column_name = tds[i].get_text().split('\n')[0]
-        if i == 2:
-            column_names.append(f'scale_{column_name}')
-        elif i == 3 and column_name.upper() == '13C':
-            column_names.append(f'scale_{column_name}')
-        else:
-            column_names.append(column_name)
-
-    n_columns = len(column_names)
-
-    # Determine the number of rows in the table
-    i = 1
-    for row in rows[2:]:
-        td_tags = row.find_all('td')
-        if len(td_tags) == n_columns:
-            n_rows+=1
-
-    columns = column_names
-    df = pd.DataFrame(columns=columns,index=list(range(n_rows)))
-
-    row_marker = 0
-    for row in rows[2:]:
-        column_marker = 0
-        columns = row.find_all('td')
-        for column in columns:
-            #print row_marker, column_marker, ' '.join(column.get_text().split())
-            df.iat[row_marker,column_marker] = ' '.join(column.get_text().split())
-            column_marker += 1
-        if len(columns) > 0:
-            row_marker += 1
-    return df
-
-## Parse from local offline version of the CHESHIRE scaling factors html or directly from the web
-def cheshire(online, nucleus, opt_method, opt_basis, opt_solv, nmr_method, nmr_basis, nmr_solv, nmr_aos,log):
-    """
-    Checks the cheshire database (either downloading or from a pre-downloaded
-    file and returns the scaling factor for the nucleus specified if the theory
-    provided and the ones in the database match.
-
-    Parameters
-    ----------
-    online : bool
-        Control the download or not of the cheshire database.
- nucleus : str - [description] - opt_method : str - method of optimization - opt_basis : str - basis of optimization - opt_solv : str or None - solvent used in the optimization - nmr_method : str - method - nmr_basis : str - basis used for nmr prediction - nmr_solv : str - solvent used for nmr prediction - nmr_aos : str - aos used for nmr prediction - log : aqme.Logger - Log instance. - - Returns - ------- - float ? - scaling factor - """ - try: - from bs4 import BeautifulSoup - except (ModuleNotFoundError,AttributeError): - log.write('\nThe bs4 module is not installed correctly - CHESHIRE search is not available') - sys.exit() - - ## current time for printing - now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') - - if online: - import requests - url = 'http://cheshirenmr.info/ScalingFactors.htm' - log.write(f" READING FROM {url} {now}") - response = requests.get(url) - html = BeautifulSoup(response.text, "lxml") - else: - log.write(f" READING FROM LOCAL VERSION OF CHESHIRE {now}") - with open('./scaling_factors.html','r') as F: - txt = F.read() - html = BeautifulSoup(txt, "lxml") - - calc_opt = f'{opt_method.upper()}/{opt_basis}' - calc_nmr = f'{nmr_method.upper()}/{nmr_basis}' - - # Log the theory - mid_name= f'{nmr_aos.upper()}-{calc_nmr}' - if nmr_solv == None: - top_name = '' - end_name = calc_opt - log.write(f' {mid_name}//{calc_opt}') - else: - top_name = f'{nmr_solv[0].upper()}({nmr_solv[1]})-' - if opt_solv == None: - end_name = calc_opt - else: - end_name = f'{opt_solv[0].upper()}({opt_solv[1]})-{calc_opt}' - log.write(f' {top_name}{mid_name}//{end_name}') - - for table in html.find_all('table'): - - id = table['id'] - - scaling_table = parse_html_table(table) - - # solvent details for the CHESHIRE database - # manually entered would be better to parse from HTML - will add in due course - if id == 'table1a': scrf = ['pcm', 'acetone'] - elif id == 'table1b': scrf = ['smd', 'chloroform'] - elif id == 'table1c': scrf = ['cpcm', 'chloroform'] #UAKS radii, nosymmcav - elif id == 'table1d': scrf = ['smd', 'chloroform'] - elif id == 'table2': scrf = ['pcm', 'chloroform'] - elif id == 'table3a': scrf = ['pcm', 'toluene'] - elif id == 'table5-acetone': scrf = ['pcm', 'acetone'] - elif id == 'table5-acetonitrile': scrf = ['pcm', 'acetonitrile'] - elif id == 'table5-benzene': scrf = ['pcm', 'benzene'] - elif id == 'table5-chloroform': scrf = ['pcm', 'chloroform'] - elif id == 'table5-dichloromethane': scrf = ['pcm', 'dichloromethane'] - elif id == 'table5-dimethylsulfoxide': scrf = ['pcm', 'dimethylsulfoxide'] - elif id == 'table5-methanol': scrf = ['pcm', 'methanol'] - elif id == 'table5-tetrahydrofuran': scrf = ['pcm', 'tetrahydrofuran'] - elif id == 'table5-toluene': scrf = ['pcm', 'toluene'] - elif id == 'table5-water': scrf = ['pcm', 'water'] - elif id == 'table7': scrf = ['smd', 'chloroform'] - else: scrf = None - - # Look for a match between calculation and database (case insensitive) - # Returns the first match and then breaks - for index, row in scaling_table.iterrows(): - # Get database's NMR theory - db_nmr = row['NMR'].lower().split()[0].split("/") - if 'scrf' in row['NMR'].lower(): - db_nmr_solv = scrf - else: - db_nmr_solv = None - same_nmr_solvent = db_nmr_solv == nmr_solv - - if 'cgst' in row['NMR'].lower(): - db_nmr_aos = 'CGST' - else: - db_nmr_aos = 'GIAO' - same_nmr_aos = db_nmr_aos.lower() == nmr_aos.lower() - - try: - [db_nmr_method, db_nmr_basis] = db_nmr - except ValueError: - same_nmr_theory = False - else: - if db_nmr_method[0] == '#': - db_nmr_method = 
db_nmr_method[1:] - same_nmr_method = db_nmr_method.lower() == nmr_method.lower() - same_nmr_basis = db_nmr_basis.lower() == nmr_basis.lower() - same_nmr_theory = same_nmr_method and same_nmr_basis and same_nmr_aos - - # Get database's OPT theory - db_opt = row['Geometry'].lower().split()[0].split("/") - if 'scrf' in row['Geometry'].lower(): - db_opt_solv = scrf - else: - db_opt_solv = None - same_opt_solvent = db_opt_solv == opt_solv - - try: - [db_opt_method, db_opt_basis] = db_opt - except ValueError: - pass - else: - if db_opt_method[0] == '#': - db_opt_method = db_opt_method[1:] - same_opt_method = db_opt_method.lower() == opt_method.lower() - same_opt_basis = db_opt_basis.lower() == opt_basis.lower() - same_opt_theory = same_opt_method and same_opt_basis - - if same_nmr_theory and same_opt_theory and same_nmr_solvent and same_opt_solvent: - log.write(f' --- MATCH --- {id.upper()}') - return row['scale_'+nucleus] diff --git a/aqme/cmin.py b/aqme/cmin.py index a8849ba5..788a2b88 100644 --- a/aqme/cmin.py +++ b/aqme/cmin.py @@ -3,7 +3,6 @@ # in conformer minimization with xTB and ANI # #####################################################. -from operator import itemgetter import os import sys import numpy as np @@ -13,7 +12,6 @@ from rdkit.Geometry import Point3D import time from aqme.argument_parser import set_options -from aqme.filter import CompoundFilter, EnergyFilter, RMSDFilter from aqme.utils import ( set_metal_atomic_number, Logger, @@ -53,12 +51,15 @@ def __init__( w_dir_initial=os.getcwd(), varfile=None, charge_default=0, + program=None, **kwargs, ): + self.mols = mols self.name = name self.w_dir_initial = w_dir_initial self.charge_default = charge_default + self.program = program if "options" in kwargs: self.args = kwargs["options"] @@ -640,7 +641,8 @@ def mult_min(name, args, program, charge, log, w_dir_initial): name_mol = os.path.basename(name).split("_" + args.CSEARCH)[0] # bar.next() - obj = cmin(inmols, name_mol, w_dir_initial, args.varfile, charge, program) + obj = cmin(mols=inmols, name=name_mol, w_dir_initial=w_dir_initial, varfile=args.varfile, + charge_default=charge, program=program) total_data = obj.compute_cmin() return total_data diff --git a/aqme/crest.py b/aqme/crest.py index 746e8c3a..b0db0369 100644 --- a/aqme/crest.py +++ b/aqme/crest.py @@ -6,11 +6,9 @@ ####################################################################### # Python Libraries -import sys, os +import os from rdkit.Chem import rdMolTransforms -import numpy as np import glob -from optparse import OptionParser import subprocess import rdkit from rdkit.Chem import AllChem @@ -125,7 +123,7 @@ def run_xtb( + "," + str(dihedral[4]) ) - xtb_result = subprocess.run(command) + subprocess.run(command) def crest_opt(mol, name, dup_data, dup_data_idx, sdwriter, args, log): @@ -223,7 +221,7 @@ def crest_opt(mol, name, dup_data, dup_data_idx, sdwriter, args, log): command.append("--cbonds") command.append(str(cbonds)) - crest_result = subprocess.run(command) + subprocess.run(command) if cregen: xyzcregenensemble = xyzoutall.split(".xyz")[0] + "_cregen.xyz" @@ -255,7 +253,7 @@ def crest_opt(mol, name, dup_data, dup_data_idx, sdwriter, args, log): "--ewin", str(cregen_ewin), ] - cregen_result = subprocess.run(command) + subprocess.run(command) # converting multiple xyz to single command_run_1 = [ @@ -292,7 +290,7 @@ def crest_opt(mol, name, dup_data, dup_data_idx, sdwriter, args, log): dup_data.at[dup_data_idx, "crest-conformers"] = len(xyz_files) - for f in glob.glob(os.getcwd() + "/" + name + "*.xyz"): - 
os.remove(f) +# for f in glob.glob(os.getcwd() + "/" + name + "*.xyz"): +# os.remove(f) return 1 diff --git a/aqme/csearch.py b/aqme/csearch.py index dc27a467..d672c4aa 100644 --- a/aqme/csearch.py +++ b/aqme/csearch.py @@ -9,14 +9,12 @@ import subprocess import time from pathlib import Path -import concurrent.futures as futures # RAUL: This is for the main import numpy as np import pandas as pd from rdkit.Chem import AllChem as Chem from rdkit.Chem import rdMolTransforms, PropertyMol, rdDistGeom, Lipinski -from progress.bar import IncrementalBar # RAUL: This is for the main from aqme.filter import filters, ewin_filter, pre_E_filter, RMSD_and_E_filter from aqme.tmbuild import template_embed diff --git a/aqme/dbstep_conf.py b/aqme/dbstep_conf.py deleted file mode 100644 index b221faf2..00000000 --- a/aqme/dbstep_conf.py +++ /dev/null @@ -1,125 +0,0 @@ -#####################################################. -# This file stores all the functions # -# used for genrating all parameters from DBSTEP # -#####################################################. -import sys -import os -import pandas as pd -import subprocess -from pathlib import Path - -from aqme.utils import move_file - -def calculate_db_parameters(qm_files,args,log,w_dir_initial,name_mol,lot,bs): - - try: - from dbstep.Dbstep import dbstep - except (ModuleNotFoundError,AttributeError): - log.write('\nx DBSTEP is not installed correctly - DBSTEP is not available') - sys.exit() - - total_data = pd.DataFrame() - - #find the ring atoms in the File - filelines = open(w_dir_initial+'/'+args.dbstep_cen_lig_file,'r').readlines() - - for counter,log in enumerate(qm_files): - for line in (filelines): - split_line = line.strip().split(',') - if split_line[0] == name_mol: - C = split_line[1] - L = split_line[2] - break - sterics = dbstep(log, atom1=str(C),atom2=str(L), volume=True, sterimol=True, commandline=True) - total_data.at[counter,'Name'] = name_mol - total_data.at[counter,'log'] = log.split('.log')[0] - total_data.at[counter,'bv'] = sterics.bur_vol - total_data.at[counter,'bmax'] = sterics.Bmax - total_data.at[counter,'bmin'] = sterics.Bmin - total_data.at[counter,'L'] = sterics.L - - #creating folder for all molecules to write geom parameter - if str(bs).find('/') > -1: - folder = w_dir_initial + '/QPRED/dbstep_parameters/all_confs_sterics/'+str(lot)+'-'+str(bs).split('/')[0] - else: - folder = w_dir_initial + '/QPRED/dbstep_parameters/all_confs_sterics/'+str(lot)+'-'+str(bs) - - try: - os.makedirs(folder) - except OSError: - if os.path.isdir(folder): - pass - - total_data.to_csv(folder+'/'+name_mol+'-all-steric-data.csv',index=False) - -def calculate_boltz_and_dbstep(val,args,log,name,w_dir,w_dir_initial,lot,bs): - - # GoodVibes must be installed as a module (through pip or conda) - cmd_boltz = ['python','-m', 'goodvibes', '--boltz', '--output', name ] - for file in val: - cmd_boltz.append(file) - subprocess.call(cmd_boltz) - - #writing to coorect places - if str(bs).find('/') > -1: - destination = Path(w_dir_initial+'/QPRED/dbstep_parameters/boltz/'+str(lot)+'-'+str(bs).split('/')[0]) - else: - destination = Path(w_dir_initial+'/QPRED/dbstep_parameters/boltz/'+str(lot)+'-'+str(bs)) - - move_file(destination, os.getcwd(),'Goodvibes_'+name+'.dat') - - if str(bs).find('/') > -1: - dbstep_parm_file = w_dir_initial + '/QPRED/dbstep_parameters/all_confs_sterics/'+str(lot)+'-'+str(bs).split('/')[0]+'/'+name+'-all-steric-data.csv' - else: - dbstep_parm_file = w_dir_initial + 
'/QPRED/dbstep_parameters/all_confs_sterics/'+str(lot)+'-'+str(bs)+'/'+name+'-all-steric-data.csv' - - - df_dbstep = pd.read_csv(dbstep_parm_file) - - if str(bs).find('/') > -1: - file = w_dir_initial+'/QPRED/dbstep_parameters/boltz/'+str(lot)+'-'+str(bs).split('/')[0]+'/Goodvibes_'+name+'.dat' - else: - file = w_dir_initial+'/QPRED/dbstep_parameters/boltz/'+str(lot)+'-'+str(bs)+'/Goodvibes_'+name+'.dat' - - outlines = open(file,"r").readlines() - #reading the data from boltz fileyiu - for i in range(len(outlines)): - # I remove the NMR from the file names using [0:-4] - if outlines[i].find(' ***************************************************************************************************************************************\n') > -1 and outlines[i-1].find(' Structure') > -1: - start_line = i+1 - elif outlines[i].find(' ***************************************************************************************************************************************\n') > -1: - end_line = i - - boltz_values = pd.DataFrame() - counter = 0 - for i in range(start_line,end_line): - values = outlines[i].split() - boltz_values.at[counter,'Name'] = values[1] - boltz_values.at[counter,'boltz'] = values[-1] - counter +=1 - - # multiply actual file and sum and add write to new FILE - df_dbstep['bv'] = df_dbstep['bv'].astype(float)*boltz_values['boltz'].astype(float) - df_dbstep['bmin'] = df_dbstep['bmin'].astype(float)*boltz_values['boltz'].astype(float) - df_dbstep['bmax'] = df_dbstep['bmax'].astype(float)*boltz_values['boltz'].astype(float) - df_dbstep['L'] = df_dbstep['L'].astype(float)*boltz_values['boltz'].astype(float) - - avg_data = pd.DataFrame() - avg_data.at[0,'Name'] = name - avg_data.at[0,'bv'] = df_dbstep.sum().bv - avg_data.at[0,'bmin'] = df_dbstep.sum().bmin - avg_data.at[0,'bmax'] = df_dbstep.sum().bmax - avg_data.at[0,'L'] = df_dbstep.sum().L - - if str(bs).find('/') > -1: - folder = w_dir_initial + '/QPRED/dbstep_parameters/average_sterics/'+str(lot)+'-'+str(bs).split('/')[0] - else: - folder = w_dir_initial + '/QPRED/dbstep_parameters/average_sterics/'+str(lot)+'-'+str(bs) - - try: - os.makedirs(folder) - except OSError: - if os.path.isdir(folder): - pass - - avg_data.to_csv(folder+'/'+name+'-steric-data.csv',index=False) diff --git a/aqme/energy.py b/aqme/energy.py deleted file mode 100644 index df2fb1e3..00000000 --- a/aqme/energy.py +++ /dev/null @@ -1,72 +0,0 @@ -#####################################################. -# This file stores all the functions # -# used for genrating details for energy # -#####################################################. 
- -from aqme.utils import move_file -from pathlib import Path - -import pandas as pd -import os -import subprocess - -def calculate_boltz_and_energy(val,args,log,name,w_dir_fin,w_dir_initial,lot,bs): - # GoodVibes must be installed as a module (through pip or conda) - cmd_boltz = ['python','-m', 'goodvibes', '--boltz', '--output', name ] - for file in val: - cmd_boltz.append(file) - subprocess.call(cmd_boltz) - - #writing to coorect places - if str(bs).find('/') > -1: - destination = Path(w_dir_initial+'/QPRED/energy/boltz/'+str(lot)+'-'+str(bs).split('/')[0]) - else: - destination = Path(w_dir_initial+'/QPRED/energy/boltz/'+str(lot)+'-'+str(bs)) - move_file('Goodvibes_'+name+'.dat', destination) - -def calculate_avg_and_energy(val,args,log,name,w_dir_fin,w_dir_initial,w_dir_boltz,lot,bs): - - all_file_data = [] - for file in val: - outlines = open(file,"r").readlines() - - for i in range(len(outlines)): - # I remove the NMR from the file names using [0:-4] - if outlines[i].find(' ***************************************************************************************************************************************\n') > -1 and outlines[i-1].find(' Structure') > -1: - start_line = i+1 - elif outlines[i].find(' ***************************************************************************************************************************************\n') > -1: - end_line = i - - one_file_data,values_avg= [],[] - for i in range(start_line,end_line): - values = outlines[i].split() - for j in range(2,9): - values_avg.append(float(values[j])*float(values[-1])) - one_file_data.append(values_avg) - values_avg = [] - - res = [] - res.append(name) - for j in range(0, len(one_file_data[0])): - tmp = 0 - for i in range(0, len(one_file_data)): - tmp = tmp + one_file_data[i][j] - res.append(tmp) - all_file_data.append(res) - if str(bs).find('/') > -1: - folder = w_dir_initial+'/QPRED/energy/average-energy/'+str(lot)+'-'+str(bs).split('/')[0] - else: - folder = w_dir_initial+'/QPRED/energy/average-energy/'+str(lot)+'-'+str(bs) - try: - os.makedirs(folder) - os.chdir(folder) - except OSError: - if os.path.isdir(folder): - os.chdir(folder) - else: - raise - all_file_data_df = pd.DataFrame(all_file_data,columns = ['Structure', 'E', 'ZPE', 'H', 'T.S', 'T.qh-S', 'G(T)', 'qh-G(T)']) - if str(bs).find('/') > -1: - all_file_data_df.to_csv(w_dir_initial+'/QPRED/energy/average-energy/'+str(lot)+'-'+str(bs).split('/')[0]+'/'+args.input.split('.')[0]+'.csv', index=False) - else: - all_file_data_df.to_csv(w_dir_initial+'/QPRED/energy/average-energy/'+str(lot)+'-'+str(bs)+'/'+args.input.split('.')[0]+'.csv', index=False) diff --git a/aqme/filter.py b/aqme/filter.py index 0c80d099..1400ff78 100644 --- a/aqme/filter.py +++ b/aqme/filter.py @@ -3,7 +3,6 @@ # used for filtering # #####################################################. from functools import partial -from itertools import chain from rdkit.Chem import AllChem as Chem from rdkit.Chem import rdMolTransforms, Descriptors @@ -644,589 +643,588 @@ def RMSD_and_E_filter( return selectedcids -# Base classes for the filters - - -class Filter(object): - """ - Base class for the definition of different types of filters. Defines the - basic API that any Filter object should have. - - Parameters - ---------- - function : Function - A single parameter function that returns true when the item should pass - the filter and False otherwise. - - Attributes - ---------- - dataset : list or None - A list containing a reference to all the elements that where filtered. 
- outcomes : list or None - A list of booleans with the same order as the dataset elements. - discarded : list or None - A list of discarded items from the dataset. - accepted : list or None - A list of accepted items from the dataset. - """ - - def __init__(self, function=None): - if function is not None: - self.function = function - elif getattr(self, "function", None) is None: - self.function = lambda x: True - self.dataset = None - self.outcomes = None - self._discarded = None - self._accepted = None - - def add_dummy_parameter(self, function): - """ - Adds a dummy parameter as the first positional parameter to a given - function and returns the wrapped function. - """ - - def f(dummy, parameter): - return function(parameter) - - return f - - def apply(self, dataset, key=None, force=False): - """ - Applies the filter to an iterable. Setting the 'dataset' and 'outcomes' - attributes of the Filter in the process. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - key : [type], optional - [description], by default None - force : bool, optional - A True value will apply the filter to the dataset forcefully, - overwriting any previous usage of the filter to other dataset. - - Raises - ------ - ValueError - If the dataset has been already applied to a dataset and the force - keyword is set to 'false' - """ - if self.dataset is not None and not force: - msg = "Attempting to apply a previously applied filter with force='false'" - raise ValueError(msg) - elif self.dataset is not None: - self.clean() - - if key is None: - key = lambda x: x - self.dataset = list(dataset) - outcomes = self.calc_outcomes(dataset, key) - self.outcomes = tuple(outcomes) - - def calc_outcomes(self, dataset, key): - """ - Runs the filter on a dataset and returns the outcomes without storing - the values. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - key : function - [description] - """ - return tuple(self.function(key(data)) for data in dataset) - - def extend(self, dataset, key=None): - """ - Extends the current dataset with the new dataset and includes the - outcomes of applying the filter to the new dataset. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - key : [type], optional - [description], by default None - """ - if key is None: - key = lambda x: x - new = list(dataset) - outcomes = self.calc_outcomes(new, key) - - self.dataset = self.dataset + new - self.outcomes = tuple(i for i in chain(self.outcomes, outcomes)) - # And resets the cache of discarded and accepted - self._accepted = None - self._discarded = None - - def clean(self): - """ - Resets the state of the Filter removing all the information stored - about the last application of the of the filter to a set of data. 
- """ - self.dataset = None - self.outcomes = None - self._discarded = None - self._accepted = None - - @property - def discarded(self): - if self._discarded is None and self.dataset is not None: - self._discarded = [ - d for out, d in zip(self.outcomes, self.dataset) if not out - ] - return self._discarded - - @property - def accepted(self): - if self._accepted is None and self.dataset is not None: - self._accepted = [d for out, d in zip(self.outcomes, self.dataset) if out] - return self._accepted - - -class CompoundFilter(Filter): - """ - Class used to apply several filters to a same dataset in a certain order and - store the information about which ones were discarded in which filter. - - Parameters - ---------- - *filters : Filter - An undefined number of Filter objects - - Attributes - ---------- - filters : list - List of Filter objects in application order. - dataset : list or None - A list containing a reference to all the elements that where filtered. - outcomes : list or None - A list of booleans with the same order as the dataset elements. - discarded : list or None - A list of discarded items from the dataset. - accepted : list or None - A list of accepted items from the dataset. - """ - - def __init__(self, *filters): - self.filters = filters - super().__init__() - - def insert(self, index, item): - """ - Inserts a filter before the index position. - - Parameters - ---------- - item : Filter - The filter object to include. - """ - self.filters.insert(index, item) - - def pop(self, index=-1): - """ - Remove and return a filter at index (default last). - """ - return self.filters.pop(index) - - def apply(self, dataset, key=None, keys=None, force=False): - """ - Applies the filter to an iterable. Setting the 'dataset' and 'outcomes' - attributes of each Filter in the process. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - key : function, optional - If no function is provided it is assumed that the filter can process - each item in the dataset without any change, by default None. - keys : iterable, optional - Iterable of functions with the same length as the number of filters - to provide different, by default None. Overrides the key argument. - force : bool, optional - A True value will apply the filter to the dataset forcefully, - overwriting any previous usage of the filter to other dataset. 
- - Raises - ------ - ValueError - If the dataset has been already applied to a dataset and the force - keyword is set to 'false' - """ - if self.dataset is not None and not force: - msg = "Attempting to apply a previously applied filter with force='false'" - raise ValueError(msg) - elif any([f.dataset is not None for f in self.filters]) and not force: - msg = ( - "At least one of the filters has already been applied and force='false'" - ) - raise ValueError(msg) - elif force: - self.clean() - - # Assign the keys iterable - if keys is None and key is None: - key = lambda x: x - keys = [key for _ in self.filters] - elif key is not None: - keys = [key for _ in self.filters] - - self.dataset = dataset - outcomes = self.calc_outcomes(dataset, keys) - - # Set the attributes for all the Filters - self.filters[0].dataset = dataset - self.filters[0].outcomes = outcomes[0] - out_old = outcomes[0] - for f, out in zip(self.filters[1:], outcomes[1:]): - dataset_n = [d for i, d in zip(out_old, self.dataset) if i] - f.dataset = dataset_n - f.outcomes = out - out_old = out - - # Set the attributes for the current object - outcomes = self.homogenize_outcomes(outcomes) - - self.outcomes = outcomes - - def calc_outcomes(self, dataset, keys): - """ - Runs the filter on a dataset and returns the outcomes without storing - the values. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - keys : iterable - Iterable of functions that ensure proper input per each filter. - """ - outcomes = [] - dataset_old = dataset - # outcomes_old = (True,)*len(dataset) - for f, key in zip(self.filters, keys): - # Use the filter and get the dataset for the next filter - out = f.calc_outcomes(dataset_old, key=key) - dataset_old = [d for i, d in zip(out, dataset_old) if i] - outcomes.append(tuple(out)) - return outcomes - - def homogenize_outcomes(self, outcomes): - """ - Returns a list of tuples where all tuples are the same size. - - Parameters - ---------- - outcomes : list - List of tuples with the outcomes of each of the filters. - n : int - size of the dataset - - Returns - ------- - list - list of the homogenized tuples. - """ - homogenized = [] - outcomes_old = (True,) * len(outcomes[0]) - for _, out in zip(self.filters, outcomes): - _iter = out.__iter__() - outcomes_new = [False if not i else next(_iter) for i in outcomes_old] - homogenized.append(tuple(outcomes_new)) - outcomes_old = outcomes_new - return homogenized - - def extend(self, dataset, key=None, keys=None): - """ - Extends the current dataset with the new dataset and includes the - outcomes of applying the filter to the new dataset. - - Parameters - ---------- - dataset : iterable - Iterable that contains the items to run the filtering - key : function, optional - If no function is provided it is assumed that the filter can process - each item in the dataset without any change, by default None. - keys : iterable, optional - Iterable of functions with the same length as the number of filters - to provide different, by default None. Overrides the key argument. 
- """ - # Assign the keys iterable - if keys is None and key is None: - key = lambda x: x - keys = [key for _ in self.filters] - elif key is not None: - keys = [key for _ in self.filters] - - new = list(dataset) - - _outcomes = self.outcomes - outcomes = self.calc_outcomes(new, keys) - - # Set the attributes for the current object - self.dataset = self.dataset + new - - # reset the cache of the properties - self._accepted = None - self._discarded = None - - # Set the attributes for all the Filters - self.filters[0].dataset = self.dataset - self.filters[0].outcomes = self.filters[0].outcomes + outcomes[0] - out_old = outcomes[0] - for f, out in zip(self.filters[1:], outcomes[1:]): - dataset_n = [d for i, d in zip(out_old, self.dataset) if i] - f.dataset = f.dataset + dataset_n - f.outcomes = f.outcomes + out - # reset the cache of the properties - f._accepted = None - f._discarded = None - outcomes = self.homogenize_outcomes(outcomes) - self.outcomes = [ - out_old + out_new for out_old, out_new in zip(_outcomes, outcomes) - ] - - def clean(self): - """ - Resets the state of the CompoundFilter removing all the information - stored about the last application of the of the filter to a set of data. - It cleans all the component filters. - """ - super().clean() - for f in self.filters: - f.clean() - - def accepted_from(self, index): - """ - returns the list of accepted items at the specified filter. - """ - return [d for out, d in zip(self.outcomes[index], self.dataset) if out] - - def discarded_from(self, index): - """ - returns the total list of discarded items after the specified filter. - """ - return [d for out, d in zip(self.outcomes[index], self.dataset) if not out] - - @Filter.discarded.getter - def discarded(self): - if self._discarded is None and self.dataset is not None: - self._discarded = self.discarded_from(-1) - return self._discarded - - @Filter.accepted.getter - def accepted(self): - if self._accepted is None and self.dataset is not None: - self._accepted = self.accepted_from(-1) - return self._accepted - - -class RMSDFilter(Filter): - """ - This filter inputs tuples of (molecule,cid). Each time a conformer that - passes the filter is found it is added to the pool of conformers. - - Note: The RMSD calculation done by rdkit has the side effect of leaving the - target conformer aligned to the probe conformer. - - Parameters - ---------- - threshold : float - Minimum RMSD to accept a conformer. - maxmatches : int - maximum number of atoms should match in the alignment previous to the - RMSD calculation. - heavyonly : [type], optional - [description], by default True. - reverse : bool, optional - Reverses the threshold. If True, only conformers with an RMSD < threshold - will be accepted. by default False. - is_rdkit : bool - If the conformers to compare have been generated with rdkit and the - cid to use to access the conformer is the one provided instead of -1. - by default False. - - """ - - def __init__( - self, threshold, maxmatches, heavyonly=True, reverse=False, is_rdkit=False - ): - self.threshold = threshold - self.maxmatches = maxmatches - self.heavyonly = heavyonly - self.pool = [] - self.is_rdkit = is_rdkit - super().__init__(function=self.function) - - def set_initial_pool(self, pool, key=None): - """ - Sets the initial pool of seen conformers. 
- - Parameters - ---------- - pool : list - A list of conformers ( or things convertible to conformers through - the key function) - key : function, optional - A function to convert each item in the pool to a conformer that - can be subjected to the filter, by default None. - i.e - >>> myconformers[cid<-int] = conformer - >>> pool = [cid1,cid2,...,cidn] - >>> key = lambda x: myconformers[x] - """ - if key is not None: - pool = list(map(key, pool)) - self.pool = pool - - def clean(self, also_pool=True): - """ - Resets the state of the Filter removing all the information stored - about the last application of the of the filter to a set of data. - - Parameters - ---------- - - also_pool : bool - If true it will also clear the initial pool of conformers. - """ - if also_pool: - self.pool = [] - super().__init__() - - def function(self, item): - """ - main function of the filter. inputs a tuple (molecule,cid) aligns it to - all the previously stored molecules and if it passes the filter, stores - it and returns True, otherwise returns False. - """ - probe, cid_p = item - if item in self.pool: - return True - for target, cid_t in self.pool: - c1 = c2 = -1 - if self.is_rdkit: - probe, target = target, probe - c1, c2 = cid_t, cid_p - rms = get_conf_RMS(probe, target, c1, c2, self.heavyonly, self.maxmatches) - if self.reverse: - reject = rms < self.threshold - else: - reject = rms > self.threshold - if reject: - return False - else: - self.pool.append(item) - return True - - -class EnergyFilter(Filter): - """ - This filter inputs energy values. Each time a conformer that - passes the filter is found it is added to the pool of conformers. - - Note: The RMSD calculation done by rdkit has the side effect of leaving the - target conformer aligned to the probe conformer. - - Parameters - ---------- - threshold : float - Energy threshold in kcal/mol. - mode : str, ['difference','window'] - 'difference' accepts all the conformers whose energy difference - with respect to all the previous conformers found is larger than - the threshold. - 'window' accepts all the conformers whose energy difference with respect - to the lowest in energy is smaller than the threshold. - - Attributes - ---------- - pool : list - In window mode, it is the list of minima used to calculate the energy - window. If the lowest conformer was either set as initial pool or - provided as the first item of the dataset the pool will remain of len=1. - In difference mode, it corresponds to all the conformers accepted. - """ - - def __init__(self, threshold, mode): - self.mode = mode # difference or window - self.threshold = threshold - self.pool = [] - super().__init__() - - def set_initial_pool(self, pool, key=None): - """ - Sets the initial pool of seen conformers. - - Parameters - ---------- - pool : list - A list of conformers ( or things convertible to conformers through - the key function) - key : function, optional - A function to convert each item in the pool to a conformer that - can be subjected to the filter, by default None. - i.e - >>> myconformers[cid<-int] = conformer - >>> pool = [cid1,cid2,...,cidn] - >>> key = lambda x: myconformers[x] - """ - if key is not None: - pool = list(map(key, pool)) - self.pool = pool - - def clean(self, also_pool=True): - """ - Resets the state of the Filter removing all the information stored - about the last application of the of the filter to a set of data. - - Parameters - ---------- - - also_pool : bool - If true it will also clear the initial pool of conformers. 
- """ - if also_pool: - self.pool = [] - super().__init__() - - def function(self, item): - """ - main function of the filter. Inputs a tuple (cid,energy) and accepts or - rejects the item depending on the energy threshold and filter mode. - """ - assert self.mode in ["window", "difference"] - if self.mode == "window": - out = self._window(item) - else: - out = self._difference(item) - return out - - def _difference(self, item): - cid_p, energy_p = item - if item in self.pool: - return True - for cid_t, energy_t in self.pool: - if abs(energy_p - energy_t) < self.threshold: - return False - else: - self.pool.append(item) - return True - - def _window(self, item): - if not self.pool: # Ensure the pool has len = 1 - self.pool.append(item) - return True - cid_p, energy_p = item - cid_t, energy_t = self.pool[-1] - reject = abs(energy_p - energy_t) < self.threshold - is_lowest = energy_p < energy_t - if reject: - return False - if is_lowest: - self.pool.append(item) - return True +# Base classes for the filters (Raul's refactoring part, work in progress) + +# class Filter(object): +# """ +# Base class for the definition of different types of filters. Defines the +# basic API that any Filter object should have. + +# Parameters +# ---------- +# function : Function +# A single parameter function that returns true when the item should pass +# the filter and False otherwise. + +# Attributes +# ---------- +# dataset : list or None +# A list containing a reference to all the elements that where filtered. +# outcomes : list or None +# A list of booleans with the same order as the dataset elements. +# discarded : list or None +# A list of discarded items from the dataset. +# accepted : list or None +# A list of accepted items from the dataset. +# """ + +# def __init__(self, function=None): +# if function is not None: +# self.function = function +# elif getattr(self, "function", None) is None: +# self.function = lambda x: True +# self.dataset = None +# self.outcomes = None +# self._discarded = None +# self._accepted = None + +# def add_dummy_parameter(self, function): +# """ +# Adds a dummy parameter as the first positional parameter to a given +# function and returns the wrapped function. +# """ + +# def f(dummy, parameter): +# return function(parameter) + +# return f + +# def apply(self, dataset, key=None, force=False): +# """ +# Applies the filter to an iterable. Setting the 'dataset' and 'outcomes' +# attributes of the Filter in the process. + +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# key : [type], optional +# [description], by default None +# force : bool, optional +# A True value will apply the filter to the dataset forcefully, +# overwriting any previous usage of the filter to other dataset. + +# Raises +# ------ +# ValueError +# If the dataset has been already applied to a dataset and the force +# keyword is set to 'false' +# """ +# if self.dataset is not None and not force: +# msg = "Attempting to apply a previously applied filter with force='false'" +# raise ValueError(msg) +# elif self.dataset is not None: +# self.clean() + +# if key is None: +# key = lambda x: x +# self.dataset = list(dataset) +# outcomes = self.calc_outcomes(dataset, key) +# self.outcomes = tuple(outcomes) + +# def calc_outcomes(self, dataset, key): +# """ +# Runs the filter on a dataset and returns the outcomes without storing +# the values. 
+ +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# key : function +# [description] +# """ +# return tuple(self.function(key(data)) for data in dataset) + +# def extend(self, dataset, key=None): +# """ +# Extends the current dataset with the new dataset and includes the +# outcomes of applying the filter to the new dataset. + +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# key : [type], optional +# [description], by default None +# """ +# if key is None: +# key = lambda x: x +# new = list(dataset) +# outcomes = self.calc_outcomes(new, key) + +# self.dataset = self.dataset + new +# self.outcomes = tuple(i for i in chain(self.outcomes, outcomes)) +# # And resets the cache of discarded and accepted +# self._accepted = None +# self._discarded = None + +# def clean(self): +# """ +# Resets the state of the Filter removing all the information stored +# about the last application of the of the filter to a set of data. +# """ +# self.dataset = None +# self.outcomes = None +# self._discarded = None +# self._accepted = None + +# @property +# def discarded(self): +# if self._discarded is None and self.dataset is not None: +# self._discarded = [ +# d for out, d in zip(self.outcomes, self.dataset) if not out +# ] +# return self._discarded + +# @property +# def accepted(self): +# if self._accepted is None and self.dataset is not None: +# self._accepted = [d for out, d in zip(self.outcomes, self.dataset) if out] +# return self._accepted + + +# class CompoundFilter(Filter): +# """ +# Class used to apply several filters to a same dataset in a certain order and +# store the information about which ones were discarded in which filter. + +# Parameters +# ---------- +# *filters : Filter +# An undefined number of Filter objects + +# Attributes +# ---------- +# filters : list +# List of Filter objects in application order. +# dataset : list or None +# A list containing a reference to all the elements that where filtered. +# outcomes : list or None +# A list of booleans with the same order as the dataset elements. +# discarded : list or None +# A list of discarded items from the dataset. +# accepted : list or None +# A list of accepted items from the dataset. +# """ + +# def __init__(self, *filters): +# self.filters = filters +# super().__init__() + +# def insert(self, index, item): +# """ +# Inserts a filter before the index position. + +# Parameters +# ---------- +# item : Filter +# The filter object to include. +# """ +# self.filters.insert(index, item) + +# def pop(self, index=-1): +# """ +# Remove and return a filter at index (default last). +# """ +# return self.filters.pop(index) + +# def apply(self, dataset, key=None, keys=None, force=False): +# """ +# Applies the filter to an iterable. Setting the 'dataset' and 'outcomes' +# attributes of each Filter in the process. + +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# key : function, optional +# If no function is provided it is assumed that the filter can process +# each item in the dataset without any change, by default None. +# keys : iterable, optional +# Iterable of functions with the same length as the number of filters +# to provide different, by default None. Overrides the key argument. +# force : bool, optional +# A True value will apply the filter to the dataset forcefully, +# overwriting any previous usage of the filter to other dataset. 
+ +# Raises +# ------ +# ValueError +# If the dataset has been already applied to a dataset and the force +# keyword is set to 'false' +# """ +# if self.dataset is not None and not force: +# msg = "Attempting to apply a previously applied filter with force='false'" +# raise ValueError(msg) +# elif any([f.dataset is not None for f in self.filters]) and not force: +# msg = ( +# "At least one of the filters has already been applied and force='false'" +# ) +# raise ValueError(msg) +# elif force: +# self.clean() + +# # Assign the keys iterable +# if keys is None and key is None: +# key = lambda x: x +# keys = [key for _ in self.filters] +# elif key is not None: +# keys = [key for _ in self.filters] + +# self.dataset = dataset +# outcomes = self.calc_outcomes(dataset, keys) + +# # Set the attributes for all the Filters +# self.filters[0].dataset = dataset +# self.filters[0].outcomes = outcomes[0] +# out_old = outcomes[0] +# for f, out in zip(self.filters[1:], outcomes[1:]): +# dataset_n = [d for i, d in zip(out_old, self.dataset) if i] +# f.dataset = dataset_n +# f.outcomes = out +# out_old = out + +# # Set the attributes for the current object +# outcomes = self.homogenize_outcomes(outcomes) + +# self.outcomes = outcomes + +# def calc_outcomes(self, dataset, keys): +# """ +# Runs the filter on a dataset and returns the outcomes without storing +# the values. + +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# keys : iterable +# Iterable of functions that ensure proper input per each filter. +# """ +# outcomes = [] +# dataset_old = dataset +# # outcomes_old = (True,)*len(dataset) +# for f, key in zip(self.filters, keys): +# # Use the filter and get the dataset for the next filter +# out = f.calc_outcomes(dataset_old, key=key) +# dataset_old = [d for i, d in zip(out, dataset_old) if i] +# outcomes.append(tuple(out)) +# return outcomes + +# def homogenize_outcomes(self, outcomes): +# """ +# Returns a list of tuples where all tuples are the same size. + +# Parameters +# ---------- +# outcomes : list +# List of tuples with the outcomes of each of the filters. +# n : int +# size of the dataset + +# Returns +# ------- +# list +# list of the homogenized tuples. +# """ +# homogenized = [] +# outcomes_old = (True,) * len(outcomes[0]) +# for _, out in zip(self.filters, outcomes): +# _iter = out.__iter__() +# outcomes_new = [False if not i else next(_iter) for i in outcomes_old] +# homogenized.append(tuple(outcomes_new)) +# outcomes_old = outcomes_new +# return homogenized + +# def extend(self, dataset, key=None, keys=None): +# """ +# Extends the current dataset with the new dataset and includes the +# outcomes of applying the filter to the new dataset. + +# Parameters +# ---------- +# dataset : iterable +# Iterable that contains the items to run the filtering +# key : function, optional +# If no function is provided it is assumed that the filter can process +# each item in the dataset without any change, by default None. +# keys : iterable, optional +# Iterable of functions with the same length as the number of filters +# to provide different, by default None. Overrides the key argument. 
+# """ +# # Assign the keys iterable +# if keys is None and key is None: +# key = lambda x: x +# keys = [key for _ in self.filters] +# elif key is not None: +# keys = [key for _ in self.filters] + +# new = list(dataset) + +# _outcomes = self.outcomes +# outcomes = self.calc_outcomes(new, keys) + +# # Set the attributes for the current object +# self.dataset = self.dataset + new + +# # reset the cache of the properties +# self._accepted = None +# self._discarded = None + +# # Set the attributes for all the Filters +# self.filters[0].dataset = self.dataset +# self.filters[0].outcomes = self.filters[0].outcomes + outcomes[0] +# out_old = outcomes[0] +# for f, out in zip(self.filters[1:], outcomes[1:]): +# dataset_n = [d for i, d in zip(out_old, self.dataset) if i] +# f.dataset = f.dataset + dataset_n +# f.outcomes = f.outcomes + out +# # reset the cache of the properties +# f._accepted = None +# f._discarded = None +# outcomes = self.homogenize_outcomes(outcomes) +# self.outcomes = [ +# out_old + out_new for out_old, out_new in zip(_outcomes, outcomes) +# ] + +# def clean(self): +# """ +# Resets the state of the CompoundFilter removing all the information +# stored about the last application of the of the filter to a set of data. +# It cleans all the component filters. +# """ +# super().clean() +# for f in self.filters: +# f.clean() + +# def accepted_from(self, index): +# """ +# returns the list of accepted items at the specified filter. +# """ +# return [d for out, d in zip(self.outcomes[index], self.dataset) if out] + +# def discarded_from(self, index): +# """ +# returns the total list of discarded items after the specified filter. +# """ +# return [d for out, d in zip(self.outcomes[index], self.dataset) if not out] + +# @Filter.discarded.getter +# def discarded(self): +# if self._discarded is None and self.dataset is not None: +# self._discarded = self.discarded_from(-1) +# return self._discarded + +# @Filter.accepted.getter +# def accepted(self): +# if self._accepted is None and self.dataset is not None: +# self._accepted = self.accepted_from(-1) +# return self._accepted + + +# class RMSDFilter(Filter): +# """ +# This filter inputs tuples of (molecule,cid). Each time a conformer that +# passes the filter is found it is added to the pool of conformers. + +# Note: The RMSD calculation done by rdkit has the side effect of leaving the +# target conformer aligned to the probe conformer. + +# Parameters +# ---------- +# threshold : float +# Minimum RMSD to accept a conformer. +# maxmatches : int +# maximum number of atoms should match in the alignment previous to the +# RMSD calculation. +# heavyonly : [type], optional +# [description], by default True. +# reverse : bool, optional +# Reverses the threshold. If True, only conformers with an RMSD < threshold +# will be accepted. by default False. +# is_rdkit : bool +# If the conformers to compare have been generated with rdkit and the +# cid to use to access the conformer is the one provided instead of -1. +# by default False. + +# """ + +# def __init__( +# self, threshold, maxmatches, heavyonly=True, reverse=False, is_rdkit=False +# ): +# self.threshold = threshold +# self.maxmatches = maxmatches +# self.heavyonly = heavyonly +# self.pool = [] +# self.is_rdkit = is_rdkit +# super().__init__(function=self.function) + +# def set_initial_pool(self, pool, key=None): +# """ +# Sets the initial pool of seen conformers. 
+ +# Parameters +# ---------- +# pool : list +# A list of conformers ( or things convertible to conformers through +# the key function) +# key : function, optional +# A function to convert each item in the pool to a conformer that +# can be subjected to the filter, by default None. +# i.e +# >>> myconformers[cid<-int] = conformer +# >>> pool = [cid1,cid2,...,cidn] +# >>> key = lambda x: myconformers[x] +# """ +# if key is not None: +# pool = list(map(key, pool)) +# self.pool = pool + +# def clean(self, also_pool=True): +# """ +# Resets the state of the Filter removing all the information stored +# about the last application of the of the filter to a set of data. + +# Parameters +# ---------- + +# also_pool : bool +# If true it will also clear the initial pool of conformers. +# """ +# if also_pool: +# self.pool = [] +# super().__init__() + +# def function(self, item): +# """ +# main function of the filter. inputs a tuple (molecule,cid) aligns it to +# all the previously stored molecules and if it passes the filter, stores +# it and returns True, otherwise returns False. +# """ +# probe, cid_p = item +# if item in self.pool: +# return True +# for target, cid_t in self.pool: +# c1 = c2 = -1 +# if self.is_rdkit: +# probe, target = target, probe +# c1, c2 = cid_t, cid_p +# rms = get_conf_RMS(probe, target, c1, c2, self.heavyonly, self.maxmatches) +# if self.reverse: +# reject = rms < self.threshold +# else: +# reject = rms > self.threshold +# if reject: +# return False +# else: +# self.pool.append(item) +# return True + + +# class EnergyFilter(Filter): +# """ +# This filter inputs energy values. Each time a conformer that +# passes the filter is found it is added to the pool of conformers. + +# Note: The RMSD calculation done by rdkit has the side effect of leaving the +# target conformer aligned to the probe conformer. + +# Parameters +# ---------- +# threshold : float +# Energy threshold in kcal/mol. +# mode : str, ['difference','window'] +# 'difference' accepts all the conformers whose energy difference +# with respect to all the previous conformers found is larger than +# the threshold. +# 'window' accepts all the conformers whose energy difference with respect +# to the lowest in energy is smaller than the threshold. + +# Attributes +# ---------- +# pool : list +# In window mode, it is the list of minima used to calculate the energy +# window. If the lowest conformer was either set as initial pool or +# provided as the first item of the dataset the pool will remain of len=1. +# In difference mode, it corresponds to all the conformers accepted. +# """ + +# def __init__(self, threshold, mode): +# self.mode = mode # difference or window +# self.threshold = threshold +# self.pool = [] +# super().__init__() + +# def set_initial_pool(self, pool, key=None): +# """ +# Sets the initial pool of seen conformers. + +# Parameters +# ---------- +# pool : list +# A list of conformers ( or things convertible to conformers through +# the key function) +# key : function, optional +# A function to convert each item in the pool to a conformer that +# can be subjected to the filter, by default None. +# i.e +# >>> myconformers[cid<-int] = conformer +# >>> pool = [cid1,cid2,...,cidn] +# >>> key = lambda x: myconformers[x] +# """ +# if key is not None: +# pool = list(map(key, pool)) +# self.pool = pool + +# def clean(self, also_pool=True): +# """ +# Resets the state of the Filter removing all the information stored +# about the last application of the of the filter to a set of data. 
diff --git a/aqme/grapher.py b/aqme/grapher.py
index 8e0a5ecb..3a7ebf75 100644
--- a/aqme/grapher.py
+++ b/aqme/grapher.py
@@ -8,7 +8,6 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error
 import statistics as stats
-from aqme.turbomole import TurbomoleOutput
 
 ev_2_kcal_mol = 23.061 #ev to kcal/mol
 hartree_to_kcal = 627.509
diff --git a/aqme/mainf.py b/aqme/mainf.py
index f8a37315..7521a9d6 100644
--- a/aqme/mainf.py
+++ b/aqme/mainf.py
@@ -5,7 +5,6 @@
 import glob
 import sys
 import os
-import time
 import concurrent.futures as futures
 import multiprocessing as mp
 from pathlib import Path
@@ -22,22 +21,12 @@
 from aqme.qprep import qprep, get_molecule_list, load_charge_data
 
-# from aqme.grapher import graph
-# from aqme.descp import calculate_parameters
-# from aqme.nmr import calculate_boltz_and_nmr
-# from aqme.energy import calculate_boltz_and_energy,calculate_avg_and_energy
-# from aqme.dbstep_conf import calculate_db_parameters,calculate_boltz_and_dbstep
-# from aqme.nics_conf import calculate_nics_parameters,calculate_boltz_for_nics,calculate_avg_nics
-# from aqme.cclib_conf import calculate_cclib,get_avg_cclib_param,calculate_boltz_for_cclib
 from aqme.cmin import mult_min
 from aqme.utils import (
-    Logger,
     move_file,
     get_filenames,
     creation_of_dup_csv_csearch,
     creation_of_dup_csv_cmin,
-    read_energies,
-    get_name_and_charge,
 )
 
 # need to and in energy
diff --git a/aqme/nmr.py b/aqme/nmr.py
deleted file mode 100644
index 74f4b7b2..00000000
--- a/aqme/nmr.py
+++ /dev/null
@@ -1,388 +0,0 @@
-#####################################################.
-#        This file stores all the functions         #
-#        used for genrating details for nmr         #
-#####################################################.
- -import numpy as np -import os -import subprocess -import glob -import pandas as pd - -from aqme.cheshire_lookup import cheshire - -from aqme.utils import move_file - - -def nmr_stats(y_exp, y_pred): - sum = 0 - for i, j in zip(y_exp, y_pred): - sum += abs(float(i) - float(j)) - mae = sum / len(y_pred) - y_sd = y_exp.astype(float).subtract(y_pred.astype(float)) - sd = y_sd.std() - return mae, sd - - -def get_exp_data(args, name, w_dir_initial, final_shieldings, conf_idx, conf_sym): - if args.nmr_exp == "fromsdf": - sdf_file = w_dir_initial + "/" + args.input - sdf_lines = open(sdf_file, "r").readlines() - found_nmr = 0 - for i, line in enumerate(sdf_lines): - if line.find("> ") > -1: - start = i + 1 - found_nmr = 1 - if len(line.strip()) == 0 and found_nmr == 1: - stop = i - break - - atom_num, atom_sheilding = [], [] - for i in range(start, stop): - split_line = sdf_lines[i].split(",") - if len(split_line) == 3: - if len(split_line[1].split("-")) > 1: - split_line_sheild = split_line[1].split("-") - split_line_atom = split_line[2].split("-") - split_line_atom[0] = split_line_atom[0].strip() - split_line_sheild[0] = split_line_sheild[0].strip() - split_line_atom[len(split_line_atom) - 1] = ( - split_line_atom[len(split_line_atom) - 1].strip().split("\\")[0] - ) - for idx, shield in zip(conf_idx, final_shieldings): - if idx in split_line_atom: - split_line_sheild_min = np.array( - split_line_sheild, dtype=float - ) - float(shield) - idx_min = np.argmin(abs(split_line_sheild_min)) - if split_line_atom[idx_min] in atom_num: - if split_line_atom[idx_min] != idx: - atom_num.append(idx) - atom_sheilding.append( - split_line_sheild[split_line_atom.index(idx)] - ) - else: - if idx_min == 1: - atom_num.append(split_line_atom[0]) - atom_sheilding.append(split_line_sheild[0]) - if idx_min == 0: - atom_num.append(split_line_atom[1]) - atom_sheilding.append(split_line_sheild[1]) - else: - atom_num.append(split_line_atom[idx_min]) - atom_sheilding.append(split_line_sheild[idx_min]) - else: - atom_num.append(split_line[2].strip().split("\\")[0]) - atom_sheilding.append(split_line[1].strip()) - if len(split_line) > 3: - for k in range(2, len(split_line)): - split_line[k] = split_line[k].strip() - if k == len(split_line) - 1: - split_line[k] = split_line[k].strip().split("\\")[0] - - atom_shield_cal, atom_idx_cal, sym = [], [], None - calcuate_idx = "-" - for j in range(2, len(split_line)): - for i, id_conf in enumerate(conf_idx): - if id_conf == split_line[j]: - idx_in_conf_idx = i - break - - atom_idx_cal.append(split_line[j]) - atom_shield_cal.append(final_shieldings[idx_in_conf_idx]) - final_shieldings = np.delete(final_shieldings, idx_in_conf_idx) - conf_idx = np.delete(conf_idx, idx_in_conf_idx) - sym = conf_sym[idx_in_conf_idx] - conf_sym = np.delete(conf_sym, idx_in_conf_idx) - - calculate_avg = np.average(atom_shield_cal) - calcuate_idx = calcuate_idx.join(atom_idx_cal) - final_shieldings = np.append(final_shieldings, calculate_avg) - conf_idx = np.append(conf_idx, calcuate_idx) - conf_sym = np.append(conf_sym, sym) - - atom_num.append(calcuate_idx) - atom_sheilding.append(split_line[1].strip()) - - return atom_num, atom_sheilding - - -def calculate_nmr( - nmr_qm_files, args, log, name, w_dir_fin, w_dir_initial, lot_sp, bs_sp, lot, bs -): - - # define the lot etc., - [opt_method, opt_basis, opt_solv] = [ - lot, - bs, - [args.solvent_model, args.solvent_name], - ] - [nmr_method, nmr_basis, nmr_solv] = [ - lot_sp, - bs_sp, - [args.solvent_model_sp, args.solvent_name], - ] - - log.write("\no 
Calculating NMR shieldings with this combination of methods:") - - if args.solvent_model == "gas_phase": - log.write( - "\no Optimization: {0}/{1} (gas phase)".format(opt_method, opt_basis) - ) - else: - log.write( - "\no Optimization: {0}/{1} ({2}, solvent = {3})".format( - opt_method, opt_basis, opt_solv[0], opt_solv[1] - ) - ) - - if args.solvent_model_sp == "gas_phase": - log.write( - "\no NMR calculation: {0}/{1} (gas phase)".format(nmr_method, nmr_basis) - ) - else: - log.write( - "\no NMR calculation: {0}/{1} ({2}, solvent = {3})".format( - nmr_method, nmr_basis, nmr_solv[0], nmr_solv[1] - ) - ) - - # Matching against the CHESHIRE database of scaling factors - if args.nmr_online: - slope, intercept, tms_ref = [], [], None - for nuc in args.nmr_nucleus: - scale = cheshire( - args.nmr_online, - nuc, - opt_method, - opt_basis, - opt_solv, - nmr_method, - nmr_basis, - nmr_solv, - args.nmr_aos, - log, - ) - try: - slope.append(float(scale.split()[1])) - intercept.append(float(scale.split()[3])) - except AttributeError: - log.write( - "x No scaling factors found for this level of theory! Input the values as arguments for AQME!" - ) - exit() - - log.write("\no The slope for nucleus {0} = {1}".format(nuc, slope)) - log.write("\no The intercept for nucleus {0} = {1}".format(nuc, intercept)) - - else: - slope = args.nmr_slope - intercept = args.nmr_intercept - tms_ref = args.nmr_tms_ref - - for i, nuc in enumerate(args.nmr_nucleus): - log.write("\no The slope for nucleus {0} = {1}".format(nuc, slope[i])) - log.write( - "\no The intercept for nucleus {0} = {1}".format(nuc, intercept[i]) - ) - - final_shieldings = [] - - for num_file, file in enumerate(nmr_qm_files): - # list of H and C shieldings for each individual conformer - conf_shieldings, conf_idx, conf_sym = [], [], [] - - # read the file and detects start and stop point enclosing the NMR section of the LOG file - start, stop = 0, 0 - - outfile = open(file, "r") - outlines = outfile.readlines() - - for i in range(len(outlines)): - if outlines[i].find("SCF GIAO Magnetic shielding tensor (ppm):") > -1: - start = i - if outlines[i].find("Population analysis using the SCF Density") > -1: - stop = i - break - - # stores H and C NMR shieldings within the NMR section of the LOG file - for i in range(start, stop): - try: - if outlines[i].split()[1] in args.nmr_nucleus: - atom_nuc = outlines[i].split()[1] - # assigning values from arrays - index = args.nmr_nucleus.index(atom_nuc) - if not args.nmr_online: - TMS_ref_nuc = tms_ref[index] - else: - pass - slope_nuc = slope[index] - intercept_nuc = intercept[index] - - conf_idx.append(outlines[i].split()[0]) - conf_sym.append(atom_nuc) - if args.nmr_online: - scaled_nmr = (intercept_nuc - float(outlines[i].split()[4])) / ( - -slope_nuc - ) - conf_shieldings.append(scaled_nmr) - else: - scaled_nmr = (intercept_nuc - float(outlines[i].split()[4])) / ( - -slope_nuc - ) - conf_shieldings.append(TMS_ref_nuc - scaled_nmr) - # conf_shieldings.append(scaled_nmr) - except: - pass - - outfile.close() - - # get the Boltzmann probabilities of each conformer from a GoodVibes file - if str(bs).find("/") > -1: - boltz_file = ( - w_dir_initial - + "/QPRED/nmr/boltz/" - + str(lot) - + "-" - + str(bs).split("/")[0] - + "/Goodvibes_" - + name - + ".dat" - ) - else: - boltz_file = ( - w_dir_initial - + "/QPRED/nmr/boltz/" - + str(lot) - + "-" - + str(bs) - + "/Goodvibes_" - + name - + ".dat" - ) - - boltz_outfile = open(boltz_file, "r") - boltz_outlines = boltz_outfile.readlines() - for i in range(len(boltz_outlines)): 
- # I remove the NMR from the file names using [0:-4] - if boltz_outlines[i].find(file.split("_NMR")[0]) > -1: - boltz_factor = float(boltz_outlines[i].split()[-1]) - break - - # Multiply the shieldings by the corresponding Boltzmann factor - conf_shieldings = [x * boltz_factor for x in conf_shieldings] - final_shieldings.append(conf_shieldings) - - # final additions - final_shieldings = np.array(final_shieldings) - final_shieldings = np.sum(final_shieldings, axis=0) - conf_idx = np.array(conf_idx) - conf_sym = np.array(conf_sym) - - if args.nmr_exp != "None": - # getting experimental shiftd for get_atom - atom_num_exp, atom_sheilding_exp = get_exp_data( - args, name, w_dir_initial, final_shieldings, conf_idx, conf_sym - ) - - # make final array with atom_num, exp, dft - df = pd.DataFrame( - {"Atom": atom_num_exp, "Shielding-Exp": atom_sheilding_exp}, - columns=["Atom", "Shielding-Exp"], - ) - - for i, atom_num in enumerate(atom_num_exp): - for j, atom_cal in enumerate(conf_idx): - if len(atom_num.split("-")) > 1: - if ( - atom_cal == atom_num.split("-")[0] - or atom_cal == atom_num.split("-")[1] - or atom_cal == atom_num.split("-")[2] - ): - df.at[i, "Shielding-Calc"] = final_shieldings[j] - df.at[i, "Atom-Symbol"] = conf_sym[j] - if atom_num == atom_cal: - df.at[i, "Shielding-Calc"] = final_shieldings[j] - df.at[i, "Atom-Symbol"] = conf_sym[j] - - # calculation of MAE and SD - for atom_nuc in args.nmr_nucleus: - df_nuc = df.loc[df["Atom-Symbol"] == atom_nuc] - mae_nuc, sd_nuc = nmr_stats( - df_nuc["Shielding-Exp"], df_nuc["Shielding-Calc"] - ) - log.write( - "\no The MAE and SD for molecule {0} is {1} and {2} respectively for {3}".format( - name, mae_nuc, sd_nuc, atom_nuc - ) - ) - - else: - # make final array with atom_num, exp, dft - df = pd.DataFrame( - { - "Atom": conf_idx, - "Atom-Symbol": conf_sym, - "Shielding-Calc": final_shieldings, - }, - columns=["Atom", "Atom-Symbol", "Shielding-Calc"], - ) - - folder = w_dir_initial + "/QPRED/nmr/average-nmr/" + str(lot_sp) + "-" + str(bs_sp) - log.write("\no Preparing final NMR results in {}".format(folder)) - try: - os.makedirs(folder) - os.chdir(folder) - except OSError: - if os.path.isdir(folder): - os.chdir(folder) - else: - pass - - export_param_excel = df.to_csv( - name + "_" + lot_sp + "-" + bs_sp + "_predicted_shifts.csv", index=None - ) - - -def calculate_boltz_and_nmr(val, args, log, name, w_dir_fin, w_dir_initial, lot, bs): - - # GoodVibes must be installed as a module (through pip or conda) - cmd_boltz = ["python", "-m", "goodvibes", "--boltz", "--output", name] - for file in val: - cmd_boltz.append(file) - subprocess.call(cmd_boltz) - - # writing to correct places - if str(bs).find("/") > -1: - destination = Path(w_dir_initial + "/QPRED/nmr/boltz/" + str(lot) + "-" + str(bs).split("/")[0]) - else: - destination = Path(w_dir_initial + "/QPRED/nmr/boltz/" + str(lot) + "-" + str(bs)) - - move_file(destination, os.getcwd(), "Goodvibes_" + name + ".dat") - - for lot_sp in args.level_of_theory_sp: - for bs_sp in args.basis_set_sp: - for bs_gcp_sp in args.gen_bs_sp: - # performing the nmr calculations - dir_sp_nmr = ( - w_dir_fin - + "/../G16-SP_input_files/" - + str(lot_sp) - + "-" - + str(bs_sp) - ) - os.chdir(dir_sp_nmr) - # grabbing the respective NMR files for a given molecules - nmr_qm_files = glob.glob(name + "*NMR.log") - calculate_nmr( - nmr_qm_files, - args, - log, - name, - w_dir_fin, - w_dir_initial, - lot_sp, - bs_sp, - lot, - bs, - ) diff --git a/aqme/qcorr.py b/aqme/qcorr.py index 911452f6..0a748679 100644 --- 
a/aqme/qcorr.py +++ b/aqme/qcorr.py @@ -7,12 +7,10 @@ import glob import pandas as pd import json -import cclib import subprocess from pathlib import Path from aqme.utils import periodic_table,get_info_input -from aqme.filter import geom_rules_output from aqme.utils import ( move_file, @@ -449,12 +447,12 @@ def qcorr_processing(self): os.chdir(self.w_dir_main) if errortype not in ['ts_no_imag_freq','atomicbasiserror','before_E_error','isomerization','duplicate_calc','spin_contaminated','none','sp_calc']: - qcorr_calcs = qprep(destination=Path(f'{os.getcwd()}/unsuccessful_QM_outputs/run_{self.round_num}/fixed_QM_inputs'), w_dir_main=self.w_dir_main, - molecule=file_name, charge=charge, mult=mult, - program=self.program, atom_types=atom_types, - cartesians=cartesians, qm_input=keywords_line, - mem=mem, nprocs=nprocs, chk=self.chk, qm_end=self.qm_end, - bs_gen=self.bs_gen, bs=self.bs, gen_atoms=self.gen_atoms) + qprep(destination=Path(f'{os.getcwd()}/unsuccessful_QM_outputs/run_{self.round_num}/fixed_QM_inputs'), w_dir_main=self.w_dir_main, + molecule=file_name, charge=charge, mult=mult, + program=self.program, atom_types=atom_types, + cartesians=cartesians, qm_input=keywords_line, + mem=mem, nprocs=nprocs, chk=self.chk, qm_end=self.qm_end, + bs_gen=self.bs_gen, bs=self.bs, gen_atoms=self.gen_atoms) print(f'{file}: Termination = {termination}, Error type = {errortype}') self.log.write(f'{file}: Termination = {termination}, Error type = {errortype}') @@ -1141,21 +1139,21 @@ def json2input(json_files=[], w_dir_main=os.getcwd(), destination=None, suffix=' print('x The json files do not contain coordinates and/or atom type information') # if no charge and multiplicity are specified, they are read from the json file - if charge_initial == None: + if charge_initial is None: charge = cclib_data['properties']['charge'] - if mult_initial == None: + if mult_initial is None: mult = cclib_data['properties']['multiplicity'] - if charge == None: + if charge is None: print('x No charge was specified in the json file or function input (i.e. json2input(charge=0) )') - elif mult == None: + elif mult is None: print('x No multiplicity was specified in the json file or function input (i.e. json2input(mult=1) )') - json_calcs = qprep(destination=destination, w_dir_main=w_dir_main, - molecule=file_name, charge=charge, mult=mult, - program=program, atom_types=atom_types, - cartesians=cartesians, qm_input=qm_input, - mem=mem, nprocs=nprocs, chk=chk, qm_end=qm_end, - bs_gen=bs_gen, bs=bs, gen_atoms=gen_atoms, suffix=suffix) + qprep(destination=destination, w_dir_main=w_dir_main, + molecule=file_name, charge=charge, mult=mult, + program=program, atom_types=atom_types, + cartesians=cartesians, qm_input=qm_input, + mem=mem, nprocs=nprocs, chk=chk, qm_end=qm_end, + bs_gen=bs_gen, bs=bs, gen_atoms=gen_atoms, suffix=suffix) print(f'o Final input files were generated in {destination}') diff --git a/aqme/qdesc.py b/aqme/qdesc.py deleted file mode 100644 index fb4743a6..00000000 --- a/aqme/qdesc.py +++ /dev/null @@ -1,151 +0,0 @@ -#####################################################. -# This file stores all the functions # -# used for genrating all parameters # -#####################################################. 
- -from rdkit.Chem import AllChem as Chem -from rdkit.Chem import rdMolTransforms -import os -import pandas as pd -from aqme.csearch import getDihedralMatches - - -def get_data(rdkit_mols,min_mols,dft_mols,lot,bs,name_mol,args,type_csearch,type_min,w_dir_initial): - geom_data = pd.DataFrame() - for j, mol_j in enumerate(rdkit_mols): - name = mol_j.GetProp('_Name') - name_dft= '_'.join(mol_j.GetProp('_Name').split())+'_'+type_min - geom_data.at[j,'Name'] = name - if len(args.dihedral) != 0: - for d,dh in enumerate(args.dihedral): - dihedral_rdkit = rdMolTransforms.GetDihedralDeg(mol_j.GetConformer(),dh[0],dh[1],dh[2],dh[3]) - geom_data.at[j,args.geom_par_name+'-Dihedral-'+type_csearch+'-'+str(dh[0])+'-'+str(dh[1])+'-'+str(dh[2])+'-'+str(dh[3])] = dihedral_rdkit - if len(args.angle) != 0: - for a,an in enumerate(args.angle): - angle_rdkit = rdMolTransforms.GetAngleDeg(mol_j.GetConformer(),an[0],an[1],an[2]) - geom_data.at[j,args.geom_par_name+'-Angle-'+type_csearch+'-'+str(an[0])+'-'+str(an[1])+'-'+str(an[2])] = angle_rdkit - if len(args.bond) != 0: - for b,bd in enumerate(args.angle): - bond_rdkit = rdMolTransforms.GetBondLength(mol_j.GetConformer(),bd[0],bd[1]) - geom_data.at[j,args.geom_par_name+'-Bond-'+type_csearch+'-'+str(bd[0])+'-'+str(bd[1])] = bond_rdkit - if min_mols is not None: - if type_min =='ani' or type_min=='xtb': - for i, mol_i in enumerate(min_mols): - if mol_i.GetProp('_Name') == name+' '+type_min: - if len(args.dihedral) != 0: - for d,dh in enumerate(args.dihedral): - dihedral_min = rdMolTransforms.GetDihedralDeg(mol_i.GetConformer(),dh[0],dh[1],dh[2],dh[3]) - geom_data.at[j,args.geom_par_name+'-Dihedral-'+type_min+'-'+str(dh[0])+'-'+str(dh[1])+'-'+str(dh[2])+'-'+str(dh[3])] = dihedral_min - if len(args.angle) != 0: - for a,an in enumerate(args.angle): - angle_min = rdMolTransforms.GetAngleDeg(mol_i.GetConformer(),an[0],an[1],an[2]) - geom_data.at[j,args.geom_par_name+'-Angle-'+type_min+'-'+str(an[0])+'-'+str(an[1])+'-'+str(an[2])] = angle_min - if len(args.bond) != 0: - for b,bd in enumerate(args.angle): - bond_min = rdMolTransforms.GetBondLength(mol_i.GetConformer(),bd[0],bd[1]) - - if dft_mols is not None: - if type_min =='ani' or type_min=='xtb': - for i, mol_i in enumerate(dft_mols): - if mol_i.GetProp('_Name').split('/')[-1].split('.log')[0] == name_dft: - if len(args.dihedral) != 0: - for d,dh in enumerate(args.dihedral): - dihedral_min = rdMolTransforms.GetDihedralDeg(mol_i.GetConformer(),dh[0],dh[1],dh[2],dh[3]) - geom_data.at[j,args.geom_par_name+'-Dihedral-'+lot+'-'+bs+'-'+str(dh[0])+'-'+str(dh[1])+'-'+str(dh[2])+'-'+str(dh[3])] = dihedral_min - if len(args.angle) != 0: - for a,an in enumerate(args.angle): - angle_min = rdMolTransforms.GetAngleDeg(mol_i.GetConformer(),an[0],an[1],an[2]) - geom_data.at[j,args.geom_par_name+'-Angle-'+lot+'-'+bs+'-'+str(an[0])+'-'+str(an[1])+'-'+str(an[2])] = angle_min - if len(args.bond) != 0: - for b,bd in enumerate(args.angle): - bond_min = rdMolTransforms.GetBondLength(mol_i.GetConformer(),bd[0],bd[1]) - geom_data.at[j,args.geom_par_name+'-Bond-'+lot+'-'+bs+'-'+str(bd[0])+'-'+str(bd[1])] = bond_min - return geom_data - - -def calculate_parameters(sdf_rdkit,sdf_ani,sdf_xtb,qm_files,args,log,w_dir_initial,name_mol,lot,bs): - #creating folder for all molecules to write geom parameter - folder = w_dir_initial + '/QSTAT/geom_parameters' - try: - os.makedirs(folder) - os.chdir(folder) - except OSError: - if os.path.isdir(folder): - os.chdir(folder) - else: - raise - #get mol objects - dft_mols= [] - rdkit_mols = 
Chem.SDMolSupplier(sdf_rdkit, removeHs=False) - if args.rot_dihedral: - args.dihedral = getDihedralMatches(rdkit_mols[0], args.heavyonly,log) - if sdf_ani is not None: - ani_mols = Chem.SDMolSupplier(sdf_ani, removeHs=False) - if sdf_xtb is not None: - xtb_mols = Chem.SDMolSupplier(sdf_xtb, removeHs=False) - ob_compat = True - try: - import openbabel as ob - except (ModuleNotFoundError,AttributeError): - ob_compat = False - log.write('\nx Open Babel is not installed correctly, it is not possible to get molecular descriptors') - - if ob_compat: - obConversion = ob.OBConversion() - obConversion.SetInAndOutFormats("log", "mol") - ob_mol = ob.OBMol() - for file in qm_files: - if str(bs).find('/') > -1: - obConversion.ReadFile(ob_mol, args.path + str(lot) + '-' + str(bs).split('/')[0] +'/success/output_files/'+file) - obConversion.WriteFile(ob_mol, args.path + str(lot) + '-' + str(bs).split('/')[0] +'/success/output_files/'+file.split('.')[0]+'.mol') - obConversion.CloseOutFile() - dft_mols.append(Chem.MolFromMolFile(args.path + str(lot) + '-' + str(bs).split('/')[0] +'/success/output_files/'+file.split('.')[0]+'.mol', removeHs=False)) - else: - obConversion.ReadFile(ob_mol, args.path + str(lot) + '-' + str(bs) +'/success/output_files/'+file) - obConversion.WriteFile(ob_mol, args.path + str(lot) + '-' + str(bs) +'/success/output_files/'+file.split('.')[0]+'.mol') - obConversion.CloseOutFile() - dft_mols.append(Chem.MolFromMolFile(args.path + str(lot) + '-' + str(bs) +'/success/output_files/'+file.split('.')[0]+'.mol', removeHs=False)) - - if os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/rdkit/'+name_mol+'_rdkit.sdf'): - geom_data = get_data(rdkit_mols,xtb_mols,dft_mols,lot,bs,name_mol,args,'rdkit','xtb',w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-rdkit-xtb.csv',index=False) - - if os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/rdkit/'+name_mol+'_rdkit.sdf'): - geom_data = get_data(rdkit_mols,ani_mols,dft_mols,lot,bs,name_mol,args,'rdkit','ani',w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-rdkit-ani.csv',index=False) - - ########## - - if os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/summ/'+name_mol+'_summ.sdf'): - geom_data = get_data(rdkit_mols,xtb_mols,dft_mols,lot,bs,name_mol,args,'summ','xtb',w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-summ-xtb.csv',index=False) - - if os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/summ/'+name_mol+'_summ.sdf'): - geom_data = get_data(rdkit_mols,ani_mols,dft_mols,lot,bs,name_mol,args,'summ','ani',w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-summ-ani.csv',index=False) - - ############# - - if os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/fullmonte/'+name_mol+'_fullmonte.sdf'): - geom_data = get_data(rdkit_mols,xtb_mols,dft_mols,lot,bs,name_mol,args,'fullmonte','xtb',w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-fullmonte-xtb.csv',index=False) - - - if os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf') and os.path.exists(w_dir_initial+'/CSEARCH/fullmonte/'+name_mol+'_fullmonte.sdf'): - geom_data = get_data(rdkit_mols,ani_mols,dft_mols,lot,bs,name_mol,args,'fullmonte','ani',w_dir_initial) - 
geom_data.to_csv(name_mol+'-all-geom-data-with-fullmonte-ani.csv',index=False) - - ############ - - if os.path.exists(w_dir_initial+'/CSEARCH/summ/'+name_mol+'_summ.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf'): - geom_data = get_data(rdkit_mols,None,dft_mols,lot,bs,name_mol,args,'summ',None,w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-summ.csv',index=False) - - - if os.path.exists(w_dir_initial+'/CSEARCH/rdkit/'+name_mol+'_rdkit.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf') : - geom_data = get_data(rdkit_mols,None,dft_mols,lot,bs,name_mol,args,'rdkit',None,w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-rdkit.csv',index=False) - - if os.path.exists(w_dir_initial+'/CSEARCH/fullmonte/'+name_mol+'_fullmonte.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/xtb/'+name_mol+'_xtb.sdf') and not os.path.exists(w_dir_initial+'/CSEARCH/ani/'+name_mol+'_ani.sdf') : - geom_data = get_data(rdkit_mols,None,dft_mols,lot,bs,name_mol,args,'rdkit',None,w_dir_initial) - geom_data.to_csv(name_mol+'-all-geom-data-with-fullmonte.csv',index=False) diff --git a/aqme/qprep.py b/aqme/qprep.py index 2d7c807e..a421081c 100644 --- a/aqme/qprep.py +++ b/aqme/qprep.py @@ -7,7 +7,6 @@ import os import sys -import glob import pandas as pd from rdkit import Chem from aqme.utils import load_from_yaml, Logger, move_file diff --git a/aqme/turbomole.py b/aqme/turbomole.py deleted file mode 100644 index 431afa48..00000000 --- a/aqme/turbomole.py +++ /dev/null @@ -1,1164 +0,0 @@ -import os -import subprocess -import shutil -import shlex -import re -from collections import namedtuple -import glob -from pathlib import Path - -# ensure a proper import of pybel for openbabel v2 and v3 -try: - import pybel -except ImportError: - from openbabel import pybel # for openbabel>=3.0.0 - - -class TurbomoleOutput(object): - """ - Represents the output of a turbomole calculation in a specified folder. - - Parameters - ---------- - folder : str - A valid path to the folder where the turbomole calculation has - been carried out or is in the process. 
- T : float, optional - Temperature in K for thermochemistry calculations, by default 298.15 - P : float, optional - Pressure in MPa for thermochemistry calculations, by default 0.1 - scale_factor : float, optional - scaling factor for thermochemistry calculations between 0 and 1, - by default 1.0 - qh_thermo : bool, optional - If True the a qh rrho formalism will be used instead of the rrho - formalism to calculate the thermochemistry, by default False - fcutoffs : tuple, optional - lower and upper limits frequency thresholds to the qh calculations in - cm^-1, by default (100,150) - - Attributes - ---------- - calc_type - calc_state - cosmo - geometry - energy - frequencies - rotational_constants - zpe - enthalpy - entropy - gibbs - """ - def __init__(self,folder,T=298.15,P=0.1,scale_factor=1.0,qh_thermo=False, - fcutoffs=(100,150)): - self.folder = Path(folder) - self.calc_type = self.guess_calculation_type() - self.calc_state = self.guess_calculation_state() - self.cosmo = self.uses_cosmo() - self.geometry = self.parse_geometry() - self.energy = self.parse_energy() - self.T = T # Temperature in K - self.P = P # pressure in MPa - self.scale_factor = scale_factor # Scaling factor for frequencies - self.qh_thermo = qh_thermo # If the qh free energy is to be calculated - self.fcutoffs = fcutoffs # Cutoffs for frequencies for qh in cm^-1 - self.frequencies = self.parse_frequencies() - self.rotational_constants = self.parse_rotational_constants() - self._zpe = None - self._enthalpy = None - self._entropy = None - self._gibbs = None - self.calc_thermo() - def guess_calculation_type(self): - """ - Guesses the type of calculation using the existence/nonexistence of - certain files. - - Returns - ------- - str - 4 possible outcomes: 'sp','opt','freq', 'opt+freq' - """ - calc_type = [] - file = self.folder / 'ridft.out' - if file.exists(): - return 'sp' - file = self.folder / 'jobex.out' - if file.exists(): - calc_type.append('opt') - file = self.folder / 'vibspectrum' - if file.exists(): - calc_type.append('freq') - return '+'.join(calc_type) - def guess_calculation_state(self): - """ - Guesses the current state of the calculation. - - Returns - ------- - str - 1 of the following states: 'running','converged','failed' - """ - if self.calc_type == 'sp': - return self._guess_calculation_state_sp() - else: - return self._guess_calculation_state_optfreq() - def _guess_calculation_state_sp(self): - """ - Guesses the current state of the calculation depending on the last line - of the ridft.out file. - """ - file = self.folder / 'ridft.out' - with open(file,'r') as F: - lines = [line.strip() for line in F if line.strip()] - - for line in lines: - if 'ridft did not converge!' in line: - return 'failed' - - if 'ridft ended normally' in lines[-1]: - return 'converged' - - return 'running' - def _guess_calculation_state_optfreq(self): - """ - Guesses the current state of the calculation depending on the existence - of the 'GEO_OP_XXXX' file. 
- - Returns - ------- - str - 3 accepted states: 'running','converged','failed' - """ - running_file = self.folder / 'GEO_OPT_RUNNING' - if running_file.exists(): - # Check for out of time - # if out of time -> return 'failed' - # else: - return 'running' - for item in ['converged','failed']: - file = self.folder / f'GEO_OPT_{item.upper()}' - if file.exists(): - return item - else: - raise RuntimeError('No state was found for the calculation') - - def parse_geometry(self): - """ - Finds the last geometry from de coord file and returns its representation - as a .xyz file. - - Returns - ------- - str - contents of the xyz version of the coord file without title. - """ - file = str(self.folder/'coord') - mol = next(pybel.readfile('tmol',file)) - return mol.write('xyz') - def parse_energy(self): - """ - Reads the potential energy from the appropiate location depending on the - solvation and type of calculation. - - Returns - ------- - float - Final potential energy - """ - pattern = r'\|\s*?total\s*?energy\s*?\=\s*?(\-+[0-9]*\.[0-9]*\s*?\|)' - if self.calc_type == 'sp': - file = Path('ridft.out') - else: - file = Path('job.last') - with open(self.folder / file,'r') as F: - txt = F.read() - if self.cosmo: - pattern = r'Total\senergy\s\+\sOC\scorr\.\s\=\s*?(\-+[0-9]*\.[0-9]*)\n' - else: - pattern = r'\|\s*?total\s*?energy\s*?\=\s*?(\-+[0-9]*\.[0-9]*)\s*?\|' - regex = re.compile(pattern) - matches = regex.findall(txt) - if matches: - return float(matches[-1]) - def uses_cosmo(self): - """ - Checks if the calculation uses cosmo solvation. Returns True if it finds - the 'out.cosmo' file. - """ - file = self.folder / 'out.cosmo' - if file.exists(): - return True - # Check in the control file - regex = re.compile('cosmo') - with open(self.folder/'control','r') as F: - txt = F.read() - match = regex.findall(txt) - return bool(match) - def calc_thermo(self,executable=None): - """ - Runs the appropiate executable to calculate the thermochemistry to - calculate the zpe, H, S and G at T and P and parses the output to - retrieve said magnitudes. - - Parameters - ---------- - executable : str - Absolute path to the executable script in case that it is not - accesible with shutils.which. Grimme's "thermo" script for qh and - Turbomole's "freeh" for non-qh. - """ - if self.qh_thermo: - zpe,enthalpy,entropy,gibbs = self._calc_thermo_qh(executable) - else: - zpe,enthalpy,entropy,gibbs = self._calc_thermo_noqh(executable) - self._zpe = zpe - self._enthalpy = enthalpy - self._entropy = entropy - self._gibbs = gibbs - def _calc_thermo_noqh(self,executable=None): - """ - Runs the 'freeh' command line utility provided that the it is accesible - to calculate the zpe, H, S and G at T and parses the output to retrieve - said magnitudes. - - Parameters - ---------- - executable : str - Absolute path to the executable script in case that it is not - accesible with shutils.which. - - Returns - ------- - zpe,enthalpy,entropy,gibbs - Energy corrections to the potential energy in hartree if the output - of freeh is in kJ/mol, otherwise the units are the same as the ones - provided by the freeh script. 
(Returns (None,)*4 if the freeh - command is not accesible) - """ - freeh = shutil.which('freeh') # turbomole's executable - if freeh is None and executable is None: - return None, None, None, None - elif executable is not None: - freeh = executable - # Run the freeh command - cwd = os.path.abspath(os.getcwd()) - os.chdir(self.folder) - tstart = tend = self.T - pstart = pend = self.P - keywords = f'tstart={tstart} tend={tend} numt=1 pstart={pstart} pend={pend} nump=1' - echo_cmd = f'echo "\n1\n{keywords}\n*\n"' - cmd = freeh - p1 = subprocess.Popen(shlex.split(echo_cmd), stdout=subprocess.PIPE) - p2 = subprocess.Popen(shlex.split(cmd), stdin=p1.stdout, - stdout=subprocess.PIPE,stderr=subprocess.DEVNULL) - p1.stdout.close() - txt = [line.decode('utf-8') for line in p2.stdout] - p2.stdout.close() - os.chdir(cwd) - # Here do the parsing of txt - _iter = txt.__iter__() - for line in _iter: - if 'your wishes are' in line: - break - # read zpe - for line in _iter: - if 'zpe=' in line: - _, zpe, zpe_unit = line.strip().split() - zpe = float(zpe) - break - else: - zpe = None - zpe_unit = None - for line in _iter: - if 'entropy' in line: - items = line.strip().split() - index_e = items.index('entropy') - index_g = items.index('chem.pot.') - _ = next(_iter) - _ = next(_iter) - line = next(_iter) - entropy = float(line.strip().split()[index_e]) - gibbs = float(line.strip().split()[index_g]) - break - else: - entropy = gibbs = None - for line in _iter: - if 'enthalpy' in line: - items = line.strip().split() - index = items.index('enthalpy') - _ = next(_iter) - line = next(_iter) - enthalpy= float(line.strip().split()[index]) - break - else: - enthalpy = None - # We assume congruent units of all properties: - if zpe_unit == 'kJ/mol': - factor = 2625.5 #kJ/mol -> a.u. - zpe = zpe/factor - enthalpy = enthalpy/factor - entropy = entropy/factor - gibbs = entropy/factor - return zpe,enthalpy,entropy,gibbs - def _calc_thermo_qh(self,executable=None): - """ - Runs the 'thermo' script (developed by Grimme's) provided that the it - accesible to calculate the zpe, H, S and G at T and parses the output to - retrieve said magnitudes. - - Parameters - ---------- - executable : str - Absolute path to the executable script in case that it is not - accesible with shutils.which - - Returns - ------- - zpe,enthalpy,entropy,gibbs - Energies in kcal/mol if the output of freeh is in kJ/mol, otherwise - the units are the same as the ones provided by the freeh script. 
- (Returns (None,)*4 if the freeh command is not accesible) - """ - thermo = shutil.which('thermo') # Grimme's script - if thermo is None and executable is None: - return None, None, None, None - elif executable is not None: - thermo = executable - cwd = os.path.abspath(os.getcwd()) - os.chdir(self.folder) - cutoff1,cutoff2 = self.fcutoffs - cmd = f'{thermo} {cutoff1} {cutoff2} {self.T} {self.scale_factor}\n' - p1 = subprocess.Popen(shlex.split(cmd), - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) - zpe = enthalpy = entropy = gibbs = None - for bytes in p1.stdout: - line = bytes.decode('utf-8') - if 'ZPVE' in line: - zpe = float(line.strip().split()[1]) - if 'H(T)' in line and 'H(0)' not in line: - enthalpy = float(line.strip().split()[1]) - if 'T*S' in line and len(line.split())==4: - entropy = float(line.strip().split()[1])/self.T - if 'G(T)' in line: - gibbs = float(line.strip().split()[1]) - p1.stdout.close() - p1.wait() - if Path('.H298').exists(): - os.remove('.H298') - if Path('.G298').exists(): - os.remove('.G298') - os.chdir(cwd) - return zpe, enthalpy, entropy, gibbs - def parse_frequencies(self): - """ - Reads the frequency mode and wavenumber if the turbomole calculation - has frequencies. - - Returns - ------- - list - list of tuples where the first element is the number of the - vibrational mode and the second element is the wavenumber in cm^-1 - """ - Frequency = namedtuple('frequency','mode wavenumber') - if 'freq' not in self.calc_type: - return [] - with open(self.folder/'vibspectrum','r') as F: - _iter = F.__iter__() - line = next(_iter) - while "$vibrational spectrum" not in line: - line = next(_iter) - line = next(_iter) - while line.startswith('#'): - line = next(_iter) - lines = [] - while '$end' not in line: - lines.append(line.strip().split()) - line = next(_iter) - frequencies = [Frequency(int(line[0]),float(line[2])) for line in lines if len(line)==6] - return frequencies - def parse_rotational_constants(self): - """ - Reads the rotational constants in MHz and returns them in GHz. - - Returns - ------- - list - A three element list with the rotational constants around the principal - moments of inertia in GHz. - """ - if self.cosmo: - file = self.folder / 'numforce/aoforce.out' - else: - file = self.folder / 'aoforce.out' - if not file.exists(): - return [] - pattern = r'.*?b.*?\:(.*)\s*\(MHz\)' - with open(file,'r') as F: - txt = F.read() - regex = re.compile(pattern) - match = regex.findall(txt) - if match: - line = match[-1] - items = line.split() - return [float(item)/1000.0 for item in items] - return [] - - @property - def zpe(self): - if self._zpe is None: - return None - return self.energy + self._zpe - @property - def enthalpy(self): - if self._enthalpy is None: - return None - return self.energy + self._enthalpy - @property - def entropy(self): - if self._entropy is None: - return None - return self.energy + self._entropy - @property - def gibbs(self): - if self._gibbs is None: - return None - return self.energy + self._gibbs - -class TurbomoleInput(object): - """ - Turbomole Input Generator class. This class provides a simplified interface - for automating the usage of the 'define' command of Turbomole. For more - details on the options check turbomole define's manual. - - Parameters - ---------- - folder : str - Valid path to the folder where the input is going to be generated. - If the folder does not exist it will be created. If parent folders - do not exist it will raise an error. 
- functional : str, optional - dft functional to use or 'hf' or 'hf-3c', by default 'TPSS'. Ensure that - the functional is written as in Turbomole. - basis : str or dict, optional - basis set to use for all atoms, by default 'def2-SVP'. - auxbasis : str or dict, optional - auxiliar basis set, by default it attempts to use the same as the - selected basis set. - ecp : dict, optional - dictionary of atom -> ecp for the calculation. - dispersion : str, optional - Whether to include dft dispersion and which kind, by default 'off'. - charge : int, optional - Formal Charge, by default 0 - multiplicity : int, optional - Overall multiplicity 's','d','t' are also accepted as arguments but - integer argument is preferred, by default 1 - grid : str, optional - DFT integration grid, by default 'm4' - epsilon : float, optional - Dielectric constant of the cosmo solvation or 'gas' or 'infinity', - by default 'gas' - maxcore : int, optional - Maximum memory per core in MB, by default 200 - ricore : int, optional - Maximum memory per core in MB for the RI formalism, by default 200 - disable_ired : bool, optional - If True it will not generate internal redundant coordinates, - by default False - cavity : str, optional - Solvent cavity definition for cosmo, by default 'fine' - title : str, optional - title of the calculation, by default '' - """ - def __init__(self,folder, functional='TPSS',basis='def2-SVP', - auxbasis=None,ecp=None,dispersion='off',charge=0, - multiplicity=1,grid='m4',epsilon='gas',maxcore=200, - ricore=200,disable_ired=False,cavity='none',title=''): - self.folder = Path(folder) - self.ofile = self.folder/f'{self.folder.stem}.sh' - self.cwd = Path(os.path.abspath(os.getcwd())) - - self.title = title - - self.functional = functional - self.basis = basis - self.auxbasis = auxbasis - self.ecp = ecp - self.dispersion = dispersion - self.charge = charge - # initialize multiplicity - self.multiplicity = multiplicity - self.check_multiplicity() - self.grid = grid - self.epsilon = epsilon - self.maxcore = maxcore - self.ricore = ricore - self.disable_ired = disable_ired - self.cavity = cavity - - self.atoms = [] - self.check_atom_list() - self.check_auxbasis_availability() - - def __str__(self): - output = f'\nInformation for directory\n\n{self.folder}\n\n' - output += f'Functional: {self.functional}\n' - output += f'Basis set: {self.basis}\n' - output += f'Auxilary basis set: {self.auxbasis}\n' - if self.dispersion == 'on': - disp = 'd3(0)' - else: - disp = self.dispersion - output += f'Dispersion: {disp}\n' - output += f'Charge: {self.charge}\n' - output += f'Multiplicity: {self.multiplicity}\n' - output += f'ricore: {self.ricore}\n' - output += f'maxcore: {self.maxcore}\n' - output += f'Grid: {self.grid}\n' - #Infinity, gas or a number a.bcd - if not self.cavity : - output += f"COSMO settings: {self.epsilon} with {self.cavity}\n" - else : - output += f"COSMO settings: {self.epsilon}\n" - - return output - - # Initialization methods - def check_multiplicity(self): - """ - Checks the 'multiplicity' attribute and ensures proper values for the - 'multiplicity', 'is_closed_shell' and 'unpaired_electrons' attributes. 
- """ - # Ensure that multiplicity is a integer - m = self.multiplicity - if m == 's': - multiplicity = 1 - elif m == 'd': - multiplicity = 2 - elif m == 't': - multiplicity = 3 - else: - multiplicity = int(m) - # Calculate check for restricted/unrestricted and number of unpaired - # electrons - if multiplicity != 1: - is_closed_shell = False - unpaired_electrons = multiplicity-1 - else: - is_closed_shell = True - unpaired_electrons = 0 - # Assign the new object attributes - self.multiplicity = multiplicity - self.is_closed_shell = is_closed_shell - self.unpaired_electrons = unpaired_electrons - def check_atom_list(self): - """ - Gets a list of the different atoms present in the molecule in the 'atoms' - attribute. - """ - coord_file = self.folder/'coord' - if coord_file.exists(): - mol = next(pybel.readfile('tmol',str(coord_file))) - lines = mol.write('xyz').split('\n') - else: - xyzfiles = glob.glob(str(self.folder)+'/*.xyz') - if xyzfiles: - with open(xyzfiles[0],'r') as F: - lines = F.read().split('\n') - else: - return - - atoms = set([line.strip().split()[0] for line in lines[2:] if line.strip()]) - self.atoms = [element.lower() for element in list(atoms)] - def check_auxbasis_availability(self): - """ - Checks availability of the auxiliar basis sets in the jbasen and jkbasen - directories in of turbomole. If the auxiliar basis set is not specified - it assumes that the auxiliar basis set = basis set. - - Raises - ------ - ValueError - If a basis set for a certain set of atoms is not specified. - """ - if self.auxbasis is None and self.basis == 'minix': - self.auxbasis = 'universal' - elif self.auxbasis is None: - self.auxbasis = self.basis - - if type(self.auxbasis) == str: - self._check_auxbasis_availability_all() - else: - self._check_auxbasis_availability_per_atom() - - def _check_auxbasis_availability_all(self): - """ - Checks availability of the auxiliar basis sets in the jbasen and jkbasen - directories in of turbomole. If the auxiliar basis set is not specified - it assumes that the auxiliar basis set = basis set. Assumes both the - basis and auxiliar basis are specified for "all" atoms. - - Raises - ------ - ValueError - If a basis set for a certain set of atoms is not specified. - """ - turbodir = Path(os.environ['TURBODIR']) - self.use_jkbasis_as_jbasis = False - found_list = [] - for atom in self.atoms: - jbasen = turbodir/f'jbasen/{atom}' - jkbasen = turbodir/f'jkbasen/{atom}' - basis_name = f'{atom} {self.auxbasis}' - found_jbasis = self.find_basis_in_file(basis_name,jbasen) - found_jkbasis = self.find_basis_in_file(basis_name,jkbasen) - found_list.append((atom,found_jbasis,found_jkbasis)) - - errors = [] - for atom,found_jbasis,found_jkbasis in found_list: - if not found_jbasis and not found_jkbasis: - errors.append((atom,self.auxbasis)) - elif not found_jbasis: - self.use_jkbasis_as_jbasis = True - - if errors: - msg = ','.join([f"'{atom} {basis}'" for atom,basis in errors]) - raise ValueError(f'Basis sets [{msg}] are not found in jkbasen nor jbasen') - def _check_auxbasis_availability_per_atom(self): - """ - Checks availability of the auxiliar basis sets in the jbasen and jkbasen - directories in of turbomole. If the auxiliar basis set is not specified - it assumes that the auxiliar basis set = basis set. Assumes both the - basis and auxiliar basis are specified per each atom as a dictionary. - - Raises - ------ - ValueError - If a basis set for a certain set of atoms is not specified. 
- """ - turbodir = Path(os.environ['TURBODIR']) - self.use_jkbasis_as_jbasis = False - found_list = [] - for atom,auxbasis in self.auxbasis.items(): - sym = atom.lower() - jbasen = turbodir/f'jbasen/{sym}' - jkbasen = turbodir/f'jkbasen/{sym}' - basis_name = f'{sym} {auxbasis}' - found_jbasis = found_jkbasis = False - if jbasen.exists(): - found_jbasis = self.find_basis_in_file(basis_name,jbasen) - if jkbasen.exists(): - found_jkbasis = self.find_basis_in_file(basis_name,jkbasen) - found_list.append((sym,auxbasis,found_jbasis,found_jkbasis)) - - errors = [] - for sym,auxbasis,found_jbasis,found_jkbasis in found_list: - if not found_jbasis and not found_jkbasis: - errors.append((sym,auxbasis)) - elif not found_jbasis: - self.use_jkbasis_as_jbasis = True - - if errors: - msg = ','.join([f"'{atom} {basis}'" for atom,basis in errors]) - raise ValueError(f'Basis sets [{msg}] are not found in jkbasen nor jbasen') - - @staticmethod - def find_basis_in_file(basis,file): - """ - Attempts to find a basis set in the specified file. - - Parameters - ---------- - basis : str - basis set in the format '[sym] [basis name]' where the symbol is - lowercased. - file : Path - Path to the file where the basis is to be searched. - - Returns - ------- - bool - True if the basis is found within the file. - """ - assert file.exists() - with open(file,'r') as F: - for line in F: - if basis in line: - return True - return False - - # Generation methods - def write_input_command(self,script): - """ - Writes a file named 'input_command' with a line of text that contains - the necessary options used for the generation of the input file. - It is legacy code for input generation scripts. - - Parameters - ---------- - script : str - name of the python script used to generate the turbomole input. If - none it uses this file's name. - """ - if script is None: - script = __file__ - options = [('f',self.functional), - ('bs',self.basis), - ('d',self.dispersion), - ('c',self.charge), - ('u',self.multiplicity), - ('rm',self.ricore), - ('m',self.maxcore), - ('g',self.grid), - ('s',self.epsilon)] - if not self.disable_ired: - options.append(('ired','')) - if self.cavity == 'fine': - options.append(('fine','')) - - input_str = f'#python {script} ' - input_str += ' '.join([f'-{k} {val}' for k,val in options]) - with open(self.folder/'input_command','w') as F: - F.write(input_str) - def create_coord(self): - """ - Ensures the existence of a 'coord' file. Uses pybel to transform from - xyz to tmol format in case that a 'coord' file does not exist but a .xyz - does exist. - In case of several .xyz files (not recommended), it will use the first - one in alphabetical order. - - Raises - ------ - Run - If no xyz or coord file is found in the folder it will stop - completely. - """ - coord_file = self.folder/'coord' - if coord_file.exists(): - return - #print("File coord was not found in the folder.") - xyzfiles = glob.glob(str(self.folder)+'/*.xyz') - if xyzfiles: - #print("File coord will be generated from an .xyz-file.") - mol = next(pybel.readfile('xyz',xyzfiles[0])) - mol.write('tmol',str(self.folder/'coord')) - else: - msg = "There is no .xyz-file either in the folder. Exiting the script." - raise RuntimeError(msg) - - # define's interfacing methods - def write_geometry_menu(self): - """ - Encapsulates the interactions with 'define' related with the - geometry menu and returns a string with those interactions. 
- """ - output = 'a coord\n' # Add coordinates file 'coord' - if not self.disable_ired: # - output += '*\nno\n' - else: - output += 'ired\n*\n' - return output - def write_atom_attr_menu(self): - """ - Encapsulates the interactions with 'define' related with the - Atom Attributes menu (Essentially, Isotope definition and Basis set - definition) and returns a string with those interactions. - """ - if type(self.basis) == str: - return f'b all {self.basis}\n*\n' # Asign the basis set to all atoms and exit - output = '' - for key,val in self.basis.items(): - output += f'b "{key}" {val}\n' # Add basis set per atom - if self.ecp is not None: - ecps = [(k,val) for k,val in self.ecp.items()] - output += 'ecp\n' # Enter ecp menu - if ecps: - for key,val in ecps: - output += f'"{key}" {val}\n' # Add ecps - output += '\n' # Exit the ecp menu - output += '*\n' # Exit the atom attribute menu - return output - def write_occupation_MO_menu(self): - """ - Encapsulates the interactions with 'define' related with the - Occupation and Molecular Orbitals menu and returns a string with those - interactions. - """ - output = '' - if 'cu' in self.atoms: - # RAUL: Maybe there is an extra newline between eht and charge - output = f'eht\n\n\n{self.charge}\n' - else: - output = f'eht\n\n{self.charge}\n' - - if not self.is_closed_shell and self.unpaired_electrons != 0: - output += f'n\nu {self.unpaired_electrons}\n*\n\n' - elif not self.is_closed_shell: - output += f'n\n{self.unpaired_electrons}\n*\n\n' - else: - output += f'y\n' - - return output - - def write_dft_submenu(self): - """ - Encapsulates the interactions with 'define' related with the - DFT submenu within the General Menu and returns a string with those - interactions. - """ - if self.functional in ['hf','hf-3c']: - return '' - output = 'dft\n' # Enter the DFT menu - output += 'on\n' # Enable DFT - output += f'func {self.functional}\n' # Set the functional - output += f'grid {self.grid}\n' # Set the integration grid - output += f'\n' # End the DFT menu - return output - def write_auxbasis_submenu(self): - """ - Encapsulates the interactions with 'define' related with the - Auxiliary Basis sets submenu within the RI menu and returns a - string with those interactions. - """ - if type(self.auxbasis) == str: - basis_definition = '' - if self.basis != self.auxbasis: - for atom in self.atoms: - basis_definition += f'nick {atom} {self.auxbasis}\n' - basis_definition += f'b all {self.auxbasis}\n' - else: - basis_definition = '' - if self.basis != self.auxbasis: - for atom,auxbasis in self.basis.items(): - basis_definition += f'nick {atom} {auxbasis}\n' - for atom,auxbasis in self.basis.items(): - basis_definition += f'b "{atom}" {auxbasis}\n' - return f'jbas\n{basis_definition}*\n' - def write_ri_submenu(self): - """ - Encapsulates the interactions with 'define' related with the - RI submenu within the General menu and returns a string with those - interactions. - """ - # TODO LEGACY CODE, this comment should be removed when we are - # sure that it is definetly not usefull. 
- ## hybrid_or_HF = ['b2-plyp','b3-lyp','tpssh','pw6b95', - ## 'pbe0','m06-2x','bhlyp','hf','hf-3c'] - ## is_hybrid_or_HF = self.functional in hybrid_or_HF - - if self.use_jkbasis_as_jbasis: - output = 'rijk\n' # Enter the RI-JK-HF menu - output += 'on\n' # Activate it - #output += f'm {self.ricore}\n' # Set the maximum memory per core in MB - output += '\n' # Exit the RI-JK-HF menu - return output - - auxbasis_menu = self.write_auxbasis_submenu() - output = 'ri\n' # Enter the RI menu - output += 'on\n' # Activate it - output += f'm {self.ricore}\n' # Set the maximum memory per core in MB - output += f'{auxbasis_menu}' # Enter and exit the auxbasis submenu - output += '\n' # Exit the RI menu - return output - def write_multipole_menu(self): - """ - Encapsulates the interactions with 'define' related with the - Multipole submenu within the General menu and returns a string with - those interactions. - """ - output = 'marij\n' # Enter the multipole RI menu - output += '\n' # Accept defaults and exit - return output - def write_dispersion_menu(self): - """ - Encapsulates the interactions with 'define' related with the - Dispersion submenu within the General menu and returns a string with - those interactions. - """ - output = 'dsp\n' # Enter the dispersion menu - output += f'{self.dispersion}\n' # Set the dispersion - output += '\n' # End the menu - return output - def write_ricc_menu(self): - """ - Encapsulates the interactions with 'define' related with the - RICC submenu within the General menu and returns a string with those - interactions. - """ - output = 'cc\n' # Enter the ricc menu - output += f'memory {self.maxcore}' # Set the maximum memory per core - output += '*\n' # End the ricc menu - return f'cc\nmemory {self.maxcore}\n*\n' - def write_scf_menu(self): - """ - Encapsulates the interactions with 'define' related with the - SCF submenu within the General menu and returns a string with those - interactions. - """ - output = 'scf\n' # Enter the scf menu - output += 'iter\n' # Select the max iterations property for editing - output += '300\n' # Set it to 300 iterations - output += '\n' # End the scf menu - return output - def write_general_menu(self): - """ - Encapsulates the interactions with 'define' related with the - the General menu and centralizes all the interactions of its submenus. - returns a string with all interactions. - """ - dft_menu = self.write_dft_submenu() - ri_menu = self.write_ri_submenu() - multipole_menu = self.write_multipole_menu() - dispersion_menu = self.write_dispersion_menu() - ricc_menu = self.write_ricc_menu() - scf_menu = self.write_scf_menu() - - general_menu = '' - general_menu += dft_menu - general_menu += ri_menu - general_menu += multipole_menu - general_menu += dispersion_menu - general_menu += ricc_menu - general_menu += scf_menu - general_menu += '*\n' - - return general_menu - - def generate_cosmoprep_string(self): - """ - Centralizes the generation of all the interactions with the 'cosmoprep' - command and returns them in the appropiate format as a single string. - """ - if self.epsilon == 'gas': - return '' # No-solvation - output = '\n'*12 - output += 'r all b\n' - output += '*\n' - output += 'out.cosmo\n' - output += 'n\n' - output += '\n' - return output - def generate_define_string(self): - """ - Centralizes the generation of all the interactions with the 'define' - command and returns them in the appropiate format as a single string. 
- """ - molecular_geom = self.write_geometry_menu() - atom_attrib = self.write_atom_attr_menu() - ocupation_MO = self.write_occupation_MO_menu() - general_menu = self.write_general_menu() - - exec_str = f'\n{self.title}\n' - exec_str += f'{molecular_geom}' - exec_str += f'{atom_attrib}' - exec_str += f'{ocupation_MO}' - - exec_str += general_menu - exec_str += '*\n' - return exec_str - - # Post-generation modification methods - def modify_generated_control_file(self): - """ - Modifies the generated control file: - - Adds a scftol of 1d-16 - - Adds the cosmo_isorad keyword when needed - - Ensures that if the jkbasis were used, they are treated as jbasis and - removes the rik keyword - - Ensures proper dispersion treatment with hf-3c. - """ - with open(self.folder/'control','r') as F: - txt = F.read() - - head = f'$scftol 1d-16\n' - if self.cavity == 'fine': - head += '$cosmo_isorad\n' - - txt = head + txt - - if self.use_jkbasis_as_jbasis: - newtxt = txt.replace('jkbas','jbas') - lines = [line for line in newtxt.split('\n') if '$rik' not in line] - txt = '\n'.join(lines) - - if self.functional == 'hf-3c': - txt = txt.replace('disp3 bj','disp3 bj func hf-3c') - - with open(self.folder/'control','w') as F: - F.write(txt) - def modify_generated_auxbasis_file(self): - """ - Modifies the generated auxbasis file: - - If no jkbases were used, it does nothing - - Otherwise ensures that any jkbasis is treated as jbasis - """ - if not self.use_jkbasis_as_jbasis: - return - - with open(self.folder/'auxbasis','r') as F: - txt = F.read() - - newtxt = txt.replace('jkbas','jbas') - - with open(self.folder/'auxbasis','w') as F: - F.write(newtxt) - def modify_generated_files(self): - """ - Applies custom post-modifications to generated files. Currently only - the control and auxbasis files might be modified. - """ - self.modify_generated_control_file() - self.modify_generated_auxbasis_file() - - # Main API - def generate(self): - """ - Ensures the existence of the coord file, and generates the input - interacting with 'define' and 'cosmoprep' commands. Requires that - 'define' and 'cosmoprep' are accessible at the current path. - """ - self.create_coord() - define_str = self.generate_define_string() - cosmo_str = self.generate_cosmoprep_string() - # the define command requires to be started in the folder of the - # calculation - os.chdir(self.folder) - # Run the define and the cosmoprep processes - p_echo1 = subprocess.Popen(shlex.split(f'echo "{define_str}"'), - stdout=subprocess.PIPE) - p_define = subprocess.Popen(shlex.split('define'), - stdin=p_echo1.stdout, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) - p_echo1.wait() - p_define.wait() - p_echo1.stdout.close() - p_define.stdout.close() - - if cosmo_str: - p_echo2 = subprocess.Popen(shlex.split(f'echo "{cosmo_str}"'), - stdout=subprocess.PIPE) - p_cosmoprep = subprocess.Popen(shlex.split('cosmoprep'), - stdin=p_echo2.stdout, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) - p_echo2.wait() - p_echo2.stdout.close() - p_cosmoprep.wait() - p_cosmoprep.stdout.close() - - # After the define and cosmoprep are finished return to the original - # directory - os.chdir(self.cwd) - - # Apply the post-modifications to the generated files (will look for the - # files in the folder, so no need to be in the same directory). - self.modify_generated_files() - - -def read_basiset_file(filepath): - """ - Reads a file containing information on basis sets, auxiliar basis sets and - ECPs. 
The file follows the following format: - $basis - sym1 basis1 - sym2 basis2 - ... - $auxbasis - sym1 auxbasis1 - sym2 auxbasis2 - ... - $ecp - sym1 ecp1 - sym2 ecp2 - ... - $end - - Parameters - ---------- - filepath : str or Path - a valid path to a file with the previously specified format. - - Returns - ------- - dict or None - basis, auxbasis, ecp - """ - regex = r'\$([^\$]*)' - regex = re.compile(regex) - with open(filepath,'r') as F: - txt = F.read() - keywords = dict() - kwblocks = regex.findall(txt) - for block in kwblocks: - lines = [line for line in block.split('\n') if line] - kw_line = lines[0] - keyword = kw_line.split(' ')[0] - if keyword == 'end': - continue - keywords[keyword] = dict() - for line in lines[1:]: - sym,basis = line.split(' ',1) - keywords[keyword][sym.lower()] = basis.strip() - basis = keywords.get('basis',None) - auxbasis = keywords.get('auxbasis',None) - ecp = keywords.get('ecp',None) - return basis,auxbasis,ecp - -def create_tmol_input_opt_from_args(base_folder,args): - folder_name = '' - folder = f'{base_folder}/{folder_name}' - - kwargs = dict() - kwargs['functional'] = args.tmfunctional - if args.tmbasis is not None: - kwargs['basis'] = args.tmbasis - else: - basis,auxbasis,ecp = read_basiset_file(args.tmbasisfile) - kwargs['basis'] = basis - kwargs['auxbasis'] = auxbasis - kwargs['ecp'] = ecp - - kwargs['dispersion'] = args.tmdispersion - kwargs['charge'] = 0 - kwargs['multiplicity'] = 1 - kwargs['grid'] = args.tmgrid - kwargs['epsilon'] = args.tmepsilon - kwargs['maxcore'] = args.tmmaxcore - kwargs['ricore'] = args.tmricore - kwargs['cavity'] = args.tmcavity - kwargs['title'] = '' - tmol_input = TurbomoleInput(folder,**kwargs) - tmol_input.generate() - -def create_tmol_input_SP_from_args(base_folder,args): - folder_name = '' - folder = f'{base_folder}/{folder_name}' - - kwargs = dict() - kwargs['functional'] = args.tmfunctionalsp - if args.tmbasissp is not None: - kwargs['basis'] = args.tmbasissp - else: - basis,auxbasis,ecp = read_basiset_file(args.tmbasisfilesp) - kwargs['basis'] = basis - kwargs['auxbasis'] = auxbasis - kwargs['ecp'] = ecp - - kwargs['dispersion'] = args.tmdispersion - kwargs['charge'] = 0 - kwargs['multiplicity'] = 1 - kwargs['grid'] = args.tmgrid - kwargs['epsilon'] = args.tmepsilon - kwargs['maxcore'] = args.tmmaxcore - kwargs['ricore'] = args.tmricore - kwargs['cavity'] = args.tmcavity - kwargs['title'] = '' - tmol_input = TurbomoleInput(folder,**kwargs) - tmol_input.generate() \ No newline at end of file diff --git a/aqme/utils.py b/aqme/utils.py index 295f3c75..1241902e 100644 --- a/aqme/utils.py +++ b/aqme/utils.py @@ -1,7 +1,6 @@ """ This module contains some classes and functions that are used from other modules """ -import shutil import sys import subprocess import numpy as np @@ -517,7 +516,6 @@ def get_info_input(file): # Read charge and multiplicity charge = line.strip().split()[-2] - mult = line.strip().split()[-1] # Store the coordinates until next * atoms_and_coords = [] @@ -1081,8 +1079,6 @@ def check_isomerization( True there is a clearly distorted bond within the geometries. """ - isomerized, diff = None, None - # load connectivity matrix from the starting points and convert string into matrix if not init_csv.empty: filename = file.replace("_" + file.split("_")[-1], "")