diff --git a/atomisticparsers/asap/parser.py b/atomisticparsers/asap/parser.py index 7dca75ef..f9fd81e1 100644 --- a/atomisticparsers/asap/parser.py +++ b/atomisticparsers/asap/parser.py @@ -25,12 +25,8 @@ from nomad.units import ureg from nomad.parsing.file_parser import FileParser from runschema.run import Run, Program -from runschema.method import ( - Method, ForceField, Model -) -from simulationworkflowschema import ( - GeometryOptimization, GeometryOptimizationMethod -) +from runschema.method import Method, ForceField, Model +from simulationworkflowschema import GeometryOptimization, GeometryOptimizationMethod from atomisticparsers.utils import MDParser from .metainfo.asap import MolecularDynamics # pylint: disable=unused-import @@ -43,21 +39,21 @@ def __init__(self): def traj(self): if self._file_handler is None: try: - self._file_handler = Trajectory(self.mainfile, 'r') + self._file_handler = Trajectory(self.mainfile, "r") # check if traj file is really asap - if 'calculator' in self._file_handler.backend.keys(): - if self._file_handler.backend.calculator.name != 'emt': # pylint: disable=E1101 - self.logger.error('Trajectory is not ASAP.') + if "calculator" in self._file_handler.backend.keys(): + if self._file_handler.backend.calculator.name != "emt": # pylint: disable=E1101 + self.logger.error("Trajectory is not ASAP.") self._file_handler = None except Exception: - self.logger.error('Error reading trajectory file.') + self.logger.error("Error reading trajectory file.") return self._file_handler def get_version(self): - if hasattr(self.traj, 'ase_version') and self.traj.ase_version: + if hasattr(self.traj, "ase_version") and self.traj.ase_version: return self.traj.ase_version else: - return '3.x.x' + return "3.x.x" def parse(self): pass @@ -68,10 +64,6 @@ def __init__(self): self.traj_parser = TrajParser() super().__init__() - def init_parser(self): - self.traj_parser.mainfile = self.filepath - self.traj_parser.logger = self.logger - def parse_method(self): traj = self.traj_parser.traj sec_method = Method() @@ -80,71 +72,72 @@ def parse_method(self): if traj[0].calc is not None: sec_method.force_field = ForceField(model=[Model(name=traj[0].calc.name)]) - description = traj.description if hasattr(traj, 'description') else dict() + description = traj.description if hasattr(traj, "description") else dict() if not description: return - calc_type = description.get('type') - if calc_type == 'optimization': + calc_type = description.get("type") + if calc_type == "optimization": workflow = GeometryOptimization(method=GeometryOptimizationMethod()) - workflow.x_asap_maxstep = description.get('maxstep', 0) - workflow.method.method = description.get('optimizer', '').lower() + workflow.x_asap_maxstep = description.get("maxstep", 0) + workflow.method.method = description.get("optimizer", "").lower() self.archive.workflow2 = workflow - elif calc_type == 'molecular-dynamics': + elif calc_type == "molecular-dynamics": data = {} - data['x_asap_timestep'] = description.get('timestep', 0) - data['x_asap_temperature'] = description.get('temperature', 0) - md_type = description.get('md-type', '') + data["x_asap_timestep"] = description.get("timestep", 0) + data["x_asap_temperature"] = description.get("temperature", 0) + md_type = description.get("md-type", "") thermodynamic_ensemble = None - if 'Langevin' in md_type: - data['x_asap_langevin_friction'] = description.get('friction', 0) - thermodynamic_ensemble = 'NVT' - elif 'NVT' in md_type: - thermodynamic_ensemble = 'NVT' - elif 'Verlet' in md_type: - thermodynamic_ensemble = 'NVE' - elif 'NPT' in md_type: - thermodynamic_ensemble = 'NPT' - data['method'] = {'thermodynamic_ensemble': thermodynamic_ensemble} + if "Langevin" in md_type: + data["x_asap_langevin_friction"] = description.get("friction", 0) + thermodynamic_ensemble = "NVT" + elif "NVT" in md_type: + thermodynamic_ensemble = "NVT" + elif "Verlet" in md_type: + thermodynamic_ensemble = "NVE" + elif "NPT" in md_type: + thermodynamic_ensemble = "NPT" + data["method"] = {"thermodynamic_ensemble": thermodynamic_ensemble} self.parse_md_workflow(data) - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.maindir = os.path.dirname(self.filepath) - self.logger = logger if logger is not None else logging - - self.init_parser() - + def write_to_archive(self) -> None: + self.traj_parser.mainfile = self.mainfile if self.traj_parser.traj is None: return sec_run = Run() self.archive.run.append(sec_run) - sec_run. program = Program(name='ASAP', version=self.traj_parser.get_version()) + sec_run.program = Program(name="ASAP", version=self.traj_parser.get_version()) # TODO do we build the topology and method for each frame self.parse_method() # set up md parser - self.n_atoms = [traj.get_global_number_of_atoms() for traj in self.traj_parser.traj] - steps = [(traj.description if hasattr(traj, 'description') else dict()).get( - 'interval', 1) * n for n, traj in enumerate(self.traj_parser.traj)] + self.n_atoms = [ + traj.get_global_number_of_atoms() for traj in self.traj_parser.traj + ] + steps = [ + (traj.description if hasattr(traj, "description") else dict()).get( + "interval", 1 + ) + * n + for n, traj in enumerate(self.traj_parser.traj) + ] self.trajectory_steps = steps self.thermodynamics_steps = steps def get_constraint_name(constraint): def index(): - d = constraint['kwargs'].get('direction') + d = constraint["kwargs"].get("direction") return ((d / np.linalg.norm(d)) ** 2).argsort()[2] - name = constraint.get('name') - if name == 'FixedPlane': - return ['fix_yz', 'fix_xz', 'fix_xy'][index()] - elif name == 'FixedLine': - return ['fix_x', 'fix_y', 'fix_z'][index()] - elif name == 'FixAtoms': - return 'fix_xyz' + name = constraint.get("name") + if name == "FixedPlane": + return ["fix_yz", "fix_xz", "fix_xy"][index()] + elif name == "FixedLine": + return ["fix_x", "fix_y", "fix_z"][index()] + elif name == "FixAtoms": + return "fix_xyz" else: return name @@ -160,11 +153,25 @@ def index(): constraints = [] for constraint in traj.constraints: as_dict = constraint.todict() - indices = as_dict['kwargs'].get('a', as_dict['kwargs'].get('indices')) - constraints.append(dict(atom_indices=np.asarray(indices), kind=get_constraint_name(as_dict))) - self.parse_trajectory_step(dict(atoms=dict( - lattice_vectors=lattice_vectors, labels=labels, positions=positions, - periodic=periodic, velocities=velocities), constraint=constraints)) + indices = as_dict["kwargs"].get("a", as_dict["kwargs"].get("indices")) + constraints.append( + dict( + atom_indices=np.asarray(indices), + kind=get_constraint_name(as_dict), + ) + ) + self.parse_trajectory_step( + dict( + atoms=dict( + lattice_vectors=lattice_vectors, + labels=labels, + positions=positions, + periodic=periodic, + velocities=velocities, + ), + constraint=constraints, + ) + ) for step in self.thermodynamics_steps: traj = self.traj_parser.traj[steps.index(step)] @@ -174,6 +181,9 @@ def index(): forces = forces * ureg.eV / ureg.angstrom if (forces_raw := traj.get_forces(apply_constraint=False)) is not None: forces_raw * ureg.eV / ureg.angstrom - self.parse_thermodynamics_step(dict( - energy=dict(total=dict(value=total_energy)), - forces=dict(total=dict(value=forces, value_raw=forces_raw)))) + self.parse_thermodynamics_step( + dict( + energy=dict(total=dict(value=total_energy)), + forces=dict(total=dict(value=forces, value_raw=forces_raw)), + ) + ) diff --git a/atomisticparsers/bopfox/parser.py b/atomisticparsers/bopfox/parser.py index fed43eb8..f022b7ed 100644 --- a/atomisticparsers/bopfox/parser.py +++ b/atomisticparsers/bopfox/parser.py @@ -23,23 +23,33 @@ from nomad.units import ureg from nomad.parsing.file_parser import TextParser, Quantity, DataTextParser from runschema.run import Run, Program -from runschema.method import ( - Method, TB, xTB, ForceField, Model, Interaction -) +from runschema.method import Method, TB, xTB, ForceField, Model, Interaction from runschema.system import System, Atoms from runschema.calculation import ( - Calculation, Energy, EnergyEntry, Forces, ForcesEntry, Stress, StressEntry, Charges, - ChargesValue + Calculation, + Energy, + EnergyEntry, + Forces, + ForcesEntry, + Stress, + StressEntry, + Charges, + ChargesValue, ) from simulationworkflowschema import ( - GeometryOptimization, GeometryOptimizationMethod, SinglePoint + GeometryOptimization, + GeometryOptimizationMethod, + SinglePoint, ) from atomisticparsers.utils import MDParser -from atomisticparsers.bopfox.metainfo.bopfox import x_bopfox_onsite_levels, x_bopfox_onsite_levels_value +from atomisticparsers.bopfox.metainfo.bopfox import ( + x_bopfox_onsite_levels, + x_bopfox_onsite_levels_value, +) -re_f = r'[-+]?\d*\.\d*(?:[Ee][-+]\d+)?' -re_n = r'[\n\r]' +re_f = r"[-+]?\d*\.\d*(?:[Ee][-+]\d+)?" +re_n = r"[\n\r]" class ModelsbxParser(TextParser): @@ -47,36 +57,43 @@ def init_quantities(self): def to_parameters(val_in): parameters = dict() for val in val_in.strip().splitlines(): - if val.startswith('!'): + if val.startswith("!"): continue - val = val.split('=') + val = val.split("=") if len(val) == 2: val[1] = val[1].split() - parameters[val[0].strip().lower()] = val[1][0] if len(val[1]) == 1 else val[1] + parameters[val[0].strip().lower()] = ( + val[1][0] if len(val[1]) == 1 else val[1] + ) return parameters self._quantities = [ Quantity( - 'model', - rf'(el *= *\w+ *{re_n}[\s\S]+?)(?:{re_n} *mod|\Z)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('name', r'el *= *(\S+)', dtype=str), - Quantity( - 'parameters', - r'((?:[\w!]+ *= *\w+\s+)+)', - str_operation=to_parameters - ), - Quantity( - 'atom', - r'([aA]tom *= *[A-Z]\S*[\s\S]+?(?:[\w\!]+ *= *[\w\. \-\+]+\s+)+)', - repeats=True, str_operation=to_parameters - ), - Quantity( - 'bond', - r'([bB]ond *= *[A-Z]\S* +[A-Z]\S*[\s\S]+?(?:[\w\!]+ *= *[\w\. \-\+]+\s+)+)', - repeats=True, str_operation=to_parameters - ) - ]) + "model", + rf"(el *= *\w+ *{re_n}[\s\S]+?)(?:{re_n} *mod|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity("name", r"el *= *(\S+)", dtype=str), + Quantity( + "parameters", + r"((?:[\w!]+ *= *\w+\s+)+)", + str_operation=to_parameters, + ), + Quantity( + "atom", + r"([aA]tom *= *[A-Z]\S*[\s\S]+?(?:[\w\!]+ *= *[\w\. \-\+]+\s+)+)", + repeats=True, + str_operation=to_parameters, + ), + Quantity( + "bond", + r"([bB]ond *= *[A-Z]\S* +[A-Z]\S*[\s\S]+?(?:[\w\!]+ *= *[\w\. \-\+]+\s+)+)", + repeats=True, + str_operation=to_parameters, + ), + ] + ), ) ] @@ -85,23 +102,33 @@ class StrucbxParser(TextParser): def init_quantities(self): def to_magnetisation(val_in): val = val_in.strip().splitlines() - magnetisation = val[0].lower().startswith('t') - values = np.array([v.strip().split() for v in val[1:]], dtype=np.dtype(np.float64)) + magnetisation = val[0].lower().startswith("t") + values = np.array( + [v.strip().split() for v in val[1:]], dtype=np.dtype(np.float64) + ) return magnetisation, values self._quantities = [ - Quantity('lattice_constant', rf'[aA][lL][aA][tT] *= *({re_f})', dtype=np.float64), Quantity( - 'lattice_vectors', - rf'[aA]\d+ *= *({re_f} +{re_f} +{re_f})', - repeats=True, dtype=np.dtype(np.float64)), - Quantity('coordinate_type', r'[cC][oO][oO][rR][dD] *= *(\S+)', dtype=str), - Quantity('label_position', rf'{re_n} *([A-Z][a-z]* +{re_f} +{re_f} +{re_f})', repeats=True), + "lattice_constant", rf"[aA][lL][aA][tT] *= *({re_f})", dtype=np.float64 + ), Quantity( - 'magnetisation', - rf'[mM][aA][gG][nN][eE][tT][iI][sS][aA][tT][iI][oO][nN] *= *(\S+[\s\d\.]+)', - str_operation=to_magnetisation - ) + "lattice_vectors", + rf"[aA]\d+ *= *({re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + Quantity("coordinate_type", r"[cC][oO][oO][rR][dD] *= *(\S+)", dtype=str), + Quantity( + "label_position", + rf"{re_n} *([A-Z][a-z]* +{re_f} +{re_f} +{re_f})", + repeats=True, + ), + Quantity( + "magnetisation", + rf"[mM][aA][gG][nN][eE][tT][iI][sS][aA][tT][iI][oO][nN] *= *(\S+[\s\d\.]+)", + str_operation=to_magnetisation, + ), ] # def get_positions(self): @@ -120,7 +147,7 @@ def init_quantities(self): def to_frame(val_in): val = val_in.strip().splitlines() n_atoms = int(val[0]) - md = 'fs' in val[1] + md = "fs" in val[1] step = float(val[1].split()[0]) labels = [] positions = np.zeros((n_atoms, 3)) @@ -128,7 +155,7 @@ def to_frame(val_in): energies = np.zeros(n_atoms) forces = np.zeros((n_atoms, 3)) stresses = np.zeros((n_atoms, 6)) - for n, line in enumerate(val[2: 2 + n_atoms]): + for n, line in enumerate(val[2 : 2 + n_atoms]): line = line.split() labels.append(line[0]) positions[n] = line[1:4] @@ -140,177 +167,233 @@ def to_frame(val_in): forces[n] = line[6:9] stresses[n] = line[9:15] return dict( - n_atoms=n_atoms, step=step, labels=labels, positions=positions, - constraints=constraints, energies_total=energies, forces_total=forces, - stresses_total=stresses) + n_atoms=n_atoms, + step=step, + labels=labels, + positions=positions, + constraints=constraints, + energies_total=energies, + forces_total=forces, + stresses_total=stresses, + ) self._quantities = [ Quantity( - 'frame', - rf'(\d+\s+\d+.*?\s+(?:[A-Z][a-z]* +{re_f} +{re_f} +{re_f}.+\s+)+)', - repeats=True, str_operation=to_frame) + "frame", + rf"(\d+\s+\d+.*?\s+(?:[A-Z][a-z]* +{re_f} +{re_f} +{re_f}.+\s+)+)", + repeats=True, + str_operation=to_frame, + ) ] class InfoxParser(TextParser): def init_quantities(self): - self._quantities = [Quantity( - 'parameter', r'(\w+ *= *.+)', - repeats=True, str_operation=lambda x: [v.strip() for v in x.split('=')] - )] + self._quantities = [ + Quantity( + "parameter", + r"(\w+ *= *.+)", + repeats=True, + str_operation=lambda x: [v.strip() for v in x.split("=")], + ) + ] def get_parameters(self): - return {v[0].lower(): v[1] for v in self.get('parameter', [])} + return {v[0].lower(): v[1] for v in self.get("parameter", [])} class MainfileParser(TextParser): def init_quantities(self): - calc_quantities = [ Quantity( - 'energy', - r'(?:Contributions to the Energy|Energies)\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'contribution', - r'(U_\w+.+?\( *atom = +1 *\)[\s\S]+?U_\w+/atom.+)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('type', r'U_(\w+)', dtype=str), - Quantity( - 'atomic', - rf'atom += +\d+ \).+?({re_f})', - repeats=True, dtype=np.float64 + "energy", + r"(?:Contributions to the Energy|Energies)\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "contribution", + r"(U_\w+.+?\( *atom = +1 *\)[\s\S]+?U_\w+/atom.+)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity("type", r"U_(\w+)", dtype=str), + Quantity( + "atomic", + rf"atom += +\d+ \).+?({re_f})", + repeats=True, + dtype=np.float64, + ), + Quantity( + "total", + rf"U_\w+/atom.+?({re_f})", + dtype=np.float64, + ), + ] ), - Quantity('total', rf'U_\w+/atom.+?({re_f})', dtype=np.float64) - ]) - ) - ]) + ) + ] + ), ), Quantity( - 'forces', - r'(?:Contributions to the Forces|Forces \(Fx,Fy,Fz,x,y,z\))\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'contribution', - r'(FBOP \(\w+ *\) +1 +[\s\S]+?)\-{50}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('type', r'FBOP \((\w+)', dtype=str), - Quantity( - 'atomic', - rf'\) +\d+ +({re_f} +{re_f} +{re_f})', - repeats=True, dtype=np.dtype(np.float64) - ) - ]) - ) - ]) + "forces", + r"(?:Contributions to the Forces|Forces \(Fx,Fy,Fz,x,y,z\))\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "contribution", + r"(FBOP \(\w+ *\) +1 +[\s\S]+?)\-{50}", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity("type", r"FBOP \((\w+)", dtype=str), + Quantity( + "atomic", + rf"\) +\d+ +({re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + ] + ), + ) + ] + ), ), Quantity( - 'stress', - r'(?:Contributions for the stresses|stresses \(11,22,33,23,13,12\))\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'total', - rf'sum\(stress\)/volume +({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64) - ), - Quantity( - 'contribution', - r'(stress \(\w+ *\) +1 +[\s\S]+?)\-{50}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('type', r'stress \((\w+)', dtype=str), - Quantity( - 'atomic', - rf'\) +\d+ +({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})', - repeats=True, dtype=np.dtype(np.float64) - ) - ]) - ) - ]) + "stress", + r"(?:Contributions for the stresses|stresses \(11,22,33,23,13,12\))\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "total", + rf"sum\(stress\)/volume +({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + ), + Quantity( + "contribution", + r"(stress \(\w+ *\) +1 +[\s\S]+?)\-{50}", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity("type", r"stress \((\w+)", dtype=str), + Quantity( + "atomic", + rf"\) +\d+ +({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + ] + ), + ), + ] + ), ), - Quantity('energy_fermi', rf'E_Fermi +.+?({re_f})', dtype=np.float64), + Quantity("energy_fermi", rf"E_Fermi +.+?({re_f})", dtype=np.float64), Quantity( - 'charges', - r'(?:Charge terms|Charges)\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'n_electrons', - rf'Nelec \( atom = +\d+ \).+?({re_f})', - dtype=np.float64, repeats=True - ), - Quantity( - 'charge', - rf'Charge \( atom = +\d+ \).+?({re_f})', - dtype=np.float64, repeats=True - ) - ]) + "charges", + r"(?:Charge terms|Charges)\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "n_electrons", + rf"Nelec \( atom = +\d+ \).+?({re_f})", + dtype=np.float64, + repeats=True, + ), + Quantity( + "charge", + rf"Charge \( atom = +\d+ \).+?({re_f})", + dtype=np.float64, + repeats=True, + ), + ] + ), ), Quantity( - 'magnetic_moments', - r'Magnetic moments\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'mag_mom', - rf'Mag_mom \( atom = +(\d+), orbital = +((?:s|p|d)) \) +({re_f}) +({re_f}) +({re_f})', - repeats=True - ) - ]) + "magnetic_moments", + r"Magnetic moments\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "mag_mom", + rf"Mag_mom \( atom = +(\d+), orbital = +((?:s|p|d)) \) +({re_f}) +({re_f}) +({re_f})", + repeats=True, + ) + ] + ), ), Quantity( - 'onsite_levels', - r'Onsite levels\s+\=+([\s\S]+?)\={50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'energy', - rf'E((?:s|p|d)) \( atom = +(\d+), spin = \d+ \) +({re_f})', - repeats=True - ) - ]) - ) + "onsite_levels", + r"Onsite levels\s+\=+([\s\S]+?)\={50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "energy", + rf"E((?:s|p|d)) \( atom = +(\d+), spin = \d+ \) +({re_f})", + repeats=True, + ) + ] + ), + ), ] self._quantities = [ - Quantity('program_version', r'BOPfox \(v (\S+)\) (rev\. \d+)', dtype=str, flatten=False), Quantity( - 'simulation', - r'(\w+ +\: +\S+ +\( *\S+ *\)\s+[\s\S]+?)init\: N\(', - sub_parser=TextParser(quantities=[ - Quantity( - 'parameter', - r'(\w+) +\: +(\S+) +\( *(\S+) *\)\s+', - repeats=True - ) - ]) + "program_version", + r"BOPfox \(v (\S+)\) (rev\. \d+)", + dtype=str, + flatten=False, ), Quantity( - 'lattice_vectors', - rf'cell\(\:,\d+\) \: +({re_f}) +({re_f}) +({re_f})', - repeats=True, dtype=np.dtype(np.float64) + "simulation", + r"(\w+ +\: +\S+ +\( *\S+ *\)\s+[\s\S]+?)init\: N\(", + sub_parser=TextParser( + quantities=[ + Quantity( + "parameter", + r"(\w+) +\: +(\S+) +\( *(\S+) *\)\s+", + repeats=True, + ) + ] + ), ), Quantity( - 'label_position', - rf'init\: atom/type/pos/fix\: +\d+ +([A-Z]\S*) +({re_f}) +({re_f}) +({re_f}) +([ FT]+)', - repeats=True + "lattice_vectors", + rf"cell\(\:,\d+\) \: +({re_f}) +({re_f}) +({re_f})", + repeats=True, + dtype=np.dtype(np.float64), ), Quantity( - 'n_atoms', - r'Atoms in cell/cluster\: +(\d+) +(\d+)', dtype=np.dtype(np.int32) + "label_position", + rf"init\: atom/type/pos/fix\: +\d+ +([A-Z]\S*) +({re_f}) +({re_f}) +({re_f}) +([ FT]+)", + repeats=True, ), Quantity( - 'relaxation', - r'(relax\: [\s\S]+?(?:relax\: cycle finished|\Z))', - sub_parser=TextParser(quantities=[ - Quantity( - 'cycle', - rf'( \d+ +{re_f} +{re_f} +\d+ *{re_n}[\s\S]+?)(?:relax\: |\Z)', - repeats=True, sub_parser=TextParser(quantities=calc_quantities) - ) - ]) + "n_atoms", + r"Atoms in cell/cluster\: +(\d+) +(\d+)", + dtype=np.dtype(np.int32), ), - Quantity('md_column_names', r'col \d+\: (.+?) +\[', repeats=True) + Quantity( + "relaxation", + r"(relax\: [\s\S]+?(?:relax\: cycle finished|\Z))", + sub_parser=TextParser( + quantities=[ + Quantity( + "cycle", + rf"( \d+ +{re_f} +{re_f} +\d+ *{re_n}[\s\S]+?)(?:relax\: |\Z)", + repeats=True, + sub_parser=TextParser(quantities=calc_quantities), + ) + ] + ), + ), + Quantity("md_column_names", r"col \d+\: (.+?) +\[", repeats=True), ] + calc_quantities def get_simulation_parameters(self): - return {v[0]: (v[2] if v[2] != 'none' else None) if v[1] == '--' else v[1] for v in self.get('simulation', {}).get('parameter', [])} + return { + v[0]: (v[2] if v[2] != "none" else None) if v[1] == "--" else v[1] + for v in self.get("simulation", {}).get("parameter", []) + } class BOPfoxParser(MDParser): @@ -322,43 +405,45 @@ def __init__(self): self.dat_parser = DataTextParser() self.infox_parser = InfoxParser() self._metainfo_map = { - 'binding': 'total', 'coulomb': 'electrostatic', 'ionic': 'nuclear_repulsion', - 'total': 'total' + "binding": "total", + "coulomb": "electrostatic", + "ionic": "nuclear_repulsion", + "total": "total", } super().__init__() - def init_parser(self): + def write_to_archive(self) -> None: self.mainfile_parser.logger = self.logger - self.mainfile_parser.mainfile = self.filepath + self.mainfile_parser.mainfile = self.mainfile self.strucbx_parser.logger = self.logger self.xyz_parser.logger = self.logger self.modelsbx_parser.logger = self.logger self.dat_parser.logger = self.logger self.infox_parser.logger = self.logger - - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.maindir = os.path.dirname(self.filepath) - self.logger = logger if logger is not None else logging - - self.init_parser() + self.maindir = os.path.dirname(self.mainfile) sec_run = Run() - archive.run.append(sec_run) - sec_run.program = Program(name='BOPfox', version=self.mainfile_parser.get('program_version')) + self.archive.run.append(sec_run) + sec_run.program = Program( + name="BOPfox", version=self.mainfile_parser.get("program_version") + ) sec_method = Method() sec_run.method.append(sec_method) parameters = self.mainfile_parser.get_simulation_parameters() sec_method.x_bopfox_simulation_parameters = parameters # force field parameters - self.modelsbx_parser.mainfile = os.path.join(self.maindir, parameters.get('modelfile', 'models.bx')) - for model in self.modelsbx_parser.get('model', []): + self.modelsbx_parser.mainfile = os.path.join( + self.maindir, parameters.get("modelfile", "models.bx") + ) + for model in self.modelsbx_parser.get("model", []): # pick out only the model indicated in parameters - if model.name == parameters.get('model'): + if model.name == parameters.get("model"): # bop uses a tight-binding model - tb = model.parameters.get('version', 'bop').lower() in ['bop', 'tight-binding'] + tb = model.parameters.get("version", "bop").lower() in [ + "bop", + "tight-binding", + ] if tb: sec_model = xTB() sec_method.tb = TB(xtb=sec_model) @@ -369,56 +454,86 @@ def parse(self, filepath, archive, logger): sec_model.name = model.name sec_model.x_bopfox_parameters = model.parameters # interaction between each bond pair - for bond in model.get('bond', []): + for bond in model.get("bond", []): # functional terms for each contribution for key, val in bond.items(): key = key.lower() if tb: - if key.endswith('sigma') or key.endswith('pi') or key.endswith('delta'): - sec_model.hamiltonian.append(Interaction( - name=key, functional_form=val[0], parameters=val[1:], - atom_labels=bond.get('bond'), x_bopfox_valence=bond.get('valence'), - x_bopfox_cutoff=bond.get('rcut'), x_bopfox_dcutoff=bond.get('dcut'), - x_bopfox_chargetransfer=bond.get('chargetransfer') - )) - if key.endswith('overlap'): - sec_model.overlap.append(Interaction( - name=key, functional_form=val[0], parameters=val[1:], - atom_labels=bond.get('bond'), x_bopfox_valence=bond.get('valence'), - x_bopfox_cutoff=bond.get('rcut'), x_bopfox_dcutoff=bond.get('dcut') - )) - elif key.startswith('rep'): - sec_model.repulsion.append(Interaction( - name=key, functional_form=val[0], parameters=val[1:], - atom_labels=bond.get('bond'), x_bopfox_cutoff=bond.get('r2cut'), - x_bopfox_dcutoff=bond.get('d2cut') - )) + if ( + key.endswith("sigma") + or key.endswith("pi") + or key.endswith("delta") + ): + sec_model.hamiltonian.append( + Interaction( + name=key, + functional_form=val[0], + parameters=val[1:], + atom_labels=bond.get("bond"), + x_bopfox_valence=bond.get("valence"), + x_bopfox_cutoff=bond.get("rcut"), + x_bopfox_dcutoff=bond.get("dcut"), + x_bopfox_chargetransfer=bond.get( + "chargetransfer" + ), + ) + ) + if key.endswith("overlap"): + sec_model.overlap.append( + Interaction( + name=key, + functional_form=val[0], + parameters=val[1:], + atom_labels=bond.get("bond"), + x_bopfox_valence=bond.get("valence"), + x_bopfox_cutoff=bond.get("rcut"), + x_bopfox_dcutoff=bond.get("dcut"), + ) + ) + elif key.startswith("rep"): + sec_model.repulsion.append( + Interaction( + name=key, + functional_form=val[0], + parameters=val[1:], + atom_labels=bond.get("bond"), + x_bopfox_cutoff=bond.get("r2cut"), + x_bopfox_dcutoff=bond.get("d2cut"), + ) + ) else: - if key.startswith('rep'): - sec_model.contributions.append(Interaction( - name=key, functional_form=val[0], parameters=val[1:], - atom_labels=bond.get('bond'), x_bopfox_cutoff=bond.get('r2cut'), - x_bopfox_dcutoff=bond.get('d2cut') - )) + if key.startswith("rep"): + sec_model.contributions.append( + Interaction( + name=key, + functional_form=val[0], + parameters=val[1:], + atom_labels=bond.get("bond"), + x_bopfox_cutoff=bond.get("r2cut"), + x_bopfox_dcutoff=bond.get("d2cut"), + ) + ) def parse_system(source, target=None): if source is None: return - label_position = source.get('label_position') - lattice_vectors = source.get('lattice_vectors') + label_position = source.get("label_position") + lattice_vectors = source.get("lattice_vectors") if lattice_vectors is not None: - lattice_vectors = np.array(lattice_vectors) * source.get('lattice_constant', 1.0) + lattice_vectors = np.array(lattice_vectors) * source.get( + "lattice_constant", 1.0 + ) if label_position is not None: labels = [v[0] for v in label_position] positions = np.array([v[1:4] for v in label_position]) - if source.get('coordinate_type', '').lower().startswith('d'): + if source.get("coordinate_type", "").lower().startswith("d"): # positions are scaled by lattice vectors if lattice_vectors is not None: positions = np.dot(positions, lattice_vectors) else: - labels = source.get('labels') - positions = source.get('positions') + labels = source.get("labels") + positions = source.get("positions") if positions is None: return @@ -436,15 +551,15 @@ def parse_calculation(source, target=None): sec_run.calculation.append(sec_calc) if target is None else target # energy - n_atoms = self.mainfile_parser.get('n_atoms', [1, 1])[0] - if source.get('energy') is not None: + n_atoms = self.mainfile_parser.get("n_atoms", [1, 1])[0] + if source.get("energy") is not None: sec_energy = Energy() sec_calc.energy = sec_energy - for contribution in source.energy.get('contribution', []): + for contribution in source.energy.get("contribution", []): name = self._metainfo_map.get(contribution.type) energy_entry = EnergyEntry( value=contribution.total * ureg.eV * n_atoms, - values_per_atom=contribution.atomic * ureg.eV + values_per_atom=contribution.atomic * ureg.eV, ) if name is None: energy_entry.kind = contribution.type @@ -455,12 +570,14 @@ def parse_calculation(source, target=None): sec_energy.fermi = source.energy_fermi * ureg.eV # forces - if source.get('forces') is not None: + if source.get("forces") is not None: sec_forces = Forces() sec_calc.forces = sec_forces - for contribution in source.forces.get('contribution', []): + for contribution in source.forces.get("contribution", []): name = self._metainfo_map.get(contribution.type) - forces_entry = ForcesEntry(value=contribution.atomic * ureg.eV / ureg.angstrom) + forces_entry = ForcesEntry( + value=contribution.atomic * ureg.eV / ureg.angstrom + ) if name is None: forces_entry.kind = contribution.type sec_forces.contributions.append(forces_entry) @@ -478,84 +595,114 @@ def symmetrize(stress): return symmetrized # stress - if source.get('stress') is not None: + if source.get("stress") is not None: sec_stress = Stress() sec_calc.stress = sec_stress - for contribution in source.stress.get('contribution', []): + for contribution in source.stress.get("contribution", []): name = self._metainfo_map.get(contribution.type) - stress_entry = StressEntry(values_per_atom=[symmetrize( - atomic) for atomic in contribution.atomic] * ureg.eV / ureg.angstrom ** 3) + stress_entry = StressEntry( + values_per_atom=[ + symmetrize(atomic) for atomic in contribution.atomic + ] + * ureg.eV + / ureg.angstrom**3 + ) if name is None: stress_entry.kind = contribution.type sec_stress.contributions.append(stress_entry) else: - if name == 'total': - stress_entry.value = symmetrize(source.stress.total) * ureg.eV / ureg.angstrom ** 3 + if name == "total": + stress_entry.value = ( + symmetrize(source.stress.total) + * ureg.eV + / ureg.angstrom**3 + ) setattr(sec_stress, name, stress_entry) # charges - if source.get('charges') is not None: + if source.get("charges") is not None: sec_charges = Charges() sec_calc.charges.append(sec_charges) sec_charges.n_electrons = source.charges.n_electrons sec_charges.value = source.charges.charge * ureg.elementary_charge # magnetic moments if source.magnetic_moments is not None: - for mag_mom in source.magnetic_moments.get('mag_mom', []): - sec_charges.orbital_projected.append(ChargesValue( - atom_index=mag_mom[0] - 1, orbital=mag_mom[1], spin_z=mag_mom[2] - )) + for mag_mom in source.magnetic_moments.get("mag_mom", []): + sec_charges.orbital_projected.append( + ChargesValue( + atom_index=mag_mom[0] - 1, + orbital=mag_mom[1], + spin_z=mag_mom[2], + ) + ) # onsite levels - if source.get('onsite_levels') is not None: + if source.get("onsite_levels") is not None: sec_onsite = x_bopfox_onsite_levels() sec_calc.x_bopfox_onsite_levels.append(sec_onsite) - for onsite in source.onsite_levels.get('energy', []): - sec_onsite.orbital_projected.append(x_bopfox_onsite_levels_value( - orbital=onsite[0], atom_index=onsite[1] - 1, value=onsite[2] - )) + for onsite in source.onsite_levels.get("energy", []): + sec_onsite.orbital_projected.append( + x_bopfox_onsite_levels_value( + orbital=onsite[0], atom_index=onsite[1] - 1, value=onsite[2] + ) + ) # energies and forces from trajectory file - if source.get('energies_total') is not None: - sec_calc.energy = Energy(total=EnergyEntry( - values_per_atom=source.get('energies_total') * ureg.eV, - value=sum(source.get('energies_total')) * ureg.eV - )) - if source.get('forces_total') is not None: - sec_calc.forces = Forces(total=ForcesEntry( - value=source.get('forces_total') * ureg.eV / ureg.angstrom - )) + if source.get("energies_total") is not None: + sec_calc.energy = Energy( + total=EnergyEntry( + values_per_atom=source.get("energies_total") * ureg.eV, + value=sum(source.get("energies_total")) * ureg.eV, + ) + ) + if source.get("forces_total") is not None: + sec_calc.forces = Forces( + total=ForcesEntry( + value=source.get("forces_total") * ureg.eV / ureg.angstrom + ) + ) # total energy from struc.log.dat - if source.get('energy_total') is not None: - sec_calc.energy = Energy(total=EnergyEntry( - value=source.get('energy_total') * ureg.eV - )) + if source.get("energy_total") is not None: + sec_calc.energy = Energy( + total=EnergyEntry(value=source.get("energy_total") * ureg.eV) + ) return sec_calc - task = parameters.get('task') + task = parameters.get("task") # read the strucfile from infox because string may be truncated in mainfile - self.infox_parser.mainfile = os.path.join(self.maindir, 'infox.bx') - struc_basename = self.infox_parser.get_parameters().get('strucfile', '').rstrip('.bx') + self.infox_parser.mainfile = os.path.join(self.maindir, "infox.bx") + struc_basename = ( + self.infox_parser.get_parameters().get("strucfile", "").rstrip(".bx") + ) # initial structure - self.strucbx_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.bx') + self.strucbx_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.bx" + ) sec_system = parse_system(self.strucbx_parser) # initial single point calculation sec_calc = parse_calculation(self.mainfile_parser) sec_calc.system_ref = sec_system workflow = None - if task in ['energy', 'force']: - archive.workflow2 = SinglePoint() + if task in ["energy", "force"]: + self.archive.workflow2 = SinglePoint() - elif task == 'relax': + elif task == "relax": # relaxation trajectory from struc.RX.xyz - self.xyz_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.RX.xyz') - frames = {int(frame.get('step')): frame for frame in self.xyz_parser.get('frame', [])} - self.dat_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.log.dat') - for n, cycle in enumerate(self.mainfile_parser.relaxation.get('cycle', [])): + self.xyz_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.RX.xyz" + ) + frames = { + int(frame.get("step")): frame + for frame in self.xyz_parser.get("frame", []) + } + self.dat_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.log.dat" + ) + for n, cycle in enumerate(self.mainfile_parser.relaxation.get("cycle", [])): sec_calc = parse_calculation(cycle) frame = frames.get(n + 1, dict(energy_total=self.dat_parser.data[n][1])) if sec_calc.energy is None: @@ -568,63 +715,94 @@ def symmetrize(stress): sec_calc.system_ref = sec_system # read final structure from struc.final.bx - self.strucbx_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.final.bx') + self.strucbx_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.final.bx" + ) sec_calc.system_ref = parse_system(self.strucbx_parser, sec_calc.system_ref) workflow = GeometryOptimization(method=GeometryOptimizationMethod()) - workflow.method.convergence_tolerance_energy_difference = parameters.get('rxeconv', 0) * ureg.eV - workflow.method.convergence_tolerance_force_maximum = parameters.get('rxfconv', 0) * ureg.eV / ureg.angstrom - archive.workflow2 = workflow + workflow.method.convergence_tolerance_energy_difference = ( + parameters.get("rxeconv", 0) * ureg.eV + ) + workflow.method.convergence_tolerance_force_maximum = ( + parameters.get("rxfconv", 0) * ureg.eV / ureg.angstrom + ) + self.archive.workflow2 = workflow - elif task == 'md': + elif task == "md": # md trajectory from struc.MD.xyz - self.xyz_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.MD.xyz') + self.xyz_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.MD.xyz" + ) # thermodynamic properties from struc.erg.dat # TODO determine if the column names are arbitrary - self.dat_parser.mainfile = os.path.join(self.maindir, f'{struc_basename}.erg.dat') + self.dat_parser.mainfile = os.path.join( + self.maindir, f"{struc_basename}.erg.dat" + ) - traj_steps = [int(frame.get('step', 0)) for frame in self.xyz_parser.get('frame', [])] - time_step = parameters.get('mdtimestep', 1.0) + traj_steps = [ + int(frame.get("step", 0)) for frame in self.xyz_parser.get("frame", []) + ] + time_step = parameters.get("mdtimestep", 1.0) thermo_steps = [int(d[0] / time_step) for d in self.dat_parser.data] - n_atoms = self.mainfile_parser.get('n_atoms', [1, 1])[0] + n_atoms = self.mainfile_parser.get("n_atoms", [1, 1])[0] self.n_atoms = n_atoms self.trajectory_steps = traj_steps self.thermodynamics_steps = thermo_steps - if (lattice_vectors := self.strucbx_parser.get('lattice_vectors')) is None: + if (lattice_vectors := self.strucbx_parser.get("lattice_vectors")) is None: lattice_vectors = lattice_vectors * ureg.angstrom for step in self.trajectory_steps: frame = self.xyz_parser.frame[traj_steps.index(step)] - labels = frame.get('labels') - positions = frame.get('positions') + labels = frame.get("labels") + positions = frame.get("positions") if positions is None or labels is None: continue - self.parse_trajectory_step(dict(atoms=dict( - labels=labels, positions=positions * ureg.angstrom, - lattice_vectors=lattice_vectors))) + self.parse_trajectory_step( + dict( + atoms=dict( + labels=labels, + positions=positions * ureg.angstrom, + lattice_vectors=lattice_vectors, + ) + ) + ) for n, step in enumerate(self.thermodynamics_steps): # md does not do an initial single point calculation so override first calc data = self.dat_parser.data[n] thermo_data = dict( - time_physical=data[0] * ureg.fs, time_step=step, energy=dict( + time_physical=data[0] * ureg.fs, + time_step=step, + energy=dict( total=dict( - value=data[1] * n_atoms * ureg.eV, potential=data[2] * n_atoms * ureg.eV, - kinetic=data[3] * n_atoms * ureg.eV)), - temperature=data[5] * ureg.K, pressure=data[7] * ureg.MPa) + value=data[1] * n_atoms * ureg.eV, + potential=data[2] * n_atoms * ureg.eV, + kinetic=data[3] * n_atoms * ureg.eV, + ) + ), + temperature=data[5] * ureg.K, + pressure=data[7] * ureg.MPa, + ) if n == 0: - self.parse_section(thermo_data, archive.run[-1].calculation[0]) + self.parse_section(thermo_data, self.archive.run[-1].calculation[0]) else: self.parse_thermodynamics_step(thermo_data) - integrator_map = {'velocity-verlet': 'velocity_verlet'} + integrator_map = {"velocity-verlet": "velocity_verlet"} ensemble = None - if parameters.get('mdthermostat'): - ensemble = 'NVT' - elif parameters.get('mdbarostat'): - ensemble = 'NVE' - self.parse_md_workflow(dict(method=dict( - integration_timestep=time_step * ureg.fs, - integrator_type=integrator_map.get(parameters.get('mdkernel')), - n_steps=parameters.get('mdsteps'), thermodynamic_ensemble=ensemble))) + if parameters.get("mdthermostat"): + ensemble = "NVT" + elif parameters.get("mdbarostat"): + ensemble = "NVE" + self.parse_md_workflow( + dict( + method=dict( + integration_timestep=time_step * ureg.fs, + integrator_type=integrator_map.get(parameters.get("mdkernel")), + n_steps=parameters.get("mdsteps"), + thermodynamic_ensemble=ensemble, + ) + ) + ) diff --git a/atomisticparsers/dftbplus/parser.py b/atomisticparsers/dftbplus/parser.py index 66eac814..37603f83 100644 --- a/atomisticparsers/dftbplus/parser.py +++ b/atomisticparsers/dftbplus/parser.py @@ -23,129 +23,169 @@ import numpy as np from nomad.units import ureg -from nomad.parsing.file_parser import TextParser, Quantity, FileParser -from runschema.run import ( - Run, Program -) -from runschema.method import ( - Method, TB -) -from runschema.system import ( - System, Atoms -) +from nomad.parsing.file_parser import TextParser, Quantity, FileParser, Parser +from runschema.run import Run, Program +from runschema.method import Method, TB +from runschema.system import System, Atoms from runschema.calculation import ( - Calculation, Energy, EnergyEntry, ScfIteration, BandEnergies, - Multipoles, MultipolesEntry, Forces, ForcesEntry + Calculation, + Energy, + EnergyEntry, + ScfIteration, + BandEnergies, + Multipoles, + MultipolesEntry, + Forces, + ForcesEntry, ) -re_n = r'[\n\r]' -re_f = r'[-+]?\d+\.\d*(?:[DdEe][-+]\d+)?' +re_n = r"[\n\r]" +re_f = r"[-+]?\d+\.\d*(?:[DdEe][-+]\d+)?" class DetailedParser(TextParser): def init_quantities(self): def to_kpoint(val_in): - val = np.transpose(np.array( - [line.split()[1:] for line in val_in.strip().splitlines()], dtype=np.float64)) + val = np.transpose( + np.array( + [line.split()[1:] for line in val_in.strip().splitlines()], + dtype=np.float64, + ) + ) eigs = np.array([val[i] for i in range(len(val)) if i % 2 == 0]) occs = np.array([val[i] for i in range(len(val)) if i % 2 == 0]) return eigs, occs self._quantities = [ Quantity( - 'coordinates', - r'Coordinates of moved atoms \(au\):\s+([\d\.\-\+\s]+)', - dtype=np.dtype(np.float64), repeats=True + "coordinates", + r"Coordinates of moved atoms \(au\):\s+([\d\.\-\+\s]+)", + dtype=np.dtype(np.float64), + repeats=True, ), Quantity( - 'charges', - r' Net atomic charges \(e\)\s+Atom +Net charge\s+([\d\.\-\+Ee\s]+)', - dtype=np.dtype(np.float64), repeats=True + "charges", + r" Net atomic charges \(e\)\s+Atom +Net charge\s+([\d\.\-\+Ee\s]+)", + dtype=np.dtype(np.float64), + repeats=True, ), Quantity( - 'eigenvalues', - r'Eigenvalues \/H\s+([\d\.\-\+\s]+)', - dtype=np.dtype(np.float64), repeats=True + "eigenvalues", + r"Eigenvalues \/H\s+([\d\.\-\+\s]+)", + dtype=np.dtype(np.float64), + repeats=True, ), Quantity( - 'occupations', - r'Fillings\s+([\d\.\-\+\s]+)', - dtype=np.dtype(np.float64), repeats=True + "occupations", + r"Fillings\s+([\d\.\-\+\s]+)", + dtype=np.dtype(np.float64), + repeats=True, ), Quantity( - 'eigenvalues_occupations', - rf'Eigenvalues \(H\) and fillings \(e\)\s+([\s\S]+?)Eigenvalues', - sub_parser=TextParser(quantities=[ - Quantity( - 'kpoint', - r'K-points* \d+\:*\d*\s+([\d\.\-\+\s]+)', - str_operation=to_kpoint, repeats=True - ) - ]) + "eigenvalues_occupations", + rf"Eigenvalues \(H\) and fillings \(e\)\s+([\s\S]+?)Eigenvalues", + sub_parser=TextParser( + quantities=[ + Quantity( + "kpoint", + r"K-points* \d+\:*\d*\s+([\d\.\-\+\s]+)", + str_operation=to_kpoint, + repeats=True, + ) + ] + ), ), Quantity( - 'fermi_level', - rf'Fermi level: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "fermi_level", + rf"Fermi level: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_band', - rf'Band energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_band", + rf"Band energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_ts', - rf'TS: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_ts", + rf"TS: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_band_free', - rf'Band free energy \(E\-TS\): +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_band_free", + rf"Band free energy \(E\-TS\): +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_band_t0', - rf'Extrapolated E\(0K\): +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_band_t0", + rf"Extrapolated E\(0K\): +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_sum_eigenvalues', - rf'Energy H0: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_sum_eigenvalues", + rf"Energy H0: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_scc', - rf'Energy SCC: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_scc", + rf"Energy SCC: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_electronic', - rf'Total Electronic energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_electronic", + rf"Total Electronic energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_nuclear_repulsion', - rf'Repulsive energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_nuclear_repulsion", + rf"Repulsive energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_dispersion', - rf'Dispersion energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_dispersion", + rf"Dispersion energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_total', - rf'Total energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_total", + rf"Total energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'energy_x_dftbp_total_mermin', - rf'Total Mermin free energy: +({re_f}) H', dtype=np.float64, unit=ureg.hartree + "energy_x_dftbp_total_mermin", + rf"Total Mermin free energy: +({re_f}) H", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'pressure', - rf'Pressure: +({re_f}) au', dtype=np.float64, unit=ureg.hartree / ureg.bohr ** 3 + "pressure", + rf"Pressure: +({re_f}) au", + dtype=np.float64, + unit=ureg.hartree / ureg.bohr**3, ), Quantity( - 'forces', - r'Total Forces\s+([\d\.\-\+\sE]+)', - dtype=np.dtype(np.float64), repeats=True + "forces", + r"Total Forces\s+([\d\.\-\+\sE]+)", + dtype=np.dtype(np.float64), + repeats=True, ), Quantity( - 'dipole', - rf'Dipole moment +: +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), repeats=True - ) + "dipole", + rf"Dipole moment +: +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + repeats=True, + ), ] @@ -157,7 +197,7 @@ def parse(self, key=None): if geometry is None: return - input_file = re.search(r'\<\<\< +\"(\S+)\"', geometry) + input_file = re.search(r"\<\<\< +\"(\S+)\"", geometry) if input_file: with open(os.path.join(self.maindir, input_file.group(1))) as f: geometry = f.read() @@ -172,24 +212,31 @@ def parse(self, key=None): symbols.append(elements[int(line[1]) - 1]) positions.append(line[2:5]) lattice_vectors = None - if lattice_type in ['S', 'F']: + if lattice_type in ["S", "F"]: # line immediately after coordinates is origin - lattice_vectors = np.array([ - v.split() for v in geometry[n_atoms + 3: n_atoms + 6]], dtype=np.float64) * ureg.angstrom + lattice_vectors = ( + np.array( + [v.split() for v in geometry[n_atoms + 3 : n_atoms + 6]], + dtype=np.float64, + ) + * ureg.angstrom + ) positions = np.array(positions, dtype=np.float64) * ureg.angstrom - if lattice_type == 'F': + if lattice_type == "F": # fractional coordinates positions = np.dot(positions, lattice_vectors) - self._results['symbols'] = symbols - self._results['positions'] = positions - self._results['lattice_vectors'] = lattice_vectors + self._results["symbols"] = symbols + self._results["positions"] = positions + self._results["lattice_vectors"] = lattice_vectors class HSDParser(FileParser): def __init__(self): super().__init__() - self._re_section_value = re.compile(r'(?: *([A-Z]\w+) *= *(\w*) *\{|([A-Z][\w\[\]]+) *= *([^}]+)|(}))') + self._re_section_value = re.compile( + r"(?: *([A-Z]\w+) *= *(\w*) *\{|([A-Z][\w\[\]]+) *= *([^}]+)|(}))" + ) self.gen_parser = GenParser() @property @@ -202,23 +249,23 @@ def parse(self, key=None): self._results = dict(data=dict()) line = self.hsd.readline() - current_section = self._results['data'] + current_section = self._results["data"] previous_sections = [] # value as a whole block - block = '' + block = "" while line: matches = self._re_section_value.findall(line) for match in matches: section, sub_section, key, value, close = match if section: current_section[section] = dict() - block = '' + block = "" previous_sections.append(current_section) current_section = current_section[section] if sub_section: - current_section['_function'] = sub_section + current_section["_function"] = sub_section if key and value: - value = value.strip().replace('"', '').replace('\'', '') + value = value.strip().replace('"', "").replace("'", "") try: value = float(value) except Exception: @@ -226,7 +273,7 @@ def parse(self, key=None): current_section[key] = value if close: if block: - current_section['_block'] = block + current_section["_block"] = block current_section = previous_sections[-1] previous_sections.pop(-1) if not matches: @@ -235,182 +282,216 @@ def parse(self, key=None): self.gen_parser.mainfile = self.mainfile self.gen_parser._mainfile_obj = io.StringIO( - self._results.get('data', {}).get('Geometry', {}).get('_block', '')) + self._results.get("data", {}).get("Geometry", {}).get("_block", "") + ) self.gen_parser.parse() - self._results['symbols'] = self.gen_parser.symbols - self._results['positions'] = self.gen_parser.positions - self._results['lattice_vectors'] = self.gen_parser.lattice_vectors + self._results["symbols"] = self.gen_parser.symbols + self._results["positions"] = self.gen_parser.positions + self._results["lattice_vectors"] = self.gen_parser.lattice_vectors class OutParser(TextParser): def init_quantities(self): self._quantities = [ - Quantity('program_version', r'\| DFTB\+ +(.+)', flatten=False, dtype=str), - Quantity('input_file', r'Interpreting input file \'(\S+)\'', dtype=str), - Quantity('processed_input_file', r'Processed input in HSD format written to \'(\S+)\'', dtype=str), - Quantity('parser_version', r'Parser version: +(.+)', flatten=False, dtype=str), + Quantity("program_version", r"\| DFTB\+ +(.+)", flatten=False, dtype=str), + Quantity("input_file", r"Interpreting input file \'(\S+)\'", dtype=str), Quantity( - 'sk_files', - r'Reading SK-files:\s+([\s\S]+?)Done', - str_operation=lambda x: [v.strip() for v in x.strip().splitlines()] + "processed_input_file", + r"Processed input in HSD format written to \'(\S+)\'", + dtype=str, ), Quantity( - 'input_parameters', - r'Starting initialization\.\.\.\s+\-+\s+([\s\S]+?)\-{50}', - sub_parser=TextParser(quantities=[ - Quantity( - 'key_val', rf'([A-Z].+?\: +[\w\-\+\. \(\)\:]+){re_n}', - str_operation=lambda x: [v.strip() for v in x.strip().split(':', 1)], - repeats=True - ), - Quantity( - 'kpoints_weights', - rf'K\-points and weights: +([\s\S]+?){re_n} *{re_n}', - str_operation=lambda x: [v.strip().split()[1:5] for v in x.strip().splitlines()], - dtype=np.dtype(np.float64) - ) - ]) + "parser_version", r"Parser version: +(.+)", flatten=False, dtype=str ), Quantity( - 'step', - r'(Geometry step\: +\d+[\s\S]+?)(?:\-{50}|\Z)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'scf', - rf'\d+ +({re_f} +{re_f} +{re_f})', - repeats=True, dtype=np.dtype(np.float64) - ), - Quantity( - 'energy_total', - rf'Total Energy: +({re_f}) +H', - dtype=np.float64, unit=ureg.hartree - ), - Quantity( - 'energy_total_t0', - rf'Extrapolated to 0: +({re_f}) +H', - dtype=np.float64, unit=ureg.hartree - ), - Quantity( - 'energy_x_dftbp_total_mermin', - rf'Total Mermin free energy: +({re_f}) +H', - dtype=np.float64, unit=ureg.hartree - ), - Quantity( - 'pressure', - rf'Pressure: +({re_f}) +au', - dtype=np.float64, unit=ureg.hartree / ureg.bohr ** 3 - ), - Quantity( - 'maximum_force', - rf'Maximal force component: +({re_f})', - dtype=np.float64, unit=ureg.hartree / ureg.bohr - ) - - ]) - ) + "sk_files", + r"Reading SK-files:\s+([\s\S]+?)Done", + str_operation=lambda x: [v.strip() for v in x.strip().splitlines()], + ), + Quantity( + "input_parameters", + r"Starting initialization\.\.\.\s+\-+\s+([\s\S]+?)\-{50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "key_val", + rf"([A-Z].+?\: +[\w\-\+\. \(\)\:]+){re_n}", + str_operation=lambda x: [ + v.strip() for v in x.strip().split(":", 1) + ], + repeats=True, + ), + Quantity( + "kpoints_weights", + rf"K\-points and weights: +([\s\S]+?){re_n} *{re_n}", + str_operation=lambda x: [ + v.strip().split()[1:5] for v in x.strip().splitlines() + ], + dtype=np.dtype(np.float64), + ), + ] + ), + ), + Quantity( + "step", + r"(Geometry step\: +\d+[\s\S]+?)(?:\-{50}|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "scf", + rf"\d+ +({re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + Quantity( + "energy_total", + rf"Total Energy: +({re_f}) +H", + dtype=np.float64, + unit=ureg.hartree, + ), + Quantity( + "energy_total_t0", + rf"Extrapolated to 0: +({re_f}) +H", + dtype=np.float64, + unit=ureg.hartree, + ), + Quantity( + "energy_x_dftbp_total_mermin", + rf"Total Mermin free energy: +({re_f}) +H", + dtype=np.float64, + unit=ureg.hartree, + ), + Quantity( + "pressure", + rf"Pressure: +({re_f}) +au", + dtype=np.float64, + unit=ureg.hartree / ureg.bohr**3, + ), + Quantity( + "maximum_force", + rf"Maximal force component: +({re_f})", + dtype=np.float64, + unit=ureg.hartree / ureg.bohr, + ), + ] + ), + ), ] -class DFTBPlusParser: +class DFTBPlusParser(Parser): def __init__(self): self.out_parser = OutParser() self.hsd_parser = HSDParser() self.detailed_parser = DetailedParser() self.gen_parser = GenParser() - def init_parser(self): - self.out_parser.mainfile = self.filepath + def write_to_archive(self) -> None: + self.out_parser.mainfile = self.mainfile self.out_parser.logger = self.logger self.hsd_parser.logger = self.logger self.gen_parser.logger = self.logger self.detailed_parser.logger = self.logger - - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.logger = logging.getLogger(__name__) if logger is None else logger - self.maindir = os.path.dirname(self.filepath) - self.init_parser() + self.maindir = os.path.dirname(self.mainfile) sec_run = Run() self.archive.run.append(sec_run) - sec_run.program = Program(name='DFTB+', version=self.out_parser.get('program_version')) + sec_run.program = Program( + name="DFTB+", version=self.out_parser.get("program_version") + ) def parse_system(source): sec_system = System() sec_run.system.append(sec_system) sec_system.atoms = Atoms( - labels=source.get('symbols'), positions=source.get('positions'), - lattice_vectors=source.get('lattice_vectors')) + labels=source.get("symbols"), + positions=source.get("positions"), + lattice_vectors=source.get("lattice_vectors"), + ) - input_file = self.out_parser.get('processed_input_file', self.out_parser.get('input_file')) + input_file = self.out_parser.get( + "processed_input_file", self.out_parser.get("input_file") + ) input_parameters = self.out_parser.input_parameters - if input_file.endswith('.hsd'): + if input_file.endswith(".hsd"): self.hsd_parser.mainfile = os.path.join(self.maindir, input_file) - input_parameters = self.hsd_parser.get('data') + input_parameters = self.hsd_parser.get("data") # parse initial structure parse_system(self.hsd_parser) - for key in ['input_file', 'processed_input_file', 'parser_version']: - setattr(sec_run, f'x_dftbp_{key}', self.out_parser.get(key)) + for key in ["input_file", "processed_input_file", "parser_version"]: + setattr(sec_run, f"x_dftbp_{key}", self.out_parser.get(key)) sec_method = Method() sec_run.method.append(sec_method) sec_tb = TB() sec_method.tb = sec_tb - sec_tb.name = 'DFTB' + sec_tb.name = "DFTB" sec_tb.x_dftbp_input_parameters = input_parameters sec_tb.x_dftbp_sk_files = self.out_parser.sk_files - for step in self.out_parser.get('step', []): + for step in self.out_parser.get("step", []): sec_scc = Calculation() sec_run.calculation.append(sec_scc) sec_scc.energy = Energy( total=EnergyEntry(value=step.energy_total), - total_t0=EnergyEntry(value=step.energy_total_t0)) - sec_scc.energy.x_dftbp_total_mermin = EnergyEntry(value=step.energy_x_dftbp_total_mermin) + total_t0=EnergyEntry(value=step.energy_total_t0), + ) + sec_scc.energy.x_dftbp_total_mermin = EnergyEntry( + value=step.energy_x_dftbp_total_mermin + ) sec_scc.pressure = step.pressure - for scf in step.get('scf', []): + for scf in step.get("scf", []): sec_scf = ScfIteration() sec_scc.scf_iteration.append(sec_scf) sec_scf.energy = Energy( total=EnergyEntry(value=scf[0] * ureg.hartree), - change=scf[1] * ureg.hartree) + change=scf[1] * ureg.hartree, + ) # reference the initial structure sec_run.calculation[0].system_ref = sec_run.system[0] # parse the final relaxed structure - self.gen_parser.mainfile = os.path.join(self.maindir, 'geo_end.gen') + self.gen_parser.mainfile = os.path.join(self.maindir, "geo_end.gen") if self.gen_parser.mainfile is not None: parse_system(self.gen_parser) sec_run.calculation[-1].system_ref = sec_run.system[-1] # properties in detailed.out # TODO add more properties e.g. charges - self.detailed_parser.mainfile = os.path.join(self.maindir, 'detailed.out') + self.detailed_parser.mainfile = os.path.join(self.maindir, "detailed.out") for key, val in self.detailed_parser.items(): if val is None: continue - if key.startswith('energy_'): - setattr(sec_scc.energy, key.replace('energy_', ''), EnergyEntry(value=val)) - elif key == 'forces': - sec_scc.forces = Forces(total=ForcesEntry(value=val * ureg.hartree / ureg.bohr)) - elif key == 'fermi_level': + if key.startswith("energy_"): + setattr( + sec_scc.energy, key.replace("energy_", ""), EnergyEntry(value=val) + ) + elif key == "forces": + sec_scc.forces = Forces( + total=ForcesEntry(value=val * ureg.hartree / ureg.bohr) + ) + elif key == "fermi_level": sec_scc.energy.fermi = val - elif key == 'pressure': + elif key == "pressure": sec_scc.pressure = val - elif key == 'eigenvalues_occupations': + elif key == "eigenvalues_occupations": sec_eigenvalues = BandEnergies() sec_scc.eigenvalues.append(sec_eigenvalues) # TODO handle spin polarization n_spin = 1 - eigs = np.vstack([kpoint[0] for kpoint in val.get('kpoint', [])]) - sec_eigenvalues.energies = np.reshape(eigs, (n_spin, *np.shape(eigs))) * ureg.hartree - occs = np.vstack([kpoint[1] for kpoint in val.get('kpoint', [])]) - sec_eigenvalues.occupations = np.reshape(occs, (n_spin, *np.shape(occs))) - elif key == 'dipole': + eigs = np.vstack([kpoint[0] for kpoint in val.get("kpoint", [])]) + sec_eigenvalues.energies = ( + np.reshape(eigs, (n_spin, *np.shape(eigs))) * ureg.hartree + ) + occs = np.vstack([kpoint[1] for kpoint in val.get("kpoint", [])]) + sec_eigenvalues.occupations = np.reshape( + occs, (n_spin, *np.shape(occs)) + ) + elif key == "dipole": sec_multipole = Multipoles() sec_scc.multipoles.append(sec_multipole) sec_multipole.dipole = MultipolesEntry(total=val) diff --git a/atomisticparsers/dlpoly/parser.py b/atomisticparsers/dlpoly/parser.py index 2eeb92c0..b33794b9 100644 --- a/atomisticparsers/dlpoly/parser.py +++ b/atomisticparsers/dlpoly/parser.py @@ -25,24 +25,34 @@ from nomad.parsing.file_parser import TextParser, Quantity from runschema.run import Run, Program from runschema.method import ( - Method, ForceField, Model, Interaction, AtomParameters, MoleculeParameters + Method, + ForceField, + Model, + Interaction, + AtomParameters, + MoleculeParameters, ) from atomisticparsers.utils import MDParser from .metainfo.dl_poly import m_package # pylint: disable=unused-import -re_f = r'[-+]?\d*\.\d*(?:[Ee][-+]\d+)?' -re_n = r'[\n\r]' -MOL = 6.02214076e+23 +re_f = r"[-+]?\d*\.\d*(?:[Ee][-+]\d+)?" +re_n = r"[\n\r]" +MOL = 6.02214076e23 class TrajParser(TextParser): def init_quantities(self): - def to_info(val_in): val = val_in.strip().split() if len(val) == 5: - return dict(step=val[0], n_atoms=val[1], log_level=val[2], pbc_type=val[3], dt=val[4]) + return dict( + step=val[0], + n_atoms=val[1], + log_level=val[2], + pbc_type=val[3], + dt=val[4], + ) elif len(val) == 2: return dict(log_level=val[0], pbc_type=val[1]) elif len(val) > 2: @@ -51,31 +61,45 @@ def to_info(val_in): self._quantities = [ Quantity( - 'frame', - rf'(\d+ +\d+.*\s+{re_f}[\s\S]+?)(?:timestep|\Z)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'info', - # rf'(\d+ +\d+[ \d]*(?:{re_f})*) *(?:{re_f})*) *{re_n}', - rf'(\d+ +\d+[ \d\.Ee\-\+]*){re_n}', - str_operation=to_info - ), - Quantity( - 'lattice_vectors', - rf'({re_f} +{re_f} +{re_f})\s+' - rf'({re_f} +{re_f} +{re_f})\s+' - rf'({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), shape=(3, 3) - ), - Quantity( - 'atoms', - rf'({re_n} *[A-Za-z]+\S* *\d*.*(?:\s+{re_f})+)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('label', rf'{re_n} *([A-Z][a-z]*)\S* *\d*.*'), - Quantity('array', rf'({re_f} +{re_f} +{re_f})', repeats=True, dtype=np.dtype(np.float64)), - ]) - ) - ]) + "frame", + rf"(\d+ +\d+.*\s+{re_f}[\s\S]+?)(?:timestep|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "info", + # rf'(\d+ +\d+[ \d]*(?:{re_f})*) *(?:{re_f})*) *{re_n}', + rf"(\d+ +\d+[ \d\.Ee\-\+]*){re_n}", + str_operation=to_info, + ), + Quantity( + "lattice_vectors", + rf"({re_f} +{re_f} +{re_f})\s+" + rf"({re_f} +{re_f} +{re_f})\s+" + rf"({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + shape=(3, 3), + ), + Quantity( + "atoms", + rf"({re_n} *[A-Za-z]+\S* *\d*.*(?:\s+{re_f})+)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "label", rf"{re_n} *([A-Z][a-z]*)\S* *\d*.*" + ), + Quantity( + "array", + rf"({re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + ] + ), + ), + ] + ), ) ] @@ -86,231 +110,331 @@ def __init__(self): def init_quantities(self): def to_atoms(val_in): - keys = ['label', 'mass', 'charge', 'x_dl_poly_nrept', 'x_dl_poly_ifrz', 'x_dl_poly_igrp'] - return [{keys[n]: val_n for n, val_n in enumerate( - val[:len(keys)])} for val in [v.split() for v in val_in.strip().splitlines()]] + keys = [ + "label", + "mass", + "charge", + "x_dl_poly_nrept", + "x_dl_poly_ifrz", + "x_dl_poly_igrp", + ] + return [ + {keys[n]: val_n for n, val_n in enumerate(val[: len(keys)])} + for val in [v.split() for v in val_in.strip().splitlines()] + ] def to_shell(val_in): keys = [ - 'x_dl_poly_atom_indices_core', 'x_dl_poly_atom_indices_shell', 'x_dl_poly_force_constant', - 'x_dl_poly_force_constant_anharmonic'] - return [{keys[n]: val_n for n, val_n in enumerate( - val[:len(keys)])} for val in [v.split() for v in val_in.strip().splitlines()]] + "x_dl_poly_atom_indices_core", + "x_dl_poly_atom_indices_shell", + "x_dl_poly_force_constant", + "x_dl_poly_force_constant_anharmonic", + ] + return [ + {keys[n]: val_n for n, val_n in enumerate(val[: len(keys)])} + for val in [v.split() for v in val_in.strip().splitlines()] + ] def to_bonds(val_in): val = [v.split() for v in val_in.strip().splitlines()] potentials = { - 'harm': 'Harmonic', 'mors': 'Morse', '12-6': '12-6', - 'rhrm': 'Restraint', 'quar': 'Quartic', 'buck': 'Buckingham', - 'fene': 'FENE', 'coul': 'Coulombic'} + "harm": "Harmonic", + "mors": "Morse", + "12-6": "12-6", + "rhrm": "Restraint", + "quar": "Quartic", + "buck": "Buckingham", + "fene": "FENE", + "coul": "Coulombic", + } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[0], val_n[0]), - # atom index starts from 1 - atom_indices=[int(n) - 1 for n in val_n[1:3]], - parameters=[float(v) for v in val_n[3:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[0], val_n[0]), + # atom index starts from 1 + atom_indices=[int(n) - 1 for n in val_n[1:3]], + parameters=[float(v) for v in val_n[3:]], + ) + ) return interactions def to_angles(val_in): val = [v.split() for v in val_in.strip().splitlines()] potentials = { - 'harm': 'Harmonic', 'quar': 'Quartic', 'thrm': 'Truncated harmonic', - 'sharm': 'Screened harmonic', 'bvs1': 'Screened Vessa', 'bvs2': 'Truncated Vessa', - 'bcos': 'Harmonic cosine', 'cos': 'Cosine', 'mmsb': 'MM strech-bend', - 'stst': 'Compass stretch-stretch', 'stbe': 'Compass stretch-bend', 'cmps': 'Compass all terms' + "harm": "Harmonic", + "quar": "Quartic", + "thrm": "Truncated harmonic", + "sharm": "Screened harmonic", + "bvs1": "Screened Vessa", + "bvs2": "Truncated Vessa", + "bcos": "Harmonic cosine", + "cos": "Cosine", + "mmsb": "MM strech-bend", + "stst": "Compass stretch-stretch", + "stbe": "Compass stretch-bend", + "cmps": "Compass all terms", } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[0], val_n[0]), - atom_indices=[int(n) - 1 for n in val_n[1:4]], - parameters=[float(v) for v in val_n[4:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[0], val_n[0]), + atom_indices=[int(n) - 1 for n in val_n[1:4]], + parameters=[float(v) for v in val_n[4:]], + ) + ) return interactions def to_dihedrals(val_in): val = [v.split() for v in val_in.strip().splitlines()] potentials = { - 'cos': 'Cosine', 'harm': 'Harmonic', 'bcos': 'Harmonic cosine', 'cos3': 'Triple cosine', - 'ryck': 'Ryckaert-Bellemans', 'rbf': 'Fluorinated Ryckaert-Bellemans', 'opls': 'OPLS' + "cos": "Cosine", + "harm": "Harmonic", + "bcos": "Harmonic cosine", + "cos3": "Triple cosine", + "ryck": "Ryckaert-Bellemans", + "rbf": "Fluorinated Ryckaert-Bellemans", + "opls": "OPLS", } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[0], val_n[0]), - atom_indices=[int(n) - 1 for n in val_n[1:5]], - parameters=[float(v) for v in val_n[5:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[0], val_n[0]), + atom_indices=[int(n) - 1 for n in val_n[1:5]], + parameters=[float(v) for v in val_n[5:]], + ) + ) return interactions def to_inversions(val_in): val = [v.split() for v in val_in.strip().splitlines()] - potentials = {'harm': 'Harmonic', 'bcos': 'Harmonic cosine', 'plan': 'Planar', 'calc': 'Calcite'} + potentials = { + "harm": "Harmonic", + "bcos": "Harmonic cosine", + "plan": "Planar", + "calc": "Calcite", + } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[0], val_n[0]), - atom_indices=[int(n) - 1 for n in val_n[1:5]], - parameters=[float(v) for v in val_n[5:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[0], val_n[0]), + atom_indices=[int(n) - 1 for n in val_n[1:5]], + parameters=[float(v) for v in val_n[5:]], + ) + ) return interactions def to_teth(val_in): val = [v.split() for v in val_in.strip().splitlines()] - potentials = {'harm': 'Harmonic', 'rhrm': 'Restraint', 'quar': 'Quartic'} + potentials = {"harm": "Harmonic", "rhrm": "Restraint", "quar": "Quartic"} interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[0], val_n[0]), - atom_indices=[int(n) - 1 for n in val_n[1:2]], - parameters=[float(v) for v in val_n[2:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[0], val_n[0]), + atom_indices=[int(n) - 1 for n in val_n[1:2]], + parameters=[float(v) for v in val_n[2:]], + ) + ) return interactions def to_rigid(val_in): val = [v.split() for v in val_in.strip().splitlines()] - return [[int(vi) - 1 for vi in v[1: int(v[0]) + 1]] for v in val] + return [[int(vi) - 1 for vi in v[1 : int(v[0]) + 1]] for v in val] def to_vdw(val_in): val = [v.split() for v in val_in.strip().splitlines()] potentials = { - '12-6': '12-6', 'lj': 'Lennard-Jones', 'nm': 'n-m', - 'buck': 'Buckingham', 'bhm': 'Born-Huggins-Meyer', 'hbnd': '12-10 H-bond', - 'snm': 'Shifted force n-m', 'mors': 'Morse', 'wca': 'WCA', 'tab': 'Table' + "12-6": "12-6", + "lj": "Lennard-Jones", + "nm": "n-m", + "buck": "Buckingham", + "bhm": "Born-Huggins-Meyer", + "hbnd": "12-10 H-bond", + "snm": "Shifted force n-m", + "mors": "Morse", + "wca": "WCA", + "tab": "Table", } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[2], val_n[2]), - atom_labels=val_n[:2], - parameters=[float(v) for v in val_n[3:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[2], val_n[2]), + atom_labels=val_n[:2], + parameters=[float(v) for v in val_n[3:]], + ) + ) return interactions def to_tbp(val_in): val = [v.split() for v in val_in.strip().splitlines()] potentials = { - 'thrm': 'Truncated harmonic', 'shrm': 'Screened Harmonic', 'bvs1': 'Screened Vessa', - 'bvs2': 'Truncated Vessa', 'hbnd': 'H-bond', + "thrm": "Truncated harmonic", + "shrm": "Screened Harmonic", + "bvs1": "Screened Vessa", + "bvs2": "Truncated Vessa", + "hbnd": "H-bond", } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[3], val_n[3]), - atom_labels=val_n[:3], - parameters=[float(v) for v in val_n[4:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[3], val_n[3]), + atom_labels=val_n[:3], + parameters=[float(v) for v in val_n[4:]], + ) + ) return interactions def to_fbp(val_in): val = [v.split() for v in val_in.strip().splitlines()] - potentials = {'harm': 'Harmonic', 'hcos': 'Harmonic cosine', 'plan': 'Planar'} + potentials = { + "harm": "Harmonic", + "hcos": "Harmonic cosine", + "plan": "Planar", + } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[4], val_n[4]), - atom_labels=val_n[:4], - parameters=[float(v) for v in val_n[5:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[4], val_n[4]), + atom_labels=val_n[:4], + parameters=[float(v) for v in val_n[5:]], + ) + ) return interactions def to_metal(val_in): val = [v.split() for v in val_in.strip().splitlines()] - potentials = {'eam': 'EAM', 'fnsc': 'Finnis-Sinclair', 'stch': 'Sutton-Chen', 'gupt': 'Gupta'} + potentials = { + "eam": "EAM", + "fnsc": "Finnis-Sinclair", + "stch": "Sutton-Chen", + "gupt": "Gupta", + } interactions = [] for val_n in val: - interactions.append(dict( - functional_form=potentials.get(val_n[2], val_n[2]), - atom_labels=val_n[:2], - parameters=[float(v) for v in val_n[3:]] - )) + interactions.append( + dict( + functional_form=potentials.get(val_n[2], val_n[2]), + atom_labels=val_n[:2], + parameters=[float(v) for v in val_n[3:]], + ) + ) return interactions self._quantities = [ - Quantity('units', r'[Uu][Nn][Ii][Tt][Ss] +(.+)', dtype=str), - Quantity('neutral_groups', r'(neutral group)', str_operation=lambda x: True), - Quantity('molecule_types', r'[Mm][Oo][Ll][Ee][Cc][Uu][Ll].+ +(\d+)', dtype=np.int32), + Quantity("units", r"[Uu][Nn][Ii][Tt][Ss] +(.+)", dtype=str), + Quantity( + "neutral_groups", r"(neutral group)", str_operation=lambda x: True + ), Quantity( - 'molecule', - r'(.+\s*[Nn][Uu][Mm][Mm][Oo][Ll][Ss] +\d+[\s\S]+?[Ff][Ii][Nn][Ii][Ss][Hh])', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'label_nummols', - rf' *(.+?){re_n} *[Nn][Uu][Mm][Mm][Oo][Ll][Ss] *(\d+)', - str_operation=lambda x: x.rsplit(' ', 1) - ), - Quantity( - 'atoms', - rf'[Aa][Tt][Oo][Mm][Ss] +\d+\s+((?:[A-Z][\w\-\+]* +{re_f}.+\s*)+)', - str_operation=to_atoms - ), - Quantity( - 'shell', - rf'[Ss][Hh][Ee][Ll][Ll] +\d+ +\d+\s+((?:\d+ +\d+ +{re_f}.+\s*)+)', - str_operation=to_shell, convert=False - ), - Quantity( - 'bonds', - rf'[Bb][Oo][Nn][Dd][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +{re_f}.+\s*)+)', - str_operation=to_bonds, convert=False - ), - Quantity( - 'angles', - rf'[Aa][Nn][Gg][Ll][Ee][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +{re_f} +{re_f}.+\s*)+)', - str_operation=to_angles, convert=False - ), - Quantity( - 'constraints', - rf'[Cc][Oo][Nn][Ss][Tt][Rr][Aa][Ii][Nn][Tt][Ss] +\d+\s+((?:\d+ +\d+ +{re_f}.*\s*)+)', - convert=False, str_operation=lambda x: [dict( - atom_indices=[int(v) - 1 for v in val[:2]], - parameters=[float(v) for v in val[2:3]]) for val in [v.split() for v in x.strip().splitlines()]] - ), - # TODO add pmf constraints - Quantity( - 'dihedrals', - rf'[Dd][Ii][Hh][Ee][Dd][Rr][Aa][Ll][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +\d+ +{re_f}.+\s*)+)', - str_operation=to_dihedrals, convert=False - ), - Quantity( - 'inversions', - rf'[Ii][Nn][Vv][Ee][Rr][Ss][Ii][Oo][Nn][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +\d+ +{re_f}.+\s*)+)', - str_operation=to_inversions, convert=False - ), - Quantity( - 'rigid', - r'[Rr][Ii][Gg][Ii][Dd].+?\d+\s+((?:\d+.+\s+)+)', - str_operation=to_rigid, convert=False - ), - Quantity( - 'teth', - rf'[Tt][Ee][Tt][Hh] +\d+\s+((?:\w+ +\d+ +{re_f}.+\s+)+)', - str_operation=to_teth, convert=False - ), - ]), + "molecule_types", + r"[Mm][Oo][Ll][Ee][Cc][Uu][Ll].+ +(\d+)", + dtype=np.int32, + ), + Quantity( + "molecule", + r"(.+\s*[Nn][Uu][Mm][Mm][Oo][Ll][Ss] +\d+[\s\S]+?[Ff][Ii][Nn][Ii][Ss][Hh])", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "label_nummols", + rf" *(.+?){re_n} *[Nn][Uu][Mm][Mm][Oo][Ll][Ss] *(\d+)", + str_operation=lambda x: x.rsplit(" ", 1), + ), + Quantity( + "atoms", + rf"[Aa][Tt][Oo][Mm][Ss] +\d+\s+((?:[A-Z][\w\-\+]* +{re_f}.+\s*)+)", + str_operation=to_atoms, + ), + Quantity( + "shell", + rf"[Ss][Hh][Ee][Ll][Ll] +\d+ +\d+\s+((?:\d+ +\d+ +{re_f}.+\s*)+)", + str_operation=to_shell, + convert=False, + ), + Quantity( + "bonds", + rf"[Bb][Oo][Nn][Dd][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +{re_f}.+\s*)+)", + str_operation=to_bonds, + convert=False, + ), + Quantity( + "angles", + rf"[Aa][Nn][Gg][Ll][Ee][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +{re_f} +{re_f}.+\s*)+)", + str_operation=to_angles, + convert=False, + ), + Quantity( + "constraints", + rf"[Cc][Oo][Nn][Ss][Tt][Rr][Aa][Ii][Nn][Tt][Ss] +\d+\s+((?:\d+ +\d+ +{re_f}.*\s*)+)", + convert=False, + str_operation=lambda x: [ + dict( + atom_indices=[int(v) - 1 for v in val[:2]], + parameters=[float(v) for v in val[2:3]], + ) + for val in [v.split() for v in x.strip().splitlines()] + ], + ), + # TODO add pmf constraints + Quantity( + "dihedrals", + rf"[Dd][Ii][Hh][Ee][Dd][Rr][Aa][Ll][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +\d+ +{re_f}.+\s*)+)", + str_operation=to_dihedrals, + convert=False, + ), + Quantity( + "inversions", + rf"[Ii][Nn][Vv][Ee][Rr][Ss][Ii][Oo][Nn][Ss] +\d+\s+((?:\w+ +\d+ +\d+ +\d+ +\d+ +{re_f}.+\s*)+)", + str_operation=to_inversions, + convert=False, + ), + Quantity( + "rigid", + r"[Rr][Ii][Gg][Ii][Dd].+?\d+\s+((?:\d+.+\s+)+)", + str_operation=to_rigid, + convert=False, + ), + Quantity( + "teth", + rf"[Tt][Ee][Tt][Hh] +\d+\s+((?:\w+ +\d+ +{re_f}.+\s+)+)", + str_operation=to_teth, + convert=False, + ), + ] + ), ), # non-bonded interactions Quantity( - 'vdw', - rf'[Vv][Dd][Ww].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]* +\w+.*\s*)+)', - str_operation=to_vdw, convert=False + "vdw", + rf"[Vv][Dd][Ww].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]* +\w+.*\s*)+)", + str_operation=to_vdw, + convert=False, ), Quantity( - 'tbp', - r'[Tt][Bb][Pp].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)', - str_operation=to_tbp, convert=False + "tbp", + r"[Tt][Bb][Pp].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)", + str_operation=to_tbp, + convert=False, ), Quantity( - 'fbp', - r'[Ff][Bb][Pp].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)', - str_operation=to_fbp, convert=False + "fbp", + r"[Ff][Bb][Pp].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)", + str_operation=to_fbp, + convert=False, ), Quantity( - 'metal', - r'[Mm][Ee][Tt][Aa][Ll].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)', - str_operation=to_metal, convert=False + "metal", + r"[Mm][Ee][Tt][Aa][Ll].+\d+\s+((?:[A-Z][\w\-\+]* +[A-Z][\w\-\+]*.*\s*)+)", + str_operation=to_metal, + convert=False, ), # TODO implement Tersoff and external fields ] @@ -323,58 +447,79 @@ def __init__(self): def init_quantities(self): self._quantities = [ Quantity( - 'program_version_date', r'\*\* +version\: +(\S+) +/ +(\w+) +(\d+)', - dtype=str + "program_version_date", + r"\*\* +version\: +(\S+) +/ +(\w+) +(\d+)", + dtype=str, ), Quantity( - 'program_name', r'when publishing research data obtained using (\S+)', - dtype=str + "program_name", + r"when publishing research data obtained using (\S+)", + dtype=str, ), Quantity( - 'control_parameters', - r'SIMULATION CONTROL PARAMETERS([\s\S]+?)SYS', - sub_parser=TextParser(quantities=[ - Quantity( - 'parameter', - r'([ \w]+) \(*\w*\)*((?: |\:) [\w \+\-\.]+)', - str_operation=lambda x: [v.strip() for v in x.replace(':', ' ').rsplit(' ', 1)], - repeats=True - ) - ]) + "control_parameters", + r"SIMULATION CONTROL PARAMETERS([\s\S]+?)SYS", + sub_parser=TextParser( + quantities=[ + Quantity( + "parameter", + r"([ \w]+) \(*\w*\)*((?: |\:) [\w \+\-\.]+)", + str_operation=lambda x: [ + v.strip() for v in x.replace(":", " ").rsplit(" ", 1) + ], + repeats=True, + ) + ] + ), ), Quantity( - 'system_specification', - r'TEM SPECIFICATION([\s\S]+?system volume.+)', - sub_parser=TextParser(quantities=[ - Quantity('energy_unit', r'energy units += +(.+)', dtype=str, flatten=False) - ]) + "system_specification", + r"TEM SPECIFICATION([\s\S]+?system volume.+)", + sub_parser=TextParser( + quantities=[ + Quantity( + "energy_unit", + r"energy units += +(.+)", + dtype=str, + flatten=False, + ) + ] + ), ), Quantity( - 'properties', - r'(step +eng_tot +temp_tot[\s\S]+?)(?:run terminating|\Z)', - sub_parser=TextParser(quantities=[ - Quantity( - 'names', - r'(step +eng_tot +temp_tot.+\s+.+\s+.+)', - str_operation=lambda x: [v.strip() for v in x.replace('\n', ' ').split(' ') if v] - ), - Quantity( - 'instantaneous', - rf'{re_n} +(\d+ +{re_f}.+\s+.+\s+.+)', - repeats=True - ), - Quantity( - 'average', - r'rolling(.+)\s+averages(.+)\s+(.+)', - repeats=True - ) - ]) - ) - + "properties", + r"(step +eng_tot +temp_tot[\s\S]+?)(?:run terminating|\Z)", + sub_parser=TextParser( + quantities=[ + Quantity( + "names", + r"(step +eng_tot +temp_tot.+\s+.+\s+.+)", + str_operation=lambda x: [ + v.strip() + for v in x.replace("\n", " ").split(" ") + if v + ], + ), + Quantity( + "instantaneous", + rf"{re_n} +(\d+ +{re_f}.+\s+.+\s+.+)", + repeats=True, + ), + Quantity( + "average", + r"rolling(.+)\s+averages(.+)\s+(.+)", + repeats=True, + ), + ] + ), + ), ] def get_control_parameters(self): - return {val[0]: val[1] for val in self.get('control_parameters', {}).get('parameter', [])} + return { + val[0]: val[1] + for val in self.get("control_parameters", {}).get("parameter", []) + } class DLPolyParser(MDParser): @@ -383,83 +528,85 @@ def __init__(self): self.traj_parser = TrajParser() self.field_parser = FieldParser() self._units = { - 'kj/mol': ureg.kJ / MOL, - 'kj': ureg.kJ / MOL, - 'ev': ureg.eV, - 'kcal/mol': ureg.J * 4184.0 / MOL, - 'kcal': ureg.J * 4184.0 / MOL, - 'dl_poly internal units (10 j/mol)': 10 * ureg.J / MOL + "kj/mol": ureg.kJ / MOL, + "kj": ureg.kJ / MOL, + "ev": ureg.eV, + "kcal/mol": ureg.J * 4184.0 / MOL, + "kcal": ureg.J * 4184.0 / MOL, + "dl_poly internal units (10 j/mol)": 10 * ureg.J / MOL, } self._metainfo_map = { - 'step': 'step', - 'eng_tot': 'energy_total', - 'temp_tot': 'temperature', - 'eng_cfg': 'energy_contribution_configurational', - 'eng_vdw': 'energy_van_der_waals', - 'eng_src': 'energy_contribution_short_range', - 'eng_cou': 'energy_coulomb', - 'eng_bnd': 'energy_contribution_bond', - 'eng_ang': 'energy_contribution_angle', - 'eng_dih': 'energy_contribution_dihedral', - 'eng_tet': 'energy_contribution_tethering', - 'time(ps)': 'time', - 'time': 'time', - 'eng_pv': 'enthalpy', - 'temp_rot': 'x_dl_poly_temperature_rotational', + "step": "step", + "eng_tot": "energy_total", + "temp_tot": "temperature", + "eng_cfg": "energy_contribution_configurational", + "eng_vdw": "energy_van_der_waals", + "eng_src": "energy_contribution_short_range", + "eng_cou": "energy_coulomb", + "eng_bnd": "energy_contribution_bond", + "eng_ang": "energy_contribution_angle", + "eng_dih": "energy_contribution_dihedral", + "eng_tet": "energy_contribution_tethering", + "time(ps)": "time", + "time": "time", + "eng_pv": "enthalpy", + "temp_rot": "x_dl_poly_temperature_rotational", # TODO include virial to nomad metainfo - 'vir_cfg': 'x_dl_poly_virial_configurational', - 'vir_src': 'x_dl_poly_virial_short_range', - 'vir_cou': 'x_dl_poly_virial_coulomb', - 'vir_bnd': 'x_dl_poly_virial_bond', - 'vir_ang': 'x_dl_poly_virial_angle', - 'vir_con': 'x_dl_poly_virial_constraint', - 'vir_tet': 'x_dl_poly_virial_tethering', - 'cpu (s)': 'time_physical', - 'cpu time': 'time_physical', - 'volume': 'x_dl_poly_volume', - 'temp_shl': 'x_dl_poly_core_shell', - 'eng_shl': 'energy_contribution_core_shell', - 'vir_shl': 'x_dl_poly_core_shell', - 'vir_pmf': 'x_dl_poly_virial_potential_mean_force', - 'press': 'pressure' + "vir_cfg": "x_dl_poly_virial_configurational", + "vir_src": "x_dl_poly_virial_short_range", + "vir_cou": "x_dl_poly_virial_coulomb", + "vir_bnd": "x_dl_poly_virial_bond", + "vir_ang": "x_dl_poly_virial_angle", + "vir_con": "x_dl_poly_virial_constraint", + "vir_tet": "x_dl_poly_virial_tethering", + "cpu (s)": "time_physical", + "cpu time": "time_physical", + "volume": "x_dl_poly_volume", + "temp_shl": "x_dl_poly_core_shell", + "eng_shl": "energy_contribution_core_shell", + "vir_shl": "x_dl_poly_core_shell", + "vir_pmf": "x_dl_poly_virial_potential_mean_force", + "press": "pressure", } super().__init__() - def init_parser(self): + def write_to_archive(self) -> None: self.mainfile_parser.logger = self.logger - self.mainfile_parser.mainfile = self.filepath + self.mainfile_parser.mainfile = self.mainfile self.traj_parser.logger = self.logger self.field_parser.logger = self.logger - - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.logger = logging.getLogger(__name__) if logger is None else logger - self.maindir = os.path.dirname(self.filepath) - - self.init_parser() + self.maindir = os.path.dirname(self.mainfile) sec_run = Run() - archive.run.append(sec_run) - version_date = self.mainfile_parser.get('program_version_date', [None, None]) - sec_run.program = Program(name=self.mainfile_parser.program_name, version=version_date[0]) + self.archive.run.append(sec_run) + version_date = self.mainfile_parser.get("program_version_date", [None, None]) + sec_run.program = Program( + name=self.mainfile_parser.program_name, version=version_date[0] + ) sec_run.x_dl_poly_program_version_date = str(version_date[1]) def get_system_data(frame_index): - frame = self.traj_parser.get('frame')[frame_index] - labels = [atom.get('label') for atom in frame.get('atoms', [])] - lattice_vectors = frame.get('lattice_vectors') * ureg.angstrom - array = np.transpose([atom.get('array') for atom in frame.get('atoms', [])], axes=(1, 0, 2)) + frame = self.traj_parser.get("frame")[frame_index] + labels = [atom.get("label") for atom in frame.get("atoms", [])] + lattice_vectors = frame.get("lattice_vectors") * ureg.angstrom + array = np.transpose( + [atom.get("array") for atom in frame.get("atoms", [])], axes=(1, 0, 2) + ) positions = array[0] * ureg.angstrom velocities = None if len(array) > 1: velocities = array[1] * ureg.angstrom / ureg.ps - return dict(atoms=dict( - labels=labels, lattice_vectors=lattice_vectors, positions=positions, - velocities=velocities)) + return dict( + atoms=dict( + labels=labels, + lattice_vectors=lattice_vectors, + positions=positions, + velocities=velocities, + ) + ) # parse initial system from CONFIG - self.traj_parser.mainfile = os.path.join(self.maindir, 'CONFIG') + self.traj_parser.mainfile = os.path.join(self.maindir, "CONFIG") self.parse_trajectory_step(get_system_data(0)) # method @@ -472,21 +619,29 @@ def get_system_data(frame_index): sec_model = Model() sec_force_field.model.append(sec_model) # get interactions from FIELD file - self.field_parser.mainfile = os.path.join(self.maindir, 'FIELD') - units = {'mass': ureg.amu, 'charge': ureg.elementary_charge} - for molecule in self.field_parser.get('molecule', []): + self.field_parser.mainfile = os.path.join(self.maindir, "FIELD") + units = {"mass": ureg.amu, "charge": ureg.elementary_charge} + for molecule in self.field_parser.get("molecule", []): # atom parameters sec_molecule_parameters = MoleculeParameters() sec_method.molecule_parameters.append(sec_molecule_parameters) - sec_molecule_parameters.label = molecule.get('label_nummols', [None, None])[0] - for atom in molecule.get('atoms', []): + sec_molecule_parameters.label = molecule.get("label_nummols", [None, None])[ + 0 + ] + for atom in molecule.get("atoms", []): sec_atom_parameters = AtomParameters() sec_molecule_parameters.atom_parameters.append(sec_atom_parameters) for key, val in atom.items(): val = val * units.get(key, 1) setattr(sec_atom_parameters, key, val) # interactions - for interaction_type in ['bonds', 'angles', 'dihedrals', 'inversions', 'teth']: + for interaction_type in [ + "bonds", + "angles", + "dihedrals", + "inversions", + "teth", + ]: for interaction in molecule.get(interaction_type, []): sec_interaction = Interaction() sec_model.contributions.append(sec_interaction) @@ -494,55 +649,70 @@ def get_system_data(frame_index): setattr(sec_interaction, key, val) # add constraints to initial system constraint_data = [] - for constraint in molecule.get('constraints', []): - constraint_data.append(dict( - kind='fixed bond length', atom_indices=constraint.get('atom_indices'), - parameters=constraint.get('parameters'))) + for constraint in molecule.get("constraints", []): + constraint_data.append( + dict( + kind="fixed bond length", + atom_indices=constraint.get("atom_indices"), + parameters=constraint.get("parameters"), + ) + ) # rigid atoms - for rigid in molecule.get('rigid', []): - constraint_data.append(dict( - kind='static atoms', atom_indices=rigid)) + for rigid in molecule.get("rigid", []): + constraint_data.append(dict(kind="static atoms", atom_indices=rigid)) self.parse_section(dict(constraint=constraint_data), sec_run.system[0]) # TODO add atom groups in system # non-bonded - for interaction_type in ['vdw', 'tbp', 'fbp', 'metal']: + for interaction_type in ["vdw", "tbp", "fbp", "metal"]: for interaction in self.field_parser.get(interaction_type, []): sec_interaction = Interaction() sec_model.contributions.append(sec_interaction) for key, val in interaction.items(): setattr(sec_interaction, key, val) - system_spec = self.mainfile_parser.get('system_specification', {}) + system_spec = self.mainfile_parser.get("system_specification", {}) n_atoms = len(sec_run.system[-1].atoms.positions) - energy_unit = self._units.get(system_spec.get('energy_unit', '').strip().lower(), 1) * n_atoms - properties = self.mainfile_parser.get('properties', {}) - names = [self._metainfo_map.get(name) for name in properties.get('names', [])] + energy_unit = ( + self._units.get(system_spec.get("energy_unit", "").strip().lower(), 1) + * n_atoms + ) + properties = self.mainfile_parser.get("properties", {}) + names = [self._metainfo_map.get(name) for name in properties.get("names", [])] # parse trajectory from HISTORY - self.traj_parser.mainfile = os.path.join(self.maindir, 'HISTORY') + self.traj_parser.mainfile = os.path.join(self.maindir, "HISTORY") # set up md parser self.n_atoms = n_atoms - traj_steps = [frame.get('info', {}).get('step') for frame in self.traj_parser.get('frame', [])] + traj_steps = [ + frame.get("info", {}).get("step") + for frame in self.traj_parser.get("frame", []) + ] self.trajectory_steps = traj_steps - step_n = names.index('step') - self.thermodynamics_steps = [int(val[step_n]) for val in properties.get('instantaneous', [])] + step_n = names.index("step") + self.thermodynamics_steps = [ + int(val[step_n]) for val in properties.get("instantaneous", []) + ] for step in self.trajectory_steps: self.parse_trajectory_step(get_system_data(traj_steps.index(step))) - unit_conversion = {'m': 60, 'f': 0.001, 's': 1, 'p': 1} + unit_conversion = {"m": 60, "f": 0.001, "s": 1, "p": 1} for step in self.thermodynamics_steps: data = {} - energy = {'contributions': []} - instantaneous = properties.get('instantaneous')[self.thermodynamics_steps.index(step)] + energy = {"contributions": []} + instantaneous = properties.get("instantaneous")[ + self.thermodynamics_steps.index(step) + ] # instataneous should be an array of floats, however some outputs may not be all floats and in that # case TextParser fails to convert them properly because it reuses the data type from # previous parsing run. Convert them manually here for n, val in enumerate(instantaneous): try: if isinstance(val, str) and val[-1] in unit_conversion: - instantaneous[n] = float(val[:-1]) * unit_conversion.get(val[-1]) + instantaneous[n] = float(val[:-1]) * unit_conversion.get( + val[-1] + ) else: instantaneous[n] = float(val) except Exception: @@ -551,43 +721,71 @@ def get_system_data(frame_index): for n, name in enumerate(names): if name is None or instantaneous[n] is None: continue - if name.startswith('energy_contribution_'): - energy['contributions'].append(dict( - kind=name.replace('energy_contribution_', ''), value=instantaneous[n] * energy_unit)) - elif name == 'energy_enthalpy': - energy['enthalpy'] = instantaneous[n] * energy_unit - elif name.startswith('energy_'): - energy[name.replace('energy_', '')] = dict(value=instantaneous[n] * energy_unit) - elif name == 'enthalpy': - data['enthalpy'] = instantaneous[n] * energy_unit - elif 'temperature' in name: + if name.startswith("energy_contribution_"): + energy["contributions"].append( + dict( + kind=name.replace("energy_contribution_", ""), + value=instantaneous[n] * energy_unit, + ) + ) + elif name == "energy_enthalpy": + energy["enthalpy"] = instantaneous[n] * energy_unit + elif name.startswith("energy_"): + energy[name.replace("energy_", "")] = dict( + value=instantaneous[n] * energy_unit + ) + elif name == "enthalpy": + data["enthalpy"] = instantaneous[n] * energy_unit + elif "temperature" in name: data[name] = instantaneous[n] * ureg.kelvin - elif 'pressure' in name: + elif "pressure" in name: # TODO verify if atmosphere is the unit data[name] = instantaneous[n] * ureg.atm - elif 'volume' in name: - data[name] = instantaneous[n] * ureg.angstrom ** 3 - elif name == 'step': + elif "volume" in name: + data[name] = instantaneous[n] * ureg.angstrom**3 + elif name == "step": data[name] = int(instantaneous[n]) - elif name == 'time': + elif name == "time": data[name] = instantaneous[n] * ureg.ps - elif name == 'time_physical': + elif name == "time_physical": data[name] = instantaneous[n] * ureg.s index = self.thermodynamics_steps.index(step) - time_start = properties.get('instantaneous')[index - 1][n] if index > 0 else 0 - data['time_calculation'] = data[name] - time_start * ureg.s + time_start = ( + properties.get("instantaneous")[index - 1][n] + if index > 0 + else 0 + ) + data["time_calculation"] = data[name] - time_start * ureg.s else: data[name] = instantaneous[n] - data['energy'] = energy + data["energy"] = energy if step in traj_steps: - frame = self.traj_parser.get('frames')[traj_steps.index(step)] - array = np.transpose([atom.get('array') for atom in frame.get('atoms', [])]) + frame = self.traj_parser.get("frames")[traj_steps.index(step)] + array = np.transpose( + [atom.get("array") for atom in frame.get("atoms", [])] + ) if len(array) > 2: - data['forces'] = dict( - total=dict(value=np.transpose(array[2]) * ureg.amu * ureg.angstrom / ureg.ps ** 2)) + data["forces"] = dict( + total=dict( + value=np.transpose(array[2]) + * ureg.amu + * ureg.angstrom + / ureg.ps**2 + ) + ) self.parse_thermodynamics_step(data) - ensemble_type = control_parameters.get('Ensemble') - self.parse_md_workflow(dict(method=dict( - thermodynamic_ensemble=ensemble_type.split()[0] if ensemble_type is not None else None, - integration_timestep=control_parameters.get('fixed simulation timestep', 0) * ureg.ps))) + ensemble_type = control_parameters.get("Ensemble") + self.parse_md_workflow( + dict( + method=dict( + thermodynamic_ensemble=ensemble_type.split()[0] + if ensemble_type is not None + else None, + integration_timestep=control_parameters.get( + "fixed simulation timestep", 0 + ) + * ureg.ps, + ) + ) + ) diff --git a/atomisticparsers/gulp/parser.py b/atomisticparsers/gulp/parser.py index 68f9c332..98964f32 100644 --- a/atomisticparsers/gulp/parser.py +++ b/atomisticparsers/gulp/parser.py @@ -24,618 +24,1054 @@ from ase.spacegroup import crystal as ase_crystal from nomad.units import ureg -from nomad.parsing.file_parser import TextParser, Quantity +from nomad.parsing.file_parser import TextParser, Quantity, Parser from runschema.run import Run, Program, TimeRun -from runschema.method import ( - Method, ForceField, Model, Interaction, AtomParameters -) -from runschema.system import ( - System, Atoms -) -from runschema.calculation import ( - Calculation, Energy, EnergyEntry -) +from runschema.method import Method, ForceField, Model, Interaction, AtomParameters +from runschema.system import System, Atoms +from runschema.calculation import Calculation, Energy, EnergyEntry from simulationworkflowschema import ( - Elastic, ElasticMethod, ElasticResults, MolecularDynamics, MolecularDynamicsMethod + Elastic, + ElasticMethod, + ElasticResults, + MolecularDynamics, + MolecularDynamicsMethod, +) +from atomisticparsers.gulp.metainfo.gulp import ( + x_gulp_bulk_optimisation, + x_gulp_bulk_optimisation_cycle, ) -from atomisticparsers.gulp.metainfo.gulp import x_gulp_bulk_optimisation, x_gulp_bulk_optimisation_cycle -re_f = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?' -re_n = r'[\n\r]' +re_f = r"[-+]?\d+\.\d*(?:[Ee][-+]\d+)?" +re_n = r"[\n\r]" class MainfileParser(TextParser): def init_quantities(self): - def to_species(val_in): data = dict( - label=[], x_gulp_type=[], atom_number=[], mass=[], charge=[], - x_gulp_covalent_radius=[], x_gulp_ionic_radius=[], x_gulp_vdw_radius=[] + label=[], + x_gulp_type=[], + atom_number=[], + mass=[], + charge=[], + x_gulp_covalent_radius=[], + x_gulp_ionic_radius=[], + x_gulp_vdw_radius=[], ) for val in val_in.strip().splitlines(): val = val.strip().split() - data['label'].append(val[0]) - data['x_gulp_type'].append(val[1].lower()) - data['atom_number'].append(int(val[2])) - data['mass'].append(float(val[3]) * ureg.amu) - data['charge'].append(float(val[4]) * ureg.elementary_charge) - data['x_gulp_covalent_radius'].append(float(val[5]) * ureg.angstrom) - data['x_gulp_ionic_radius'].append(float(val[6]) * ureg.angstrom) - data['x_gulp_vdw_radius'].append(float(val[7]) * ureg.angstrom) + data["label"].append(val[0]) + data["x_gulp_type"].append(val[1].lower()) + data["atom_number"].append(int(val[2])) + data["mass"].append(float(val[3]) * ureg.amu) + data["charge"].append(float(val[4]) * ureg.elementary_charge) + data["x_gulp_covalent_radius"].append(float(val[5]) * ureg.angstrom) + data["x_gulp_ionic_radius"].append(float(val[6]) * ureg.angstrom) + data["x_gulp_vdw_radius"].append(float(val[7]) * ureg.angstrom) return data def to_cell_parameters(val_in): val = val_in.strip().split() - return np.array([val[0], val[2], val[4], val[1], val[3], val[5]], np.dtype(np.float64)) + return np.array( + [val[0], val[2], val[4], val[1], val[3], val[5]], np.dtype(np.float64) + ) coordinates_quantities = [ - Quantity('unit', r'Label +\((\w+)\)', dtype=str), - Quantity('auxilliary_keys', r'x +y +z +(.+)', str_operation=lambda x: x.split()), + Quantity("unit", r"Label +\((\w+)\)", dtype=str), + Quantity( + "auxilliary_keys", r"x +y +z +(.+)", str_operation=lambda x: x.split() + ), Quantity( - 'atom', - rf'\d+ +([A-Z][a-z]*)\S* +(\w+ +{re_f}.+)', - repeats=True, str_operation=lambda x: x.strip().replace('*', '').split() + "atom", + rf"\d+ +([A-Z][a-z]*)\S* +(\w+ +{re_f}.+)", + repeats=True, + str_operation=lambda x: x.strip().replace("*", "").split(), ), ] calc_quantities = [ Quantity( - 'energy_components', - rf'Components of .*energy \:\s+([\s\S]+?){re_n} *{re_n}', - sub_parser=TextParser(quantities=[Quantity( - 'key_val', rf'(.+) *= +({re_f}) eV', - repeats=True, str_operation=lambda x: [v.strip() for v in x.rsplit(' ', 1)]) - ]) + "energy_components", + rf"Components of .*energy \:\s+([\s\S]+?){re_n} *{re_n}", + sub_parser=TextParser( + quantities=[ + Quantity( + "key_val", + rf"(.+) *= +({re_f}) eV", + repeats=True, + str_operation=lambda x: [ + v.strip() for v in x.rsplit(" ", 1) + ], + ) + ] + ), ), Quantity( - 'bulk_optimisation', - rf'(Number of variables += +\d+[\s\S]+?Start of bulk optimisation \:\s+[\s\S]+?){re_n} *{re_n}', - sub_parser=TextParser(quantities=[ - Quantity('x_gulp_n_variables', r'Number of variables += +(\d+)', dtype=np.int32), - Quantity('x_gulp_max_n_calculations', r'Maximum number of calculations += +(\d+)', dtype=np.int32), - Quantity('x_gulp_max_hessian_update_interval', r'Maximum Hessian update interval += +(\d+)', dtype=np.int32), - Quantity('x_gulp_max_step_size', rf'Maximum step size += +({re_f})', dtype=np.float64), - Quantity('x_gulp_max_parameter_tolerance', rf'Maximum parameter tolerance += +({re_f})', dtype=np.float64), - Quantity('x_gulp_max_function_tolerance', rf'Maximum function +tolerance += +({re_f})', dtype=np.float64), - Quantity('x_gulp_max_gradient_tolerance', rf'Maximum gradient +tolerance += +({re_f})', dtype=np.float64), - Quantity('x_gulp_max_gradient_component', rf'Maximum gradient +component += +({re_f})', dtype=np.float64), - Quantity( - 'cycle', - rf'Cycle\: +\d+ +Energy\: +({re_f}) +Gnorm\: +({re_f}) +CPU\: +({re_f})', - repeats=True, dtype=np.dtype(np.float64) - ) - ]) + "bulk_optimisation", + rf"(Number of variables += +\d+[\s\S]+?Start of bulk optimisation \:\s+[\s\S]+?){re_n} *{re_n}", + sub_parser=TextParser( + quantities=[ + Quantity( + "x_gulp_n_variables", + r"Number of variables += +(\d+)", + dtype=np.int32, + ), + Quantity( + "x_gulp_max_n_calculations", + r"Maximum number of calculations += +(\d+)", + dtype=np.int32, + ), + Quantity( + "x_gulp_max_hessian_update_interval", + r"Maximum Hessian update interval += +(\d+)", + dtype=np.int32, + ), + Quantity( + "x_gulp_max_step_size", + rf"Maximum step size += +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_gulp_max_parameter_tolerance", + rf"Maximum parameter tolerance += +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_gulp_max_function_tolerance", + rf"Maximum function +tolerance += +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_gulp_max_gradient_tolerance", + rf"Maximum gradient +tolerance += +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_gulp_max_gradient_component", + rf"Maximum gradient +component += +({re_f})", + dtype=np.float64, + ), + Quantity( + "cycle", + rf"Cycle\: +\d+ +Energy\: +({re_f}) +Gnorm\: +({re_f}) +CPU\: +({re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + ] + ), ), Quantity( - 'coordinates', - r'Final.+coordinates.+\:\s+\-+\s+' - r'No\. +Atomic +(x +y +z +.+\s+Label .+)\s*\-+\s*([\s\S]+?)\-{50}', - sub_parser=TextParser(quantities=coordinates_quantities) + "coordinates", + r"Final.+coordinates.+\:\s+\-+\s+" + r"No\. +Atomic +(x +y +z +.+\s+Label .+)\s*\-+\s*([\s\S]+?)\-{50}", + sub_parser=TextParser(quantities=coordinates_quantities), ), Quantity( - 'lattice_vectors', - rf'Final Cartesian lattice vectors \(Angstroms\) \:\s+((?:{re_f}\s+)+)', - dtype=np.dtype(np.float64), shape=(3, 3) + "lattice_vectors", + rf"Final Cartesian lattice vectors \(Angstroms\) \:\s+((?:{re_f}\s+)+)", + dtype=np.dtype(np.float64), + shape=(3, 3), ), Quantity( - 'cell_parameters_primitive', - rf'Final cell parameters and derivatives \:\s+\-+\s+' - rf'((?:\w+ +{re_f}.+\s+)+)', + "cell_parameters_primitive", + rf"Final cell parameters and derivatives \:\s+\-+\s+" + rf"((?:\w+ +{re_f}.+\s+)+)", str_operation=lambda x: np.array( - [v.split()[1] for v in x.strip().splitlines()], np.dtype(np.float64)) + [v.split()[1] for v in x.strip().splitlines()], np.dtype(np.float64) + ), ), Quantity( - 'cell_parameters', - r'Non\-primitive lattice parameters \:\s+' - rf'a += +({re_f}) +b += +({re_f}) +c += +({re_f})\s+' - rf'alpha *= +({re_f}) +beta *= +({re_f}) +gamma *= +({re_f})\s+', - dtype=np.dtype(np.float64) + "cell_parameters", + r"Non\-primitive lattice parameters \:\s+" + rf"a += +({re_f}) +b += +({re_f}) +c += +({re_f})\s+" + rf"alpha *= +({re_f}) +beta *= +({re_f}) +gamma *= +({re_f})\s+", + dtype=np.dtype(np.float64), ), Quantity( - 'elastic_constants', - r'Elastic Constant Matrix.+\s+\-+\s+Indices.+\s+\-+\s+' - rf'((?:\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}\s+)+)', + "elastic_constants", + r"Elastic Constant Matrix.+\s+\-+\s+Indices.+\s+\-+\s+" + rf"((?:\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}\s+)+)", str_operation=lambda x: np.array( - [v.split()[1:7] for v in x.strip().splitlines()], np.dtype(np.float64)) * ureg.GPa + [v.split()[1:7] for v in x.strip().splitlines()], + np.dtype(np.float64), + ) + * ureg.GPa, ), Quantity( - 'elastic_compliance', - r'Elastic Compliance Matrix.+\s+\-+\s+Indices.+\s+\-+\s+' - rf'((?:\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}\s+)+)', + "elastic_compliance", + r"Elastic Compliance Matrix.+\s+\-+\s+Indices.+\s+\-+\s+" + rf"((?:\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}\s+)+)", str_operation=lambda x: np.array( - [v.split()[1:7] for v in x.strip().splitlines()], np.dtype(np.float64)) * 1 / ureg.GPa + [v.split()[1:7] for v in x.strip().splitlines()], + np.dtype(np.float64), + ) + * 1 + / ureg.GPa, ), Quantity( - 'mechanical_properties', - rf'Mechanical properties \:\s+([\s\S]+?){re_n} *{re_n}', - sub_parser=TextParser(quantities=[ - Quantity( - 'bulk_modulus', - rf'Bulk +Modulus \(GPa\) += +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.GPa - ), - Quantity( - 'shear_modulus', - rf'Shear +Modulus \(GPa\) += +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.GPa - ), - Quantity( - 'x_gulp_velocity_s_wave', - rf'Velocity S\-wave \(km/s\) += +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.km / ureg.s - ), - Quantity( - 'x_gulp_velocity_p_wave', - rf'Velocity P\-wave \(km/s\) += +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.km / ureg.s - ), - Quantity( - 'compressibility', - rf'Compressibility \(1/GPa\) += +({re_f})', dtype=np.float64, unit=1 / ureg.GPa - ), - Quantity( - 'x_gulp_youngs_modulus', - rf'Youngs Moduli \(GPa\) += +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.GPa - ), - Quantity( - 'poissons_ratio', - rf'Poissons Ratio \((?:x|y|z)\) += +(.+)', - dtype=np.dtype(np.float64), repeats=True - ), - ]) + "mechanical_properties", + rf"Mechanical properties \:\s+([\s\S]+?){re_n} *{re_n}", + sub_parser=TextParser( + quantities=[ + Quantity( + "bulk_modulus", + rf"Bulk +Modulus \(GPa\) += +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.GPa, + ), + Quantity( + "shear_modulus", + rf"Shear +Modulus \(GPa\) += +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.GPa, + ), + Quantity( + "x_gulp_velocity_s_wave", + rf"Velocity S\-wave \(km/s\) += +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.km / ureg.s, + ), + Quantity( + "x_gulp_velocity_p_wave", + rf"Velocity P\-wave \(km/s\) += +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.km / ureg.s, + ), + Quantity( + "compressibility", + rf"Compressibility \(1/GPa\) += +({re_f})", + dtype=np.float64, + unit=1 / ureg.GPa, + ), + Quantity( + "x_gulp_youngs_modulus", + rf"Youngs Moduli \(GPa\) += +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.GPa, + ), + Quantity( + "poissons_ratio", + rf"Poissons Ratio \((?:x|y|z)\) += +(.+)", + dtype=np.dtype(np.float64), + repeats=True, + ), + ] + ), ), Quantity( - 'x_gulp_piezoelectric_strain_matrix', - rf'Piezoelectric Strain Matrix\: \(Units=C/m\*\*2\)\s+\-+\s+' - rf'Indices.+\s*\-+\s+((?:\w +{re_f}.+\s+)+)', - dtype=np.dtype(np.float64), str_operation=lambda x: np.array( + "x_gulp_piezoelectric_strain_matrix", + rf"Piezoelectric Strain Matrix\: \(Units=C/m\*\*2\)\s+\-+\s+" + rf"Indices.+\s*\-+\s+((?:\w +{re_f}.+\s+)+)", + dtype=np.dtype(np.float64), + str_operation=lambda x: np.array( [v.strip().split()[1:7] for v in x.strip().splitlines()], - np.dtype(np.float64)) * ureg.C / ureg.m ** 2 + np.dtype(np.float64), + ) + * ureg.C + / ureg.m**2, ), Quantity( - 'x_gulp_piezoelectric_stress_matrix', - rf'Piezoelectric Stress Matrix\: \(Units=10\*\*\-11 C/N\)\s+\-+\s+' - rf'Indices.+\s*\-+\s+((?:\w +{re_f}.+\s+)+)', - dtype=np.dtype(np.float64), str_operation=lambda x: np.array( + "x_gulp_piezoelectric_stress_matrix", + rf"Piezoelectric Stress Matrix\: \(Units=10\*\*\-11 C/N\)\s+\-+\s+" + rf"Indices.+\s*\-+\s+((?:\w +{re_f}.+\s+)+)", + dtype=np.dtype(np.float64), + str_operation=lambda x: np.array( [v.strip().split()[1:7] for v in x.strip().splitlines()], - np.dtype(np.float64)) * 10 ** -11 * ureg.C / ureg.N + np.dtype(np.float64), + ) + * 10**-11 + * ureg.C + / ureg.N, ), Quantity( - 'x_gulp_static_dielectric_constant_tensor', - r'Static dielectric constant tensor \:\s+\-+\s+x +y +z\s+\-+\s+' - rf'((?:\w +{re_f} +{re_f} +{re_f}\s+)+)', + "x_gulp_static_dielectric_constant_tensor", + r"Static dielectric constant tensor \:\s+\-+\s+x +y +z\s+\-+\s+" + rf"((?:\w +{re_f} +{re_f} +{re_f}\s+)+)", str_operation=lambda x: np.array( [v.strip().split()[1:4] for v in x.strip().splitlines()], - np.dtype(np.float64)), + np.dtype(np.float64), + ), ), Quantity( - 'x_gulp_high_frequency_dielectric_constant_tensor', - r'High frequency dielectric constant tensor \:\s+\-+\s+x +y +z\s+\-+\s+' - rf'((?:\w +{re_f} +{re_f} +{re_f}\s+)+)', + "x_gulp_high_frequency_dielectric_constant_tensor", + r"High frequency dielectric constant tensor \:\s+\-+\s+x +y +z\s+\-+\s+" + rf"((?:\w +{re_f} +{re_f} +{re_f}\s+)+)", str_operation=lambda x: np.array( - [v.strip().split()[1:4] for v in x.strip().splitlines()], np.dtype(np.float64)) + [v.strip().split()[1:4] for v in x.strip().splitlines()], + np.dtype(np.float64), + ), ), Quantity( - 'x_gulp_static_refractive_indices', - r'Static refractive indices \:\s+\-+\s+([\s\S]+?)\-{50}', - sub_parser=TextParser(quantities=[Quantity( - 'value', rf'\d+ += +({re_f})', repeats=True, dtype=np.float64)]) + "x_gulp_static_refractive_indices", + r"Static refractive indices \:\s+\-+\s+([\s\S]+?)\-{50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "value", + rf"\d+ += +({re_f})", + repeats=True, + dtype=np.float64, + ) + ] + ), ), Quantity( - 'x_gulp_high_frequency_refractive_indices', - r'High frequency refractive indices \:\s+\-+\s+([\s\S]+?)\-{50}', - sub_parser=TextParser(quantities=[Quantity( - 'value', rf'\d+ += +({re_f})', repeats=True, dtype=np.float64)]) - ) + "x_gulp_high_frequency_refractive_indices", + r"High frequency refractive indices \:\s+\-+\s+([\s\S]+?)\-{50}", + sub_parser=TextParser( + quantities=[ + Quantity( + "value", + rf"\d+ += +({re_f})", + repeats=True, + dtype=np.float64, + ) + ] + ), + ), ] interaction_quantities = [ - Quantity('atom_type', r'([A-Z]\S* +(?:core|shell|c|s))\s', repeats=True), + Quantity("atom_type", r"([A-Z]\S* +(?:core|shell|c|s))\s", repeats=True), Quantity( - 'functional_form', r'([A-Z].{12})', - dtype=str, flatten=False, str_operation=lambda x: x.strip() - ) + "functional_form", + r"([A-Z].{12})", + dtype=str, + flatten=False, + str_operation=lambda x: x.strip(), + ), ] self._quantities = [ Quantity( - 'header', - rf'(Version[\s\S]+?){re_n} *{re_n}', - sub_parser=TextParser(quantities=[ - Quantity('program_version', r'Version = (\S+)', dtype=str), - Quantity('task', r'\* +(\w+) +\- .+', repeats=True, dtype=str), - Quantity( - 'title', r'\*\*\*\s+\* +(.+?) +\*\s+\*\*\*', - dtype=str, flatten=False - ) - - ]) + "header", + rf"(Version[\s\S]+?){re_n} *{re_n}", + sub_parser=TextParser( + quantities=[ + Quantity("program_version", r"Version = (\S+)", dtype=str), + Quantity("task", r"\* +(\w+) +\- .+", repeats=True, dtype=str), + Quantity( + "title", + r"\*\*\*\s+\* +(.+?) +\*\s+\*\*\*", + dtype=str, + flatten=False, + ), + ] + ), ), Quantity( - 'date_start', - r'Job Started +at (\d+\:\d+\.\d+) (\d+)\w+ (\w+) +(\d+)', - dtype=str, flatten=False + "date_start", + r"Job Started +at (\d+\:\d+\.\d+) (\d+)\w+ (\w+) +(\d+)", + dtype=str, + flatten=False, ), Quantity( - 'date_end', - r'Job Started +at (\d+\:\d+\.\d+) (\d+)\w+ (\w+) +(\d+)', - dtype=str, flatten=False + "date_end", + r"Job Started +at (\d+\:\d+\.\d+) (\d+)\w+ (\w+) +(\d+)", + dtype=str, + flatten=False, ), - Quantity('x_gulp_n_cpu', r'Number of CPUs += +(\d+)', dtype=np.int32), - Quantity('x_gulp_host_name', r'Host name += +(\S+)', dtype=str, flatten=False), + Quantity("x_gulp_n_cpu", r"Number of CPUs += +(\d+)", dtype=np.int32), Quantity( - 'x_gulp_total_n_configurations_input', - r'Total number of configurations input += +(\d+)', dtype=np.int32 + "x_gulp_host_name", r"Host name += +(\S+)", dtype=str, flatten=False ), Quantity( - 'input_configuration', - r'(Input for Configuration.+\s*\*+[\s\S]+?)\*{80}', - sub_parser=TextParser(quantities=[ - Quantity('x_gulp_formula', r'Formula = (\S+)', dtype=str), - Quantity('x_gulp_pbc', r'Dimensionality = (\d+)', dtype=np.int32), - Quantity('x_gulp_space_group', rf'Space group \S+ +\: +(.+?) +{re_n}', dtype=str, flatten=False), - Quantity('x_gulp_patterson_group', rf'Patterson group +\: +(.+?) +{re_n}', dtype=str, flatten=False), - Quantity( - 'lattice_vectors', - rf'Cartesian lattice vectors \(Angstroms\) \:\s+' - rf'((?:{re_f} +{re_f} +{re_f}\s+)+)', - dtype=np.dtype(np.float64), shape=[3, 3], - ), - Quantity( - 'cell_parameters', - rf'Primitive cell parameters.+\s+a.+?a += +({re_f}) +alpha += +({re_f})\s+' - rf'b.+?b += +({re_f}) +beta += +({re_f})\s+' - rf'c.+?c += +({re_f}) +gamma += +({re_f})\s+', - str_operation=to_cell_parameters - ), - Quantity( - 'cell_parameters', - rf'Cell parameters.+\s+a += +({re_f}) +alpha += +({re_f})\s+' - rf'b += +({re_f}) +beta += +({re_f})\s+' - rf'c += +({re_f}) +gamma += +({re_f})\s+', - str_operation=to_cell_parameters - ), - Quantity( - 'coordinates', - r'No\. +Atomic +(x +y +z +.+\s+Label .+)\s*\-+\s*([\s\S]+?)\-{50}', - sub_parser=TextParser(quantities=coordinates_quantities) - ), - ]) + "x_gulp_total_n_configurations_input", + r"Total number of configurations input += +(\d+)", + dtype=np.int32, ), Quantity( - 'input_information', - r'(General input information\s+\*\s*\*+[\s\S]+?)\*{80}', - sub_parser=TextParser(quantities=[ - Quantity( - 'species', - rf'Species +Type.+\s+Number.+\s*\-+\s+' - rf'((?:[A-Z]\w* +\w+ +\d+.+\s+)+)', - str_operation=to_species, convert=False - ), - Quantity( - 'pgfnff', - r'(pGFNFF forcefield to be used[\s\S]+pGFNFF.+)', - sub_parser=TextParser(quantities=[ - Quantity( - 'key_parameter', - rf'pGFNFF (.+? += +{re_f})', - repeats=True, str_operation=lambda x: [v.strip() for v in x.split('=')] - ) - ]) - ), - # old format - Quantity( - 'pair_potential', - r'(Atom +Types +Potential +A +B +C +D +.*\s+)' - r'(.+\s*\-+\s*[\s\S]+?)\-{80}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'interaction', - r'([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)', - repeats=True, sub_parser=TextParser(quantities=interaction_quantities + [ + "input_configuration", + r"(Input for Configuration.+\s*\*+[\s\S]+?)\*{80}", + sub_parser=TextParser( + quantities=[ + Quantity("x_gulp_formula", r"Formula = (\S+)", dtype=str), + Quantity( + "x_gulp_pbc", r"Dimensionality = (\d+)", dtype=np.int32 + ), + Quantity( + "x_gulp_space_group", + rf"Space group \S+ +\: +(.+?) +{re_n}", + dtype=str, + flatten=False, + ), + Quantity( + "x_gulp_patterson_group", + rf"Patterson group +\: +(.+?) +{re_n}", + dtype=str, + flatten=False, + ), + Quantity( + "lattice_vectors", + rf"Cartesian lattice vectors \(Angstroms\) \:\s+" + rf"((?:{re_f} +{re_f} +{re_f}\s+)+)", + dtype=np.dtype(np.float64), + shape=[3, 3], + ), + Quantity( + "cell_parameters", + rf"Primitive cell parameters.+\s+a.+?a += +({re_f}) +alpha += +({re_f})\s+" + rf"b.+?b += +({re_f}) +beta += +({re_f})\s+" + rf"c.+?c += +({re_f}) +gamma += +({re_f})\s+", + str_operation=to_cell_parameters, + ), + Quantity( + "cell_parameters", + rf"Cell parameters.+\s+a += +({re_f}) +alpha += +({re_f})\s+" + rf"b += +({re_f}) +beta += +({re_f})\s+" + rf"c += +({re_f}) +gamma += +({re_f})\s+", + str_operation=to_cell_parameters, + ), + Quantity( + "coordinates", + r"No\. +Atomic +(x +y +z +.+\s+Label .+)\s*\-+\s*([\s\S]+?)\-{50}", + sub_parser=TextParser(quantities=coordinates_quantities), + ), + ] + ), + ), + Quantity( + "input_information", + r"(General input information\s+\*\s*\*+[\s\S]+?)\*{80}", + sub_parser=TextParser( + quantities=[ + Quantity( + "species", + rf"Species +Type.+\s+Number.+\s*\-+\s+" + rf"((?:[A-Z]\w* +\w+ +\d+.+\s+)+)", + str_operation=to_species, + convert=False, + ), + Quantity( + "pgfnff", + r"(pGFNFF forcefield to be used[\s\S]+pGFNFF.+)", + sub_parser=TextParser( + quantities=[ Quantity( - 'key_parameter', - rf'({re_f}) +({re_f}) +({re_f}) +({re_f}) +({re_f}) +({re_f})', - str_operation=lambda x: list(zip( - ['A', 'B', 'C', 'D', 'cutoff_min', 'cutoff_max'], - np.array(x.strip().split(), np.dtype(np.float64)))) + "key_parameter", + rf"pGFNFF (.+? += +{re_f})", + repeats=True, + str_operation=lambda x: [ + v.strip() for v in x.split("=") + ], ) - ]) - ) - ]) - ), - # TODO verify this no example - Quantity( - 'three_body_potential', - r'(Atom +Atom +Atom +Force Constants +Theta.*\s+)' - r'(.+\s*\-+\s*[\s\S]+?)\-{80}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'interaction', - r'([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)', - repeats=True, sub_parser=TextParser(quantities=interaction_quantities + [ + ] + ), + ), + # old format + Quantity( + "pair_potential", + r"(Atom +Types +Potential +A +B +C +D +.*\s+)" + r"(.+\s*\-+\s*[\s\S]+?)\-{80}", + repeats=True, + sub_parser=TextParser( + quantities=[ Quantity( - 'key_parameter', - rf'({re_f}) +({re_f}) +({re_f}) +({re_f})', - str_operation=lambda x: list(zip( - ['force_constant_1', 'force_constant_2', 'force_constant_3', 'Theta'], - np.array(x.strip().split(), np.dtype(np.float64)))) + "interaction", + r"([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)", + repeats=True, + sub_parser=TextParser( + quantities=interaction_quantities + + [ + Quantity( + "key_parameter", + rf"({re_f}) +({re_f}) +({re_f}) +({re_f}) +({re_f}) +({re_f})", + str_operation=lambda x: list( + zip( + [ + "A", + "B", + "C", + "D", + "cutoff_min", + "cutoff_max", + ], + np.array( + x.strip().split(), + np.dtype(np.float64), + ), + ) + ), + ) + ] + ), ) - ]) - ) - ]) - ), - # TODO verify this no example - Quantity( - 'four_body_potential', - r'(Atom Types +Force cst\s*Sign\s*Phase\s*Phi0.*\s+)' - r'(.+\s*\-+\s*[\s\S]+?)\-{80}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'interaction', - r'([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)', - repeats=True, sub_parser=TextParser(quantities=interaction_quantities + [ + ] + ), + ), + # TODO verify this no example + Quantity( + "three_body_potential", + r"(Atom +Atom +Atom +Force Constants +Theta.*\s+)" + r"(.+\s*\-+\s*[\s\S]+?)\-{80}", + repeats=True, + sub_parser=TextParser( + quantities=[ Quantity( - 'key_parameter', - rf'({re_f}) +({re_f}) +({re_f}) +({re_f})', - str_operation=lambda x: list(zip( - ['force_constant', 'sign', 'phase', 'phi0'], - np.array(x.strip().split(), np.dtype(np.float64)))) + "interaction", + r"([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)", + repeats=True, + sub_parser=TextParser( + quantities=interaction_quantities + + [ + Quantity( + "key_parameter", + rf"({re_f}) +({re_f}) +({re_f}) +({re_f})", + str_operation=lambda x: list( + zip( + [ + "force_constant_1", + "force_constant_2", + "force_constant_3", + "Theta", + ], + np.array( + x.strip().split(), + np.dtype(np.float64), + ), + ) + ), + ) + ] + ), ) - ]) - ) - ]) - ), - # new format - Quantity( - 'interatomic_potential', - rf'.+? potentials +\:\s+\-+\s+' - rf'Atom.+?Potential +Parameter([\s\S]+?){re_n} *{re_n}', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'interaction', - # r'([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +[\s\S]+?\-{80})', - r'([A-Z]\S* +(?:core|shell|c|s) +[\s\S]+?\-{80})', - repeats=True, sub_parser=TextParser(quantities=interaction_quantities + [ + ] + ), + ), + # TODO verify this no example + Quantity( + "four_body_potential", + r"(Atom Types +Force cst\s*Sign\s*Phase\s*Phi0.*\s+)" + r"(.+\s*\-+\s*[\s\S]+?)\-{80}", + repeats=True, + sub_parser=TextParser( + quantities=[ Quantity( - 'key_parameter', r'([A-Z].{14}) {1,4}(\-*\d+\S*)', - repeats=True, str_operation=lambda x: [v.strip() for v in x.rsplit(' ', 1)] + "interaction", + r"([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +.+)", + repeats=True, + sub_parser=TextParser( + quantities=interaction_quantities + + [ + Quantity( + "key_parameter", + rf"({re_f}) +({re_f}) +({re_f}) +({re_f})", + str_operation=lambda x: list( + zip( + [ + "force_constant", + "sign", + "phase", + "phi0", + ], + np.array( + x.strip().split(), + np.dtype(np.float64), + ), + ) + ), + ) + ] + ), ) - ]) - ) - ]) - ) - ]) + ] + ), + ), + # new format + Quantity( + "interatomic_potential", + rf".+? potentials +\:\s+\-+\s+" + rf"Atom.+?Potential +Parameter([\s\S]+?){re_n} *{re_n}", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "interaction", + # r'([A-Z]\S* +(?:core|shell|c|s) +[A-Z]\S* +(?:core|shell|c|s) +[\s\S]+?\-{80})', + r"([A-Z]\S* +(?:core|shell|c|s) +[\s\S]+?\-{80})", + repeats=True, + sub_parser=TextParser( + quantities=interaction_quantities + + [ + Quantity( + "key_parameter", + r"([A-Z].{14}) {1,4}(\-*\d+\S*)", + repeats=True, + str_operation=lambda x: [ + v.strip() + for v in x.rsplit(" ", 1) + ], + ) + ] + ), + ) + ] + ), + ), + ] + ), ), Quantity( - 'single_point', - r'Output for configuration.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)', - repeats=True, sub_parser=TextParser(quantities=[Quantity( - 'calculation', - r'(Components of.+energy \:[\s\S]+?(?:Time to end of|Optimisation achieved|\Z).*)', - repeats=True, sub_parser=TextParser(quantities=calc_quantities) - )]) + "single_point", + r"Output for configuration.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "calculation", + r"(Components of.+energy \:[\s\S]+?(?:Time to end of|Optimisation achieved|\Z).*)", + repeats=True, + sub_parser=TextParser(quantities=calc_quantities), + ) + ] + ), ), Quantity( - 'molecular_dynamics', - r'Molecular Dynamics.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('ensemble_type', r'ensemble \((\S+)\) to be used', dtype=str), - Quantity( - 'x_gulp_friction_temperature_bath', - rf'Friction for temperature bath += +({re_f})', dtype=np.float64 - ), - Quantity( - 'x_gulp_n_mobile_ions', - r'No\. of mobile ions += +(\d+)', dtype=np.int32 - ), - Quantity( - 'x_gulp_n_degrees_of_freedom', - r'No\. of degrees of freedom += +(\d+)', dtype=np.int32 - ), - Quantity( - 'timestep', - rf'Time step += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_equilibration_time', - rf'Equilibration time += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_production_time', - rf'Production time += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_scaling_time', - rf'Scaling time += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_scaling_frequency', - rf'Scaling frequency += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_sampling_frequency', - rf'Sampling frequency += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_write_frequency', - rf'Write frequency += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_td_force_start_time', - rf'TD\-Force start time += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'x_gulp_td_field_start_time', - rf'TD\-Field start time += +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'step', - rf'(Time \: +[\s\S]+?)(?:\*\*|{re_n} *{re_n})', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'time', - rf'Time \: +({re_f})', dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'energy_kinetic', - rf'Kinetic energy +\(eV\) += +({re_f}) +({re_f})', - dtype=np.dtype(np.float64), unit=ureg.eV - ), - Quantity( - 'energy_potential', - rf'Potential energy +\(eV\) += +({re_f}) +({re_f})', - dtype=np.dtype(np.float64), unit=ureg.eV - ), - Quantity( - 'energy_total', - rf'Total energy +\(eV\) += +({re_f}) +({re_f})', - dtype=np.dtype(np.float64), unit=ureg.eV - ), - Quantity( - 'temperature', - rf'Temperature +\(K\) += +({re_f}) +({re_f})', - dtype=np.dtype(np.float64), unit=ureg.kelvin + "molecular_dynamics", + r"Molecular Dynamics.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "ensemble_type", r"ensemble \((\S+)\) to be used", dtype=str + ), + Quantity( + "x_gulp_friction_temperature_bath", + rf"Friction for temperature bath += +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_gulp_n_mobile_ions", + r"No\. of mobile ions += +(\d+)", + dtype=np.int32, + ), + Quantity( + "x_gulp_n_degrees_of_freedom", + r"No\. of degrees of freedom += +(\d+)", + dtype=np.int32, + ), + Quantity( + "timestep", + rf"Time step += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_equilibration_time", + rf"Equilibration time += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_production_time", + rf"Production time += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_scaling_time", + rf"Scaling time += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_scaling_frequency", + rf"Scaling frequency += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_sampling_frequency", + rf"Sampling frequency += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_write_frequency", + rf"Write frequency += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_td_force_start_time", + rf"TD\-Force start time += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "x_gulp_td_field_start_time", + rf"TD\-Field start time += +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "step", + rf"(Time \: +[\s\S]+?)(?:\*\*|{re_n} *{re_n})", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "time", + rf"Time \: +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "energy_kinetic", + rf"Kinetic energy +\(eV\) += +({re_f}) +({re_f})", + dtype=np.dtype(np.float64), + unit=ureg.eV, + ), + Quantity( + "energy_potential", + rf"Potential energy +\(eV\) += +({re_f}) +({re_f})", + dtype=np.dtype(np.float64), + unit=ureg.eV, + ), + Quantity( + "energy_total", + rf"Total energy +\(eV\) += +({re_f}) +({re_f})", + dtype=np.dtype(np.float64), + unit=ureg.eV, + ), + Quantity( + "temperature", + rf"Temperature +\(K\) += +({re_f}) +({re_f})", + dtype=np.dtype(np.float64), + unit=ureg.kelvin, + ), + Quantity( + "pressure", + rf"Pressure +\(GPa\) += +({re_f}) +({re_f})", + dtype=np.dtype(np.float64), + unit=ureg.GPa, + ), + ] ), - Quantity( - 'pressure', - rf'Pressure +\(GPa\) += +({re_f}) +({re_f})', - dtype=np.dtype(np.float64), unit=ureg.GPa - ) - ]) - ) - ]) + ), + ] + ), ), Quantity( - 'defect', - r'Defect calculation for configuration.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)', - repeats=True, sub_parser=TextParser(quantities=[Quantity( - 'calculation', - r'(Components of.+energy \:[\s\S]+?(?:Time to end of|Optimisation achieved).+)', - repeats=True, sub_parser=TextParser(quantities=calc_quantities) - )]) - ) + "defect", + r"Defect calculation for configuration.+\s+\*+([\s\S]+?)(?:\*{80}|\Z)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "calculation", + r"(Components of.+energy \:[\s\S]+?(?:Time to end of|Optimisation achieved).+)", + repeats=True, + sub_parser=TextParser(quantities=calc_quantities), + ) + ] + ), + ), ] -class GulpParser: +class GulpParser(Parser): def __init__(self): self.mainfile_parser = MainfileParser() self._metainfo_map = { - 'Attachment energy': 'x_gulp_attachment', - 'Attachment energy/unit': 'x_gulp_attachment_unit', - 'Bond-order potentials': 'x_gulp_bond_order_potentials', - 'Brenner potentials': 'x_gulp_brenner_potentials', - 'Bulk energy': 'x_gulp_bulk', - 'Dispersion (real+recip)': 'x_gulp_dispersion', - 'Electric_field*distance': 'x_gulp_electric_field_distance', - 'Energy shift': 'x_gulp_shift', - 'Four-body potentials': 'x_gulp_four_body_potentials', - 'Improper torsions': 'x_gulp_improper_torsions', - 'Interatomic potentials': 'x_gulp_interatomic_potentials', - 'Many-body potentials': 'x_gulp_many_body_potentials', - 'Monopole - monopole (real)': 'x_gulp_monopole_monopole_real', - 'Monopole - monopole (recip)': 'x_gulp_monopole_monopole_recip', - 'Monopole - monopole (total)': 'x_gulp_monopole_monopole_total', - 'Neutralising energy': 'x_gulp_neutralising', - 'Non-primitive unit cell': 'x_gulp_non_primitive_unit_cell', - 'Out of plane potentials': 'x_gulp_out_of_plane_potentials', - 'Primitive unit cell': 'x_gulp_primitive_unit_cell', - 'ReaxFF force field': 'x_gulp_reaxff_force_field', - 'Region 1-2 interaction': 'x_gulp_region_1_2_interaction', - 'Region 2-2 interaction': 'x_gulp_region_2_2_interaction', - 'Self energy (EEM/QEq/SM)': 'x_gulp_self_eem_qeq_sm', - 'SM Coulomb correction': 'x_gulp_sm_coulomb_correction', - 'Solvation energy': 'x_gulp_solvation', - 'Three-body potentials': 'x_gulp_three_body_potentials', - 'Total lattice energy': 'total', 'Total defect energy': 'total' + "Attachment energy": "x_gulp_attachment", + "Attachment energy/unit": "x_gulp_attachment_unit", + "Bond-order potentials": "x_gulp_bond_order_potentials", + "Brenner potentials": "x_gulp_brenner_potentials", + "Bulk energy": "x_gulp_bulk", + "Dispersion (real+recip)": "x_gulp_dispersion", + "Electric_field*distance": "x_gulp_electric_field_distance", + "Energy shift": "x_gulp_shift", + "Four-body potentials": "x_gulp_four_body_potentials", + "Improper torsions": "x_gulp_improper_torsions", + "Interatomic potentials": "x_gulp_interatomic_potentials", + "Many-body potentials": "x_gulp_many_body_potentials", + "Monopole - monopole (real)": "x_gulp_monopole_monopole_real", + "Monopole - monopole (recip)": "x_gulp_monopole_monopole_recip", + "Monopole - monopole (total)": "x_gulp_monopole_monopole_total", + "Neutralising energy": "x_gulp_neutralising", + "Non-primitive unit cell": "x_gulp_non_primitive_unit_cell", + "Out of plane potentials": "x_gulp_out_of_plane_potentials", + "Primitive unit cell": "x_gulp_primitive_unit_cell", + "ReaxFF force field": "x_gulp_reaxff_force_field", + "Region 1-2 interaction": "x_gulp_region_1_2_interaction", + "Region 2-2 interaction": "x_gulp_region_2_2_interaction", + "Self energy (EEM/QEq/SM)": "x_gulp_self_eem_qeq_sm", + "SM Coulomb correction": "x_gulp_sm_coulomb_correction", + "Solvation energy": "x_gulp_solvation", + "Three-body potentials": "x_gulp_three_body_potentials", + "Total lattice energy": "total", + "Total defect energy": "total", } self._sg_map = { - 'P 1': 1, 'P -1': 2, 'P 2': 3, 'P 21': 4, 'C 2': 5, 'P M': 6, 'P C': 7, - 'C M': 8, 'C C': 9, 'P 2/M': 10, 'P 21/M': 11, 'C 2/M': 12, 'P 2/C': 13, - 'P 21/C': 14, 'C 2/C': 15, 'P 2 2 2': 16, 'P 2 2 21': 17, 'P 21 21 2': 18, - 'P 21 21 21': 19, 'C 2 2 21': 20, 'C 2 2 2': 21, 'F 2 2 2': 22, 'I 2 2 2': 23, - 'I 21 21 21': 24, 'P M M 2': 25, 'P M C 21': 26, 'P C C 2': 27, 'P M A 2': 28, - 'P C A 21': 29, 'P N C 2': 30, 'P M N 21': 31, 'P B A 2': 32, 'P N A 21': 33, - 'P N N 2': 34, 'C M M 2': 35, 'C M C 21': 36, 'C C C 2': 37, 'A M M 2': 38, - 'A B M 2': 39, 'A M A 2': 40, 'A B A 2': 41, 'F M M 2': 42, 'F D D 2': 43, - 'I M M 2': 44, 'I B A 2': 45, 'I M A 2': 46, 'P M M M': 47, 'P N N N': 48, - 'P C C M': 49, 'P B A N': 50, 'P M M A': 51, 'P N N A': 52, 'P M N A': 53, - 'P C C A': 54, 'P B A M': 55, 'P C C N': 56, 'P B C M': 57, 'P N N M': 58, - 'P M M N': 59, 'P B C N': 60, 'P B C A': 61, 'P N M A': 62, 'C M C M': 63, - 'C M C A': 64, 'C M M M': 65, 'C C C M': 66, 'C M M A': 67, 'C C C A': 68, - 'F M M M': 69, 'F D D D': 70, 'I M M M': 71, 'I B A M': 72, 'I B C A': 73, - 'I M M A': 74, 'P 4': 75, 'P 41': 76, 'P 42': 77, 'P 43': 78, 'I 4': 79, - 'I 41': 80, 'P -4': 81, 'I -4': 82, 'P 4/M': 83, 'P 42/M': 84, 'P 4/N': 85, - 'P 42/N': 86, 'I 4/M': 87, 'I 41/A': 88, 'P 4 2 2': 89, 'P 4 21 2': 90, - 'P 41 2 2': 91, 'P 41 21 2': 92, 'P 42 2 2': 93, 'P 42 21 2': 94, - 'P 43 2 2': 95, 'P 43 21 2': 96, 'I 4 2 2': 97, 'I 41 2 2': 98, 'P 4 M M': 99, - 'P 4 B M': 100, 'P 42 C M': 101, 'P 42 N M': 102, 'P 4 C C': 103, - 'P 4 N C': 104, 'P 42 M C': 105, 'P 42 B C': 106, 'I 4 M M': 107, - 'I 4 C M': 108, 'I 41 M D': 109, 'I 41 C D': 110, 'P -4 2 M': 111, - 'P -4 2 C': 112, 'P -4 21 M': 113, 'P -4 21 C': 114, 'P -4 M 2': 115, - 'P -4 C 2': 116, 'P -4 B 2': 117, 'P -4 N 2': 118, 'I -4 M 2': 119, - 'I -4 C 2': 120, 'I -4 2 M': 121, 'I -4 2 D': 122, 'P 4/M M M': 123, - 'P 4/M C C': 124, 'P 4/N B M': 125, 'P 4/N N C': 126, 'P 4/M B M': 127, - 'P 4/M N C': 128, 'P 4/N M M': 129, 'P 4/N C C': 130, 'P 42/M M C': 131, - 'P 42/M C M': 132, 'P 42/N B C': 133, 'P 42/N N M': 134, 'P 42/M B C': 135, - 'P 42/M N M': 136, 'P 42/N M C': 137, 'P 42/N C M': 138, 'I 4/M M M': 139, - 'I 4/M C M': 140, 'I 41/A M D': 141, 'I 41/A C D': 142, 'P 3': 143, - 'P 31': 144, 'P 32': 145, 'R 3': 146, 'P -3': 147, 'R -3': 148, - 'P 3 1 2': 149, 'P 3 2 1': 150, 'P 31 1 2': 151, 'P 31 2 1': 152, - 'P 32 1 2': 153, 'P 32 2 1': 154, 'R 3 2': 155, 'P 3 M 1': 156, - 'P 3 1 M': 157, 'P 3 C 1': 158, 'P 3 1 C': 159, 'R 3 M': 160, 'R 3 C': 161, - 'P -3 1 M': 162, 'P -3 1 C': 163, 'P -3 M 1': 164, 'P -3 C 1': 165, - 'R -3 M': 166, 'R -3 C': 167, 'P 6': 168, 'P 61': 169, 'P 65': 170, - 'P 62': 171, 'P 64': 172, 'P 63': 173, 'P -6': 174, 'P 6/M': 175, - 'P 63/M': 176, 'P 6 2 2': 177, 'P 61 2 2': 178, 'P 65 2 2': 179, - 'P 62 2 2': 180, 'P 64 2 2': 181, 'P 63 2 2': 182, 'P 6 M M': 183, - 'P 6 C C': 184, 'P 63 C M': 185, 'P 63 M C': 186, 'P -6 M 2': 187, - 'P -6 C 2': 188, 'P -6 2 M': 189, 'P -6 2 C': 190, 'P 6/M M M': 191, - 'P 6/M C C': 192, 'P 63/M C M': 193, 'P 63/M M C': 194, 'P 2 3': 195, - 'F 2 3': 196, 'I 2 3': 197, 'P 21 3': 198, 'I 21 3': 199, 'P M 3': 200, - 'P M -3': 200, 'P N 3': 201, 'P N -3': 201, 'F M 3': 202, 'F M -3': 202, - 'F D 3': 203, 'F D -3': 203, 'I M 3': 204, 'I M -3': 204, 'P A 3': 205, - 'P A -3': 205, 'I A 3': 206, 'I A -3': 206, 'P 4 3 2': 207, 'P 42 3 2': 208, - 'F 4 3 2': 209, 'F 41 3 2': 210, 'I 4 3 2': 211, 'P 43 3 2': 212, - 'P 41 3 2': 213, 'I 41 3 2': 214, 'P -4 3 M': 215, 'F -4 3 M': 216, - 'I -4 3 M': 217, 'P -4 3 N': 218, 'F -4 3 C': 219, 'I -4 3 D': 220, - 'P M 3 M': 221, 'P N 3 N': 222, 'P M 3 N': 223, 'P N 3 M': 224, - 'F M 3 M': 225, 'F M 3 C': 226, 'F D 3 M': 227, 'F D 3 C': 228, - 'I M 3 M': 229, 'I A 3 D': 230, 'C 1': 231, 'C -1': 232 + "P 1": 1, + "P -1": 2, + "P 2": 3, + "P 21": 4, + "C 2": 5, + "P M": 6, + "P C": 7, + "C M": 8, + "C C": 9, + "P 2/M": 10, + "P 21/M": 11, + "C 2/M": 12, + "P 2/C": 13, + "P 21/C": 14, + "C 2/C": 15, + "P 2 2 2": 16, + "P 2 2 21": 17, + "P 21 21 2": 18, + "P 21 21 21": 19, + "C 2 2 21": 20, + "C 2 2 2": 21, + "F 2 2 2": 22, + "I 2 2 2": 23, + "I 21 21 21": 24, + "P M M 2": 25, + "P M C 21": 26, + "P C C 2": 27, + "P M A 2": 28, + "P C A 21": 29, + "P N C 2": 30, + "P M N 21": 31, + "P B A 2": 32, + "P N A 21": 33, + "P N N 2": 34, + "C M M 2": 35, + "C M C 21": 36, + "C C C 2": 37, + "A M M 2": 38, + "A B M 2": 39, + "A M A 2": 40, + "A B A 2": 41, + "F M M 2": 42, + "F D D 2": 43, + "I M M 2": 44, + "I B A 2": 45, + "I M A 2": 46, + "P M M M": 47, + "P N N N": 48, + "P C C M": 49, + "P B A N": 50, + "P M M A": 51, + "P N N A": 52, + "P M N A": 53, + "P C C A": 54, + "P B A M": 55, + "P C C N": 56, + "P B C M": 57, + "P N N M": 58, + "P M M N": 59, + "P B C N": 60, + "P B C A": 61, + "P N M A": 62, + "C M C M": 63, + "C M C A": 64, + "C M M M": 65, + "C C C M": 66, + "C M M A": 67, + "C C C A": 68, + "F M M M": 69, + "F D D D": 70, + "I M M M": 71, + "I B A M": 72, + "I B C A": 73, + "I M M A": 74, + "P 4": 75, + "P 41": 76, + "P 42": 77, + "P 43": 78, + "I 4": 79, + "I 41": 80, + "P -4": 81, + "I -4": 82, + "P 4/M": 83, + "P 42/M": 84, + "P 4/N": 85, + "P 42/N": 86, + "I 4/M": 87, + "I 41/A": 88, + "P 4 2 2": 89, + "P 4 21 2": 90, + "P 41 2 2": 91, + "P 41 21 2": 92, + "P 42 2 2": 93, + "P 42 21 2": 94, + "P 43 2 2": 95, + "P 43 21 2": 96, + "I 4 2 2": 97, + "I 41 2 2": 98, + "P 4 M M": 99, + "P 4 B M": 100, + "P 42 C M": 101, + "P 42 N M": 102, + "P 4 C C": 103, + "P 4 N C": 104, + "P 42 M C": 105, + "P 42 B C": 106, + "I 4 M M": 107, + "I 4 C M": 108, + "I 41 M D": 109, + "I 41 C D": 110, + "P -4 2 M": 111, + "P -4 2 C": 112, + "P -4 21 M": 113, + "P -4 21 C": 114, + "P -4 M 2": 115, + "P -4 C 2": 116, + "P -4 B 2": 117, + "P -4 N 2": 118, + "I -4 M 2": 119, + "I -4 C 2": 120, + "I -4 2 M": 121, + "I -4 2 D": 122, + "P 4/M M M": 123, + "P 4/M C C": 124, + "P 4/N B M": 125, + "P 4/N N C": 126, + "P 4/M B M": 127, + "P 4/M N C": 128, + "P 4/N M M": 129, + "P 4/N C C": 130, + "P 42/M M C": 131, + "P 42/M C M": 132, + "P 42/N B C": 133, + "P 42/N N M": 134, + "P 42/M B C": 135, + "P 42/M N M": 136, + "P 42/N M C": 137, + "P 42/N C M": 138, + "I 4/M M M": 139, + "I 4/M C M": 140, + "I 41/A M D": 141, + "I 41/A C D": 142, + "P 3": 143, + "P 31": 144, + "P 32": 145, + "R 3": 146, + "P -3": 147, + "R -3": 148, + "P 3 1 2": 149, + "P 3 2 1": 150, + "P 31 1 2": 151, + "P 31 2 1": 152, + "P 32 1 2": 153, + "P 32 2 1": 154, + "R 3 2": 155, + "P 3 M 1": 156, + "P 3 1 M": 157, + "P 3 C 1": 158, + "P 3 1 C": 159, + "R 3 M": 160, + "R 3 C": 161, + "P -3 1 M": 162, + "P -3 1 C": 163, + "P -3 M 1": 164, + "P -3 C 1": 165, + "R -3 M": 166, + "R -3 C": 167, + "P 6": 168, + "P 61": 169, + "P 65": 170, + "P 62": 171, + "P 64": 172, + "P 63": 173, + "P -6": 174, + "P 6/M": 175, + "P 63/M": 176, + "P 6 2 2": 177, + "P 61 2 2": 178, + "P 65 2 2": 179, + "P 62 2 2": 180, + "P 64 2 2": 181, + "P 63 2 2": 182, + "P 6 M M": 183, + "P 6 C C": 184, + "P 63 C M": 185, + "P 63 M C": 186, + "P -6 M 2": 187, + "P -6 C 2": 188, + "P -6 2 M": 189, + "P -6 2 C": 190, + "P 6/M M M": 191, + "P 6/M C C": 192, + "P 63/M C M": 193, + "P 63/M M C": 194, + "P 2 3": 195, + "F 2 3": 196, + "I 2 3": 197, + "P 21 3": 198, + "I 21 3": 199, + "P M 3": 200, + "P M -3": 200, + "P N 3": 201, + "P N -3": 201, + "F M 3": 202, + "F M -3": 202, + "F D 3": 203, + "F D -3": 203, + "I M 3": 204, + "I M -3": 204, + "P A 3": 205, + "P A -3": 205, + "I A 3": 206, + "I A -3": 206, + "P 4 3 2": 207, + "P 42 3 2": 208, + "F 4 3 2": 209, + "F 41 3 2": 210, + "I 4 3 2": 211, + "P 43 3 2": 212, + "P 41 3 2": 213, + "I 41 3 2": 214, + "P -4 3 M": 215, + "F -4 3 M": 216, + "I -4 3 M": 217, + "P -4 3 N": 218, + "F -4 3 C": 219, + "I -4 3 D": 220, + "P M 3 M": 221, + "P N 3 N": 222, + "P M 3 N": 223, + "P N 3 M": 224, + "F M 3 M": 225, + "F M 3 C": 226, + "F D 3 M": 227, + "F D 3 C": 228, + "I M 3 M": 229, + "I A 3 D": 230, + "C 1": 231, + "C -1": 232, } - def init_parser(self): + def write_to_archive(self) -> None: self.mainfile_parser.logger = self.logger - self.mainfile_parser.mainfile = self.filepath - - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.maindir = os.path.dirname(self.filepath) - self.logger = logger if logger is not None else logging - - self.init_parser() + self.mainfile_parser.mainfile = self.mainfile + self.maindir = os.path.dirname(self.mainfile) sec_run = Run() self.archive.run.append(sec_run) # general run parameters - header = self.mainfile_parser.get('header', {}) - sec_run.program = Program(version=header.get('program_version')) - sec_run.x_gulp_title = header.get('title') + header = self.mainfile_parser.get("header", {}) + sec_run.program = Program(version=header.get("program_version")) + sec_run.x_gulp_title = header.get("title") for key, val in self.mainfile_parser.items(): - if key.startswith('x_gulp_'): + if key.startswith("x_gulp_"): setattr(sec_run, key, val) if self.mainfile_parser.date_start is not None: - sec_run.time_run = TimeRun(date_start=datetime.strptime( - self.mainfile_parser.date_start, '%H:%M.%S %d %B %Y').timestamp()) + sec_run.time_run = TimeRun( + date_start=datetime.strptime( + self.mainfile_parser.date_start, "%H:%M.%S %d %B %Y" + ).timestamp() + ) if self.mainfile_parser.date_end is not None: sec_run.time_run.date_end = datetime.strptime( - self.mainfile_parser.date_end, '%H:%M.%S %d %B %Y').timestamp() + self.mainfile_parser.date_end, "%H:%M.%S %d %B %Y" + ).timestamp() - input_info = self.mainfile_parser.get('input_information', {}) + input_info = self.mainfile_parser.get("input_information", {}) sec_method = Method() sec_run.method.append(sec_method) # add force field interaction models @@ -643,37 +1079,54 @@ def parse(self, filepath, archive, logger): sec_method.force_field = force_field # for the new format, all potentials are in potentials while in old, needs to add # pair, three- and four-body interactions - for potential_type in ['interatomic_potential', 'pair_potential', 'three_body_potential', 'four_body_potential']: + for potential_type in [ + "interatomic_potential", + "pair_potential", + "three_body_potential", + "four_body_potential", + ]: for potential in input_info.get(potential_type, []): sec_model = Model() force_field.model.append(sec_model) - for interaction in potential.get('interaction', []): - sec_model.contributions.append(Interaction( - functional_form=interaction.functional_form, - atom_labels=[v[0] for v in interaction.get('atom_type')], - parameters={key: float(val) if isinstance( - val, np.float64) else val for key, val in interaction.get('key_parameter', [])} - )) + for interaction in potential.get("interaction", []): + sec_model.contributions.append( + Interaction( + functional_form=interaction.functional_form, + atom_labels=[v[0] for v in interaction.get("atom_type")], + parameters={ + key: float(val) if isinstance(val, np.float64) else val + for key, val in interaction.get("key_parameter", []) + }, + ) + ) - if (pgfnff := input_info.get('pgfnff')) is not None: + if (pgfnff := input_info.get("pgfnff")) is not None: sec_model = Model() force_field.model.append(sec_model) - sec_model.name = 'pGFNFF' - sec_model.contributions.append(Interaction( - parameters={key: float(val) for key, val in pgfnff.get('key_parameter', [])} - )) + sec_model.name = "pGFNFF" + sec_model.contributions.append( + Interaction( + parameters={ + key: float(val) for key, val in pgfnff.get("key_parameter", []) + } + ) + ) # atom parameters - for n in range(len(input_info.get('species', {}).get('label', []))): + for n in range(len(input_info.get("species", {}).get("label", []))): sec_atom_parameter = AtomParameters() sec_method.atom_parameters.append(sec_atom_parameter) for key, val in input_info.species.items(): setattr(sec_atom_parameter, key, val[n]) - input_config = self.mainfile_parser.get('input_configuration', {}) + input_config = self.mainfile_parser.get("input_configuration", {}) def parse_system(source): # read from input configuration if section does not have structure information - source = self.mainfile_parser.input_configuration if source.coordinates is None else source + source = ( + self.mainfile_parser.input_configuration + if source.coordinates is None + else source + ) if source.coordinates is None: return @@ -681,41 +1134,56 @@ def parse_system(source): sec_run.system.append(sec_system) positions = [] labels = [] - for atom in source.coordinates.get('atom', []): + for atom in source.coordinates.get("atom", []): # include only core atoms - if atom[1].lower().startswith('c'): + if atom[1].lower().startswith("c"): positions.append(atom[2:5]) labels.append(atom[0]) - lattice_vectors = source.get('lattice_vectors', input_config.lattice_vectors) - unit = source.coordinates.get('unit', '') - if unit.lower().startswith('frac') and lattice_vectors is not None: + lattice_vectors = source.get( + "lattice_vectors", input_config.lattice_vectors + ) + unit = source.coordinates.get("unit", "") + if unit.lower().startswith("frac") and lattice_vectors is not None: positions = np.dot(positions, lattice_vectors) # get periodicity periodic = np.array([False] * 3) - periodic[0:input_config.get('x_gulp_pbc', 3)] = True + periodic[0 : input_config.get("x_gulp_pbc", 3)] = True # build the basis atoms atoms = ase_Atoms(symbols=labels, cell=lattice_vectors, positions=positions) # build the full crystal space_group = self._sg_map.get(input_config.x_gulp_space_group) # cellpar from source or primitive cellpar or from input_config - cellpar = source.get('cell_parameters', source.get('cell_parameters_primitive', input_config.cell_parameters)) + cellpar = source.get( + "cell_parameters", + source.get("cell_parameters_primitive", input_config.cell_parameters), + ) if space_group is not None and cellpar is not None: atoms = ase_crystal( - atoms, spacegroup=space_group, pbc=periodic, primitive_cell=True, - cellpar=cellpar, onduplicates='replace' + atoms, + spacegroup=space_group, + pbc=periodic, + primitive_cell=True, + cellpar=cellpar, + onduplicates="replace", ) # TODO take fractional occupancies into consideration sec_system.atoms = Atoms( - positions=atoms.get_positions() * ureg.angstrom, labels=atoms.get_chemical_symbols(), - lattice_vectors=atoms.get_cell().array * ureg.angstrom, periodic=periodic + positions=atoms.get_positions() * ureg.angstrom, + labels=atoms.get_chemical_symbols(), + lattice_vectors=atoms.get_cell().array * ureg.angstrom, + periodic=periodic, ) return sec_system def parse_mechanical_property(name, source, target): values = source.get(name, [None] * 3) - types = ['x', 'y', 'z'] if 'youngs_modulus' in name else ['reuss', 'voigt', 'hill'] + types = ( + ["x", "y", "z"] + if "youngs_modulus" in name + else ["reuss", "voigt", "hill"] + ) for n, type_n in enumerate(types): - setattr(target, f'{name}_{type_n}', values[n]) + setattr(target, f"{name}_{type_n}", values[n]) def parse_calculation(source): sec_calc = Calculation() @@ -724,93 +1192,125 @@ def parse_calculation(source): if source.energy_components is not None: sec_energy = Energy() sec_calc.energy = sec_energy - for key, val in source.energy_components.get('key_val', []): + for key, val in source.energy_components.get("key_val", []): name = self._metainfo_map.get(key) if name is None: continue - val = val * ureg.eV if name.startswith('x_gulp_') else EnergyEntry(value=val * ureg.eV) + val = ( + val * ureg.eV + if name.startswith("x_gulp_") + else EnergyEntry(value=val * ureg.eV) + ) setattr(sec_energy, name, val) # assign primitive unit cell energy to energy total - sec_energy.total = EnergyEntry(value=sec_energy.x_gulp_primitive_unit_cell) + sec_energy.total = EnergyEntry( + value=sec_energy.x_gulp_primitive_unit_cell + ) if source.bulk_optimisation is not None: sec_opt = x_gulp_bulk_optimisation() sec_calc.x_gulp_bulk_optimisation = sec_opt - for cycle in source.bulk_optimisation.get('cycle', []): - sec_opt.x_gulp_bulk_optimisation_cycle.append(x_gulp_bulk_optimisation_cycle( - x_gulp_energy=cycle[0] * ureg.eV, x_gulp_gnorm=cycle[1], - x_gulp_cpu_time=cycle[2] - )) + for cycle in source.bulk_optimisation.get("cycle", []): + sec_opt.x_gulp_bulk_optimisation_cycle.append( + x_gulp_bulk_optimisation_cycle( + x_gulp_energy=cycle[0] * ureg.eV, + x_gulp_gnorm=cycle[1], + x_gulp_cpu_time=cycle[2], + ) + ) for key, val in source.bulk_optimisation.items(): - if key.startswith('x_gulp_'): + if key.startswith("x_gulp_"): setattr(sec_opt, key, val) if source.elastic_constants is not None: - workflow = Elastic( - method=ElasticMethod(), results=ElasticResults()) - workflow.method.energy_stress_calculator = 'gulp' - workflow.results.elastic_constants_matrix_second_order = source.elastic_constants - workflow.results.compliance_matrix_second_order = source.elastic_compliance - mechanical_properties = source.get('mechanical_properties', {}) - parse_mechanical_property('bulk_modulus', mechanical_properties, workflow.results) - parse_mechanical_property('shear_modulus', mechanical_properties, workflow.results) - parse_mechanical_property('x_gulp_velocity_s_wave', mechanical_properties, workflow.results) - parse_mechanical_property('x_gulp_velocity_p_wave', mechanical_properties, workflow.results) - parse_mechanical_property('x_gulp_youngs_modulus', mechanical_properties, workflow.results) - workflow.results.x_gulp_compressibility = mechanical_properties.get('compressibility') - poissons = mechanical_properties.get('poissons_ratio', np.zeros((3, 3))) + workflow = Elastic(method=ElasticMethod(), results=ElasticResults()) + workflow.method.energy_stress_calculator = "gulp" + workflow.results.elastic_constants_matrix_second_order = ( + source.elastic_constants + ) + workflow.results.compliance_matrix_second_order = ( + source.elastic_compliance + ) + mechanical_properties = source.get("mechanical_properties", {}) + parse_mechanical_property( + "bulk_modulus", mechanical_properties, workflow.results + ) + parse_mechanical_property( + "shear_modulus", mechanical_properties, workflow.results + ) + parse_mechanical_property( + "x_gulp_velocity_s_wave", mechanical_properties, workflow.results + ) + parse_mechanical_property( + "x_gulp_velocity_p_wave", mechanical_properties, workflow.results + ) + parse_mechanical_property( + "x_gulp_youngs_modulus", mechanical_properties, workflow.results + ) + workflow.results.x_gulp_compressibility = mechanical_properties.get( + "compressibility" + ) + poissons = mechanical_properties.get("poissons_ratio", np.zeros((3, 3))) # insert zeros for diagonal elements for n in range(3): - poissons[n] = np.insert(poissons[n], n, 0.) + poissons[n] = np.insert(poissons[n], n, 0.0) workflow.results.x_gulp_poissons_ratio = poissons self.archive.workflow2 = workflow # md properties if source.energy_total is not None: - sec_calc.energy = Energy(total=EnergyEntry( - value=source.energy_total[0], kinetic=source.energy_kinetic[0], - potential=source.energy_potential[0] - )) + sec_calc.energy = Energy( + total=EnergyEntry( + value=source.energy_total[0], + kinetic=source.energy_kinetic[0], + potential=source.energy_potential[0], + ) + ) sec_calc.energy.x_gulp_total_averaged = EnergyEntry( - value=source.energy_total[1], kinetic=source.energy_kinetic[1], - potential=source.energy_potential[1] + value=source.energy_total[1], + kinetic=source.energy_kinetic[1], + potential=source.energy_potential[1], ) sec_calc.time = source.time - sec_calc.temperature = source.get('temperature', [None, None])[0] - sec_calc.pressure = source.get('pressure', [None, None])[0] - sec_calc.x_gulp_temperature_averaged = source.get('temperature', [None, None])[1] - sec_calc.x_gulp_pressure_averaged = source.get('pressure', [None, None])[1] + sec_calc.temperature = source.get("temperature", [None, None])[0] + sec_calc.pressure = source.get("pressure", [None, None])[0] + sec_calc.x_gulp_temperature_averaged = source.get( + "temperature", [None, None] + )[1] + sec_calc.x_gulp_pressure_averaged = source.get("pressure", [None, None])[1] # other properties for key, val in source.items(): - if key.startswith('x_gulp_'): - if key.endswith('refractive_indices') and val is not None: + if key.startswith("x_gulp_"): + if key.endswith("refractive_indices") and val is not None: val = val.value setattr(sec_calc, key, val) return sec_calc - for output in self.mainfile_parser.get('single_point', []): - for calculation in output.get('calculation', []): + for output in self.mainfile_parser.get("single_point", []): + for calculation in output.get("calculation", []): sec_system = parse_system(calculation) sec_calc = parse_calculation(calculation) sec_calc.system_ref = sec_system - for output in self.mainfile_parser.get('defect', []): - for calculation in output.get('calculation', []): + for output in self.mainfile_parser.get("defect", []): + for calculation in output.get("calculation", []): sec_system = parse_system(calculation) sec_calc = parse_calculation(calculation) sec_calc.system_ref = sec_system - for output in self.mainfile_parser.get('molecular_dynamics', []): + for output in self.mainfile_parser.get("molecular_dynamics", []): workflow = MolecularDynamics(method=MolecularDynamicsMethod()) - workflow.method.thermodynamic_ensemble = output.get('ensemble_type', '').upper() + workflow.method.thermodynamic_ensemble = output.get( + "ensemble_type", "" + ).upper() workflow.method.integration_timestep = output.timestep for key, val in output.items(): - if key.startswith('x_gulp_'): + if key.startswith("x_gulp_"): setattr(workflow, key, val) # parse md steps - for step in output.get('step', []): + for step in output.get("step", []): parse_calculation(step) # TODO where are the trajectory data saved self.archive.workflow2 = workflow diff --git a/atomisticparsers/h5md/parser.py b/atomisticparsers/h5md/parser.py index db937c80..94c184fc 100644 --- a/atomisticparsers/h5md/parser.py +++ b/atomisticparsers/h5md/parser.py @@ -813,14 +813,11 @@ def parse_workflow(self): dict(method=workflow_parameters, results=workflow_results) ) - def parse(self, filepath, archive: EntryArchive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.logger = logging.getLogger(__name__) if logger is None else logger - self._maindir = os.path.dirname(self.filepath) + def write_to_archive(self) -> None: + self._maindir = os.path.dirname(self.mainfile) self._h5md_files = os.listdir(self._maindir) - self._basename = os.path.basename(filepath).rsplit(".", 1)[0] - self._data_parser.mainfile = self.filepath + self._basename = os.path.basename(self.mainfile).rsplit(".", 1)[0] + self._data_parser.mainfile = self.mainfile if self._data_parser.filehdf5 is None: self.logger.warning("hdf5 file missing in H5MD Parser.") return diff --git a/atomisticparsers/namd/parser.py b/atomisticparsers/namd/parser.py index 60043c9b..b8b9b399 100644 --- a/atomisticparsers/namd/parser.py +++ b/atomisticparsers/namd/parser.py @@ -32,73 +32,106 @@ from .metainfo import m_env # pylint: disable=unused-import -MOL = 6.022140857e+23 -re_f = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?' -re_n = r'[\n\r]' +MOL = 6.022140857e23 +re_f = r"[-+]?\d+\.\d*(?:[Ee][-+]\d+)?" +re_n = r"[\n\r]" class ConfigParser(TextParser): def init_quantities(self): self._quantities = [ Quantity( - 'parameter', - rf'([\w\-]+) +(.+){re_n}', - repeats=True, str_operation=lambda x: x.split(' ', 1)) + "parameter", + rf"([\w\-]+) +(.+){re_n}", + repeats=True, + str_operation=lambda x: x.split(" ", 1), + ) ] def get_parameters(self): - return {parameter[0]: parameter[1] for parameter in self.get('parameter', [])} + return {parameter[0]: parameter[1] for parameter in self.get("parameter", [])} class MainfileParser(TextParser): def init_quantities(self): self._quantities = [ Quantity( - 'version_arch', r'Info\: NAMD ([\d\.]+) for (.+)', - str_operation=lambda x: x.split(' ', 1), convert=False + "version_arch", + r"Info\: NAMD ([\d\.]+) for (.+)", + str_operation=lambda x: x.split(" ", 1), + convert=False, ), - Quantity('config_file', r'Info\: Configuration file is (\S+)', dtype=str), + Quantity("config_file", r"Info\: Configuration file is (\S+)", dtype=str), Quantity( - 'simulation_parameters', - r'Info\: SIMULATION PARAMETERS\:([\s\S]+?)Info\: SUMMARY', - sub_parser=TextParser(quantities=[ - Quantity( - 'parameter', - rf'Info\: ([A-Z][A-Z ]+) +([\d\.e\-\+]+)', - str_operation=lambda x: [v.strip() for v in x.rsplit(' ', 1)], repeats=True), - Quantity( - 'cell', - r'PERIODIC CELL BASIS \d +(.+)', - repeats=True, dtype=np.dtype(np.float64) - ), - Quantity('output_file', r'Info\: OUTPUT FILENAME +(\S+)', dtype=str), - Quantity('coordinate_file', r'Info\: COORDINATE PDB +(\S+)', dtype=str), - Quantity('structure_file', r'Info\: STRUCTURE FILE +(\S+)', dtype=str), - Quantity('parameter_file', r'Info\: PARAMETERS +(\S+)', dtype=str, repeats=True) - ]) + "simulation_parameters", + r"Info\: SIMULATION PARAMETERS\:([\s\S]+?)Info\: SUMMARY", + sub_parser=TextParser( + quantities=[ + Quantity( + "parameter", + rf"Info\: ([A-Z][A-Z ]+) +([\d\.e\-\+]+)", + str_operation=lambda x: [ + v.strip() for v in x.rsplit(" ", 1) + ], + repeats=True, + ), + Quantity( + "cell", + r"PERIODIC CELL BASIS \d +(.+)", + repeats=True, + dtype=np.dtype(np.float64), + ), + Quantity( + "output_file", r"Info\: OUTPUT FILENAME +(\S+)", dtype=str + ), + Quantity( + "coordinate_file", + r"Info\: COORDINATE PDB +(\S+)", + dtype=str, + ), + Quantity( + "structure_file", r"Info\: STRUCTURE FILE +(\S+)", dtype=str + ), + Quantity( + "parameter_file", + r"Info\: PARAMETERS +(\S+)", + dtype=str, + repeats=True, + ), + ] + ), ), Quantity( - 'step', - rf'ENERGY\: +(\d+ +{re_f}.+)', repeats=True, dtype=np.dtype(np.float64) + "step", + rf"ENERGY\: +(\d+ +{re_f}.+)", + repeats=True, + dtype=np.dtype(np.float64), ), Quantity( - 'timing', - r'TIMING\: *(\d+) *CPU\: *[\d\.]+, +[\d\.]+/step +Wall\: *([\d\.]+), *([\d\.]+)/step', - repeats=True, dtype=np.dtype(np.float64) + "timing", + r"TIMING\: *(\d+) *CPU\: *[\d\.]+, +[\d\.]+/step +Wall\: *([\d\.]+), *([\d\.]+)/step", + repeats=True, + dtype=np.dtype(np.float64), ), - Quantity('total_time', r'WallClock: +([\d\.]+)', dtype=float), + Quantity("total_time", r"WallClock: +([\d\.]+)", dtype=float), Quantity( - 'property_names', - r'ETITLE\: +(.+)', - str_operation=lambda x: x.lower().strip().split()), + "property_names", + r"ETITLE\: +(.+)", + str_operation=lambda x: x.lower().strip().split(), + ), Quantity( - 'coordinates_write_step', - r'WRITING COORDINATES TO OUTPUT FILE AT STEP (\d+)', - repeats=True, dtype=np.int32) + "coordinates_write_step", + r"WRITING COORDINATES TO OUTPUT FILE AT STEP (\d+)", + repeats=True, + dtype=np.int32, + ), ] def get_parameters(self): - return {p[0]: p[1] for p in self.get('simulation_parameters', {}).get('parameter', [])} + return { + p[0]: p[1] + for p in self.get("simulation_parameters", {}).get("parameter", []) + } class NAMDParser(MDParser): @@ -107,55 +140,47 @@ def __init__(self) -> None: self.config_parser = ConfigParser() self.traj_parser = MDAnalysisParser() self._metainfo_map = { - 'bond': 'energy_contribution_bond', - 'angle': 'energy_contribution_angle', - 'dihed': 'energy_contribution_dihedral', - 'imprp': 'energy_contribution_improper', - 'elect': 'energy_electronic', - 'vdw': 'energy_van_der_waals', - 'boundary': 'energy_contribution_boundary', - 'misc': 'energy_contribution_miscellaneous', - 'total': 'energy_total', - 'temp': 'temperature', - 'total3': 'energy_x_namd_total3', - 'tempavg': 'x_namd_temperature_average', - 'pressure': 'pressure', - 'gpressure': 'x_namd_gpressure', - 'volume': 'x_namd_volume', - 'pressavg': 'x_namd_pressure_average', - 'gpressavg': 'x_namd_gpressure_average' + "bond": "energy_contribution_bond", + "angle": "energy_contribution_angle", + "dihed": "energy_contribution_dihedral", + "imprp": "energy_contribution_improper", + "elect": "energy_electronic", + "vdw": "energy_van_der_waals", + "boundary": "energy_contribution_boundary", + "misc": "energy_contribution_miscellaneous", + "total": "energy_total", + "temp": "temperature", + "total3": "energy_x_namd_total3", + "tempavg": "x_namd_temperature_average", + "pressure": "pressure", + "gpressure": "x_namd_gpressure", + "volume": "x_namd_volume", + "pressavg": "x_namd_pressure_average", + "gpressavg": "x_namd_gpressure_average", } super().__init__() - def init_parser(self): - ''' - Initializes the mainfile and auxilliary file parsers. - ''' - self.mainfile_parser.mainfile = self.filepath + def write_to_archive(self) -> None: + """ + Main parsing function. Populates the archive with the quantities parsed from the + mainfile parser and auxilliary file parsers. + """ + self.mainfile_parser.mainfile = self.mainfile self.mainfile_parser.logger = self.logger self.config_parser.logger = self.logger self.traj_parser.logger = self.logger + self.maindir = os.path.dirname(self.mainfile) - def parse(self, filepath: str, archive: EntryArchive, logger=None): - ''' - Main parsing function. Populates the archive with the quantities parsed from the - mainfile parser and auxilliary file parsers. - ''' - self.filepath = os.path.abspath(filepath) - self.maindir = os.path.dirname(filepath) - self.archive = archive - self.logger = logger if logger is not None else logging - - self.init_parser() sec_run = Run() - archive.run.append(sec_run) - version_arch = self.mainfile_parser.get('version_arch', [None, None]) - sec_run.program = Program(name='namd', version=version_arch[0]) + self.archive.run.append(sec_run) + version_arch = self.mainfile_parser.get("version_arch", [None, None]) + sec_run.program = Program(name="namd", version=version_arch[0]) sec_run.program.x_namd_build_osarch = version_arch[1] # read the config file and simulation parameters self.config_parser.mainfile = os.path.join( - self.maindir, os.path.basename(self.mainfile_parser.get('config_file', ''))) + self.maindir, os.path.basename(self.mainfile_parser.get("config_file", "")) + ) sec_method = Method() sec_run.method.append(sec_method) sec_method.x_namd_input_parameters = self.config_parser.get_parameters() @@ -171,43 +196,59 @@ def get_system_data(index): lattice_vectors = self.traj_parser.get_lattice_vectors(index) if lattice_vectors is None: # get if from simulation parameters - lattice_vectors = self.mainfile_parser.get('simulation_parameters', {}).get('cell') - lattice_vectors = lattice_vectors * ureg.angstrom if lattice_vectors is not None else lattice_vectors - return dict(atoms=dict( - labels=labels, positions=positions, velocities=velocities, lattice_vectors=lattice_vectors)) + lattice_vectors = self.mainfile_parser.get( + "simulation_parameters", {} + ).get("cell") + lattice_vectors = ( + lattice_vectors * ureg.angstrom + if lattice_vectors is not None + else lattice_vectors + ) + return dict( + atoms=dict( + labels=labels, + positions=positions, + velocities=velocities, + lattice_vectors=lattice_vectors, + ) + ) # input structure - parameters = self.mainfile_parser.get('simulation_parameters', {}) - self.traj_parser.mainfile = os.path.join(self.maindir, parameters.get('coordinate_file')) + parameters = self.mainfile_parser.get("simulation_parameters", {}) + self.traj_parser.mainfile = os.path.join( + self.maindir, parameters.get("coordinate_file") + ) # initial_system self.parse_trajectory_step(get_system_data(0)) # energy unit is kcal / mol - n_atoms = self.traj_parser.get('n_atoms') + n_atoms = self.traj_parser.get("n_atoms") energy_unit = ureg.J * 4184.0 * n_atoms / MOL # trajectories # TODO other formats - output_file = parameters.get('output_file') - self.traj_parser.mainfile = os.path.join(self.maindir, f'{output_file}.coor') - self.traj_parser.options = dict(format='coor') + output_file = parameters.get("output_file") + self.traj_parser.mainfile = os.path.join(self.maindir, f"{output_file}.coor") + self.traj_parser.options = dict(format="coor") self.traj_parser.auxilliary_files = [] # output properties at each step - property_names = self.mainfile_parser.get('property_names', []) + property_names = self.mainfile_parser.get("property_names", []) # saved trajectories - saved_trajectories = self.mainfile_parser.get('coordinates_write_step', []) + saved_trajectories = self.mainfile_parser.get("coordinates_write_step", []) # md data - steps_data = self.mainfile_parser.get('step', []) + steps_data = self.mainfile_parser.get("step", []) # set up md parser self.n_atoms = n_atoms self.trajectory_steps = [0] + saved_trajectories self.thermodynamics_steps = [int(step[0]) for step in steps_data] - timings = self.mainfile_parser.get('timing', []) - timing_step = sec_method.x_namd_simulation_parameters.get('TIMING OUTPUT STEPS', 1) - time_per_step = self.mainfile_parser.get('total_time', 0.) / len(steps_data) + timings = self.mainfile_parser.get("timing", []) + timing_step = sec_method.x_namd_simulation_parameters.get( + "TIMING OUTPUT STEPS", 1 + ) + time_per_step = self.mainfile_parser.get("total_time", 0.0) / len(steps_data) for step in self.trajectory_steps: if not step or self.traj_parser.mainfile is None: @@ -222,41 +263,47 @@ def get_system_data(index): continue energy: Dict[str, Any] = {} - thermo_data = {'step': step, 'energy': energy} + thermo_data = {"step": step, "energy": energy} for index, name in enumerate(property_names): metainfo_name = self._metainfo_map.get(name) if metainfo_name is None: continue value = step_data[index] - if metainfo_name.startswith('energy_contribution_'): - energy.setdefault('contributions', []) - metainfo_name = metainfo_name.replace('energy_contribution_', '') - energy['contributions'].append(dict(kind=metainfo_name, value=value * energy_unit)) - elif metainfo_name.startswith('energy_'): - metainfo_name = metainfo_name.replace('energy_', '') + if metainfo_name.startswith("energy_contribution_"): + energy.setdefault("contributions", []) + metainfo_name = metainfo_name.replace("energy_contribution_", "") + energy["contributions"].append( + dict(kind=metainfo_name, value=value * energy_unit) + ) + elif metainfo_name.startswith("energy_"): + metainfo_name = metainfo_name.replace("energy_", "") energy[metainfo_name] = dict(value=value * energy_unit) - if metainfo_name == 'total': + if metainfo_name == "total": # include potential and kinetic terms - for key in ['kinetic', 'potential']: + for key in ["kinetic", "potential"]: try: - energy['total'][key] = step_data[property_names.index(key)] * energy_unit + energy["total"][key] = ( + step_data[property_names.index(key)] * energy_unit + ) except Exception: pass - elif 'pressure' in metainfo_name: + elif "pressure" in metainfo_name: thermo_data[metainfo_name] = value * ureg.bar - elif 'temperature' in metainfo_name: + elif "temperature" in metainfo_name: thermo_data[metainfo_name] = value * ureg.kelvin - elif 'volume' in metainfo_name: - thermo_data[metainfo_name] = value * ureg.angstrom ** 3 + elif "volume" in metainfo_name: + thermo_data[metainfo_name] = value * ureg.angstrom**3 # TODO forces # timing index_time = step // timing_step if index_time < len(timings): - thermo_data['time_calculation'] = timings[index_time][2] - thermo_data['time_physical'] = (0 if index_time == 0 else timings[index_time - 1][1]) + timings[index_time][2] * (step % timing_step + 1) + thermo_data["time_calculation"] = timings[index_time][2] + thermo_data["time_physical"] = ( + 0 if index_time == 0 else timings[index_time - 1][1] + ) + timings[index_time][2] * (step % timing_step + 1) elif time_per_step: - thermo_data['time_calculation'] = time_per_step - thermo_data['time_physical'] = time_per_step * step + thermo_data["time_calculation"] = time_per_step + thermo_data["time_physical"] = time_per_step * step self.parse_thermodynamics_step(thermo_data) # workflow diff --git a/atomisticparsers/tinker/parser.py b/atomisticparsers/tinker/parser.py index 8964d749..76fa3adc 100644 --- a/atomisticparsers/tinker/parser.py +++ b/atomisticparsers/tinker/parser.py @@ -25,24 +25,21 @@ from nomad.parsing.file_parser.text_parser import Quantity, TextParser from nomad.units import ureg from runschema.run import Run, Program -from runschema.system import ( - System, Atoms -) -from runschema.method import ( - Method, ForceField, Model, AtomParameters -) +from runschema.system import System, Atoms +from runschema.method import Method, ForceField, Model, AtomParameters from runschema.calculation import ( - Calculation, Energy, EnergyEntry, VibrationalFrequencies -) -from simulationworkflowschema import ( - GeometryOptimization, GeometryOptimizationMethod + Calculation, + Energy, + EnergyEntry, + VibrationalFrequencies, ) +from simulationworkflowschema import GeometryOptimization, GeometryOptimizationMethod from atomisticparsers.utils import MDAnalysisParser, MDParser from .metainfo.tinker import x_tinker_section_control_parameters -re_n = r'[\n\r]' -re_f = r'[-+]?\d+\.\d*(?:[DdEe][-+]\d+)?' -mol = (1 * ureg.mol).to('particle').magnitude +re_n = r"[\n\r]" +re_f = r"[-+]?\d+\.\d*(?:[DdEe][-+]\d+)?" +mol = (1 * ureg.mol).to("particle").magnitude class KeyParser(TextParser): @@ -59,10 +56,15 @@ def to_key_val(val_in): else: return [val[0], val[1:]] - self._quantities = [Quantity( - 'key_val', - rf'([a-z\-]+) *([^#]*?) *{re_n}', - str_operation=to_key_val, repeats=True, convert=False)] + self._quantities = [ + Quantity( + "key_val", + rf"([a-z\-]+) *([^#]*?) *{re_n}", + str_operation=to_key_val, + repeats=True, + convert=False, + ) + ] class RunParser(TextParser): @@ -73,29 +75,32 @@ def init_quantities(self): def to_argument(val_in): val_in = val_in.strip().split() argument = dict() - if '-k' in val_in: - argument['key'] = val_in.pop(val_in.index('-k') + 1) - val_in.remove('-k') - argument['name'] = val_in.pop(0) - argument['parameters'] = val_in + if "-k" in val_in: + argument["key"] = val_in.pop(val_in.index("-k") + 1) + val_in.remove("-k") + argument["name"] = val_in.pop(0) + argument["parameters"] = val_in return argument self._quantities = [ Quantity( - 'molecular_dynamics', - r'dynamic +(\S+ +\-*k* *\S+.+)', - repeats=True, str_operation=to_argument + "molecular_dynamics", + r"dynamic +(\S+ +\-*k* *\S+.+)", + repeats=True, + str_operation=to_argument, ), Quantity( - 'geometry_optimization', - r'minimize +(\S+ +\-*k* *\S+.+)', - repeats=True, str_operation=to_argument + "geometry_optimization", + r"minimize +(\S+ +\-*k* *\S+.+)", + repeats=True, + str_operation=to_argument, ), Quantity( - 'single_point', - r'vibrate +(\S+ +\-*k* *\S+.+)', - repeats=True, str_operation=to_argument - ) + "single_point", + r"vibrate +(\S+ +\-*k* *\S+.+)", + repeats=True, + str_operation=to_argument, + ), ] @@ -105,135 +110,190 @@ def __init__(self): def init_quantities(self): iteration_quantity = Quantity( - 'iteration', - rf'Iter.+([\s\S]+?){re_n} *{re_n}', - sub_parser=TextParser(quantities=[Quantity( - 'step', - rf' +\d+ +({re_f} +{re_f} +[\d\.DE\+\- ]+)', - str_operation=lambda x: [float(v) for v in x.replace('D+', 'E+').replace('d+', 'e+').split()], - repeats=True, dtype=np.dtype(np.float64) - )]) + "iteration", + rf"Iter.+([\s\S]+?){re_n} *{re_n}", + sub_parser=TextParser( + quantities=[ + Quantity( + "step", + rf" +\d+ +({re_f} +{re_f} +[\d\.DE\+\- ]+)", + str_operation=lambda x: [ + float(v) + for v in x.replace("D+", "E+").replace("d+", "e+").split() + ], + repeats=True, + dtype=np.dtype(np.float64), + ) + ] + ), ) calculation_quantities = [ - Quantity('program_version', r'Version ([\d\.]+)', dtype=str), + Quantity("program_version", r"Version ([\d\.]+)", dtype=str), Quantity( - 'vibrate', - r'(Eigenvalues of the Hessian Matrix[\s\S]+?)\Z', - sub_parser=TextParser(quantities=[ - Quantity( - 'eigenvalues', - r'Eigenvalues of the Hessian Matrix :\s+([\d\-\s\.]+)', - dtype=np.dtype(np.float64) - ), - Quantity( - 'frequencies', - r'Vibrational Frequencies \(cm\-1\) :([\d\-\s\.]+)', - dtype=np.dtype(np.float64) - ), - ])), + "vibrate", + r"(Eigenvalues of the Hessian Matrix[\s\S]+?)\Z", + sub_parser=TextParser( + quantities=[ + Quantity( + "eigenvalues", + r"Eigenvalues of the Hessian Matrix :\s+([\d\-\s\.]+)", + dtype=np.dtype(np.float64), + ), + Quantity( + "frequencies", + r"Vibrational Frequencies \(cm\-1\) :([\d\-\s\.]+)", + dtype=np.dtype(np.float64), + ), + ] + ), + ), Quantity( - 'minimize', - r'(.+?Optimization :[\s\S]+?Final Gradient Norm.+)', - sub_parser=TextParser(quantities=[ - iteration_quantity, - Quantity('method', r' +(.+) Optimization', flatten=False, dtype=str), - Quantity('x_tiner_final_function_value', rf'Final Function Value : +({re_f})', dtype=np.float64), - Quantity('x_tinker_final_rms_gradient', rf'Final RMS Gradient : +({re_f})', dtype=np.float64), - Quantity('x_tinker_final_gradient_norm', rf'Final Gradient Norm : +({re_f})', dtype=np.float64) - ]) + "minimize", + r"(.+?Optimization :[\s\S]+?Final Gradient Norm.+)", + sub_parser=TextParser( + quantities=[ + iteration_quantity, + Quantity( + "method", r" +(.+) Optimization", flatten=False, dtype=str + ), + Quantity( + "x_tiner_final_function_value", + rf"Final Function Value : +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_tinker_final_rms_gradient", + rf"Final RMS Gradient : +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_tinker_final_gradient_norm", + rf"Final Gradient Norm : +({re_f})", + dtype=np.float64, + ), + ] + ), ), Quantity( - 'dynamic', - r'(Molecular Dynamics[\s\S]+?)(?:\#\#\#\#\#|\Z)', - sub_parser=TextParser(quantities=[ - iteration_quantity, - Quantity( - 'instantaneous_values', - r'(Instantaneous Values for Frame Saved at[\s\S]+?Coordinate File.+)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('step', r'(\d+) Dynamics Steps', dtype=np.int32), - Quantity( - 'time', - rf'Current Time +({re_f}) Picosecond', - dtype=np.float64, unit=ureg.ps - ), - Quantity( - 'potential', - rf'Current Potential +({re_f}) Kcal/mole', - dtype=np.float64, unit=ureg.J * 4184.0 / mol - ), - Quantity( - 'kinetic', - rf'Current Kinetic +({re_f}) Kcal/mole', - dtype=np.float64, unit=ureg.J * 4184.0 / mol - ), - Quantity( - 'lattice_lengths', - rf'Lattice Lengths +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64) - ), - Quantity( - 'lattice_angles', - rf'Lattice Angles +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64) - ), - Quantity('frame', r'Frame Number +(\d+)', dtype=np.int32), - Quantity('coordinate_file', r'Coordinate File +(\S+)', dtype=str) - ]) - ), - Quantity( - 'average_values', - r'(Average Values for the Last[\s\S]+?Density.+)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('step', r'(\d+) Dynamics Steps', dtype=np.int32), - Quantity( - 'time', - rf'Simulation Time +({re_f}) Picosecond', - dtype=np.float64, unit=ureg.ps + "dynamic", + r"(Molecular Dynamics[\s\S]+?)(?:\#\#\#\#\#|\Z)", + sub_parser=TextParser( + quantities=[ + iteration_quantity, + Quantity( + "instantaneous_values", + r"(Instantaneous Values for Frame Saved at[\s\S]+?Coordinate File.+)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "step", r"(\d+) Dynamics Steps", dtype=np.int32 + ), + Quantity( + "time", + rf"Current Time +({re_f}) Picosecond", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "potential", + rf"Current Potential +({re_f}) Kcal/mole", + dtype=np.float64, + unit=ureg.J * 4184.0 / mol, + ), + Quantity( + "kinetic", + rf"Current Kinetic +({re_f}) Kcal/mole", + dtype=np.float64, + unit=ureg.J * 4184.0 / mol, + ), + Quantity( + "lattice_lengths", + rf"Lattice Lengths +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + ), + Quantity( + "lattice_angles", + rf"Lattice Angles +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + ), + Quantity( + "frame", r"Frame Number +(\d+)", dtype=np.int32 + ), + Quantity( + "coordinate_file", + r"Coordinate File +(\S+)", + dtype=str, + ), + ] ), - Quantity( - 'energy_total', - rf'Total Energy +({re_f}) Kcal/mole', - dtype=np.float64, unit=ureg.J * 4184.0 / mol + ), + Quantity( + "average_values", + r"(Average Values for the Last[\s\S]+?Density.+)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "step", r"(\d+) Dynamics Steps", dtype=np.int32 + ), + Quantity( + "time", + rf"Simulation Time +({re_f}) Picosecond", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "energy_total", + rf"Total Energy +({re_f}) Kcal/mole", + dtype=np.float64, + unit=ureg.J * 4184.0 / mol, + ), + Quantity( + "potential", + rf"Potential Energy +({re_f}) Kcal/mole", + dtype=np.float64, + unit=ureg.J * 4184.0 / mol, + ), + Quantity( + "kinetic", + rf"Kinetic Energy +({re_f}) Kcal/mole", + dtype=np.float64, + unit=ureg.J * 4184.0 / mol, + ), + Quantity( + "temperature", + rf"Temperature +({re_f}) Kelvin", + dtype=np.float64, + unit=ureg.kelvin, + ), + Quantity( + "pressure", + rf"Pressure +({re_f}) Atmosphere", + dtype=np.float64, + unit=ureg.atm, + ), + Quantity( + "density", + rf"Density +({re_f}) Grams/cc", + dtype=np.float64, + unit=ureg.g / ureg.cm**3, + ), + ] ), - Quantity( - 'potential', - rf'Potential Energy +({re_f}) Kcal/mole', - dtype=np.float64, unit=ureg.J * 4184.0 / mol - ), - Quantity( - 'kinetic', - rf'Kinetic Energy +({re_f}) Kcal/mole', - dtype=np.float64, unit=ureg.J * 4184.0 / mol - ), - Quantity( - 'temperature', - rf'Temperature +({re_f}) Kelvin', - dtype=np.float64, unit=ureg.kelvin - ), - Quantity( - 'pressure', - rf'Pressure +({re_f}) Atmosphere', - dtype=np.float64, unit=ureg.atm - ), - Quantity( - 'density', - rf'Density +({re_f}) Grams/cc', - dtype=np.float64, unit=ureg.g / ureg.cm ** 3 - ), - - ]) - ) - ]) - ) + ), + ] + ), + ), ] self._quantities = [ Quantity( - 'run', - r'Software Tools for Molecular Design([\s\S]+?)(?:TINKER \-\-\-|\Z)', - repeats=True, sub_parser=TextParser(quantities=calculation_quantities) + "run", + r"Software Tools for Molecular Design([\s\S]+?)(?:TINKER \-\-\-|\Z)", + repeats=True, + sub_parser=TextParser(quantities=calculation_quantities), ) ] @@ -245,32 +305,20 @@ def __init__(self): self.key_parser = KeyParser() self.run_parser = RunParser() self._run_types = { - 'vibrate': 'single_point', 'minimize': 'geometry_optimization', - 'dynamic': 'molecular_dynamics'} + "vibrate": "single_point", + "minimize": "geometry_optimization", + "dynamic": "molecular_dynamics", + } super().__init__() - def init_parser(self): - self.out_parser.mainfile = self.filepath - self.out_parser.logger = self.logger - self._base_name = os.path.basename(self.filepath).rsplit(".", 1)[0] - files = os.listdir(self.maindir) - # required to track the current files - self._files = dict() - for ext in ['xyz', 'arc', 'key']: - matches = fnmatch.filter(files, f'{self._base_name}.{ext}*') - if not matches: - matches = fnmatch.filter(files, f'tinker.{ext}*') - matches.sort(key=lambda x: int(x.rsplit('_', 1)[-1]) if x[-1].isdecimal() else 0) - self._files[ext] = dict(files=matches, current=matches[0] if matches else '') - def _get_tinker_file(self, ext): if ext not in self._files: return - current = self._files[ext]['current'] + current = self._files[ext]["current"] # advance to the next file - files = self._files[ext]['files'] + files = self._files[ext]["files"] try: - self._files[ext]['current'] = files[files.index(current) + 1] + self._files[ext]["current"] = files[files.index(current) + 1] except Exception: # the last file has been reached in this case pass @@ -278,7 +326,9 @@ def _get_tinker_file(self, ext): def parse_system(self, index, filename): self.traj_parser.mainfile = filename - self.traj_parser.options = dict(topology_format='ARC' if filename.endswith('.arc') else 'TXYZ') + self.traj_parser.options = dict( + topology_format="ARC" if filename.endswith(".arc") else "TXYZ" + ) if self.traj_parser.universe is None: return @@ -288,12 +338,17 @@ def parse_system(self, index, filename): trajectory = self.traj_parser.universe.trajectory[index] sec_system.atoms = Atoms( positions=trajectory.positions * ureg.angstrom, - labels=[atom.name for atom in list(self.traj_parser.universe.atoms)]) + labels=[atom.name for atom in list(self.traj_parser.universe.atoms)], + ) if trajectory.triclinic_dimensions is not None: - sec_system.atoms.lattice_vectors = trajectory.triclinic_dimensions * ureg.angstrom + sec_system.atoms.lattice_vectors = ( + trajectory.triclinic_dimensions * ureg.angstrom + ) sec_system.atoms.periodic = [True, True, True] if trajectory.has_velocities: - sec_system.atoms.velocities = trajectory.velocities * (ureg.angstrom / ureg.ps) + sec_system.atoms.velocities = trajectory.velocities * ( + ureg.angstrom / ureg.ps + ) return sec_system @@ -302,20 +357,26 @@ def parse_method(self): self.archive.run[-1].method.append(sec_method) parameters = self.archive.run[-1].x_tinker_control_parameters - if parameters.get('parameters') is not None: - sec_method.force_field = ForceField(model=[Model(name=parameters['parameters'])]) + if parameters.get("parameters") is not None: + sec_method.force_field = ForceField( + model=[Model(name=parameters["parameters"])] + ) # TODO read the prm file property_map = { - 'name': 'label', 'type': 'x_tinker_atom_type', 'resid': 'x_tinker_atom_resid' + "name": "label", + "type": "x_tinker_atom_type", + "resid": "x_tinker_atom_resid", } if self.traj_parser.universe is not None: for atom in list(self.traj_parser.universe.atoms): sec_atom = AtomParameters() sec_method.atom_parameters.append(sec_atom) - for key in ['charge', 'mass', 'name', 'type', 'resid']: + for key in ["charge", "mass", "name", "type", "resid"]: if hasattr(atom, key): - setattr(sec_atom, property_map.get(key, key), getattr(atom, key)) + setattr( + sec_atom, property_map.get(key, key), getattr(atom, key) + ) # TODO add interaction parameters @@ -330,84 +391,119 @@ def resolve_ensemble_type(): if parameters is None: return - thermostat, barostat = parameters.get('thermostat', ''), parameters.get('barostat', '') - thermostats = ['berendsen', 'andersen', 'bussi'] + thermostat, barostat = ( + parameters.get("thermostat", ""), + parameters.get("barostat", ""), + ) + thermostats = ["berendsen", "andersen", "bussi"] # TODO verify this - if barostat.lower() in ['berendsen', 'montecarlo']: - ensemble_type = 'NPT' + if barostat.lower() in ["berendsen", "montecarlo"]: + ensemble_type = "NPT" elif not barostat and thermostat in thermostats: - ensemble_type = 'NVT' + ensemble_type = "NVT" elif not barostat and not thermostat: - ensemble_type = 'NVE' + ensemble_type = "NVE" else: ensemble_type = None return ensemble_type # TODO handle multiple workflow sections - parameters = list(program.get('parameters', [])) + parameters = list(program.get("parameters", [])) # so we cover the case when optional parameters are missing parameters.extend([None] * 6) workflow_type = self.resolve_workflow_type(run) - if workflow_type == 'molecular_dynamics': + if workflow_type == "molecular_dynamics": control_parameters = self.archive.run[-1].x_tinker_control_parameters # TODO verify this! I am sure it is wrong but tinker documentation does not specify clearly - ensemble_types = ['NVE', 'NVT', 'NPT', None, None] - ensemble = ensemble_types[int(parameters[3]) - 1] if parameters[3] is not None else resolve_ensemble_type() - self.parse_md_workflow(dict( - method=dict(thermodynamic_ensemble=ensemble, integration_timestep=parameters[1] * ureg.fs if parameters[1] else parameters[1]), - x_tinker_barostat_tau=control_parameters.get('tau-pressure'), - x_tinker_barostat_type=control_parameters.get('barostat'), - x_tinker_integrator_type=control_parameters.get('integrator'), - x_tinker_number_of_steps_requested=parameters[0], - x_tinker_integrator_dt=parameters[1] * ureg.ps if parameters[1] else parameters, - x_tinker_thermostat_target_temperature=parameters[4] * ureg.kelvin if parameters[4] else parameters[4], - x_tinker_barostat_target_pressure=parameters[5] * ureg.atmosphere if parameters[5] else parameters[5], - x_tinker_thermostat_tau=control_parameters.get('tau-temperature'), - x_tinker_thermostat_type=control_parameters.get('thermostat'))) - - elif workflow_type == 'geometry_optimization': + ensemble_types = ["NVE", "NVT", "NPT", None, None] + ensemble = ( + ensemble_types[int(parameters[3]) - 1] + if parameters[3] is not None + else resolve_ensemble_type() + ) + self.parse_md_workflow( + dict( + method=dict( + thermodynamic_ensemble=ensemble, + integration_timestep=parameters[1] * ureg.fs + if parameters[1] + else parameters[1], + ), + x_tinker_barostat_tau=control_parameters.get("tau-pressure"), + x_tinker_barostat_type=control_parameters.get("barostat"), + x_tinker_integrator_type=control_parameters.get("integrator"), + x_tinker_number_of_steps_requested=parameters[0], + x_tinker_integrator_dt=parameters[1] * ureg.ps + if parameters[1] + else parameters, + x_tinker_thermostat_target_temperature=parameters[4] * ureg.kelvin + if parameters[4] + else parameters[4], + x_tinker_barostat_target_pressure=parameters[5] * ureg.atmosphere + if parameters[5] + else parameters[5], + x_tinker_thermostat_tau=control_parameters.get("tau-temperature"), + x_tinker_thermostat_type=control_parameters.get("thermostat"), + ) + ) + + elif workflow_type == "geometry_optimization": workflow = GeometryOptimization(method=GeometryOptimizationMethod()) - workflow.method.method = run.minimize.get('method') + workflow.method.method = run.minimize.get("method") workflow.x_tinker_convergence_tolerance_rms_gradient = parameters[0] for key, val in run.minimize.items(): - if key.startswith('x_tinker'): + if key.startswith("x_tinker"): setattr(workflow, key, val) self.archive.workflow2 = workflow - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.logger = logging.getLogger(__name__) if logger is None else logger - self.maindir = os.path.dirname(self.filepath) - self.init_parser() + def write_to_archive(self) -> None: + self.out_parser.mainfile = self.mainfile + self.out_parser.logger = self.logger + self.maindir = os.path.dirname(self.mainfile) + self._base_name = os.path.basename(self.mainfile).rsplit(".", 1)[0] + files = os.listdir(self.maindir) + # required to track the current files + self._files = dict() + for ext in ["xyz", "arc", "key"]: + matches = fnmatch.filter(files, f"{self._base_name}.{ext}*") + if not matches: + matches = fnmatch.filter(files, f"tinker.{ext}*") + matches.sort( + key=lambda x: int(x.rsplit("_", 1)[-1]) if x[-1].isdecimal() else 0 + ) + self._files[ext] = dict( + files=matches, current=matches[0] if matches else "" + ) + + self.maindir = os.path.dirname(self.mainfile) workflows = [] def get_reference_filename(program): # resolves the filename as provided in the cli command for the program - filename = program.get('name', '') - if '.xyz' not in filename: - filename = self._files['xyz']['current'] + filename = program.get("name", "") + if ".xyz" not in filename: + filename = self._files["xyz"]["current"] # succeding files start from the index of the file for geometry_opt and md - filename = filename if '.xyz' in filename else f'{filename}.xyz' - if workflows[-1] in ['geometry_optimization', 'molecular_dynamics']: - files = self._files['xyz']['files'] + filename = filename if ".xyz" in filename else f"{filename}.xyz" + if workflows[-1] in ["geometry_optimization", "molecular_dynamics"]: + files = self._files["xyz"]["files"] try: - self._files['xyz']['current'] = files[files.index(filename) + 1] + self._files["xyz"]["current"] = files[files.index(filename) + 1] except Exception: pass return os.path.join(self.maindir, filename) # necesarry to extract program parameters from the cli command in basename.run - self.run_parser.mainfile = os.path.join(self.maindir, f'{self._base_name}.run') + self.run_parser.mainfile = os.path.join(self.maindir, f"{self._base_name}.run") # TODO put runs in separate archives - for run in self.out_parser.get('run', []): + for run in self.out_parser.get("run", []): sec_run = Run() - archive.run.append(sec_run) - sec_run.program = Program(name='tinker', version=run.get('program_version')) + self.archive.run.append(sec_run) + sec_run.program = Program(name="tinker", version=run.get("program_version")) workflow_type = self.resolve_workflow_type(run) workflows.append(workflow_type) @@ -416,7 +512,10 @@ def get_reference_filename(program): # extract the information here # program can be executed for an arbitrary number of times so we need to # resolve the index of the appropriate command - n_workflow = len([workflow for workflow in workflows if workflow == workflow_type]) - 1 + n_workflow = ( + len([workflow for workflow in workflows if workflow == workflow_type]) + - 1 + ) program = self.run_parser.get(workflow_type, []) program = program[n_workflow] if len(program) > n_workflow else dict() if run.vibrate is not None: @@ -426,10 +525,16 @@ def get_reference_filename(program): sec_run.calculation.append(sec_scc) sec_vibrations = VibrationalFrequencies() sec_scc.vibrational_frequencies.append(sec_vibrations) - sec_vibrations.value = [run.vibrate.frequencies[n] for n in range(len( - run.vibrate.get('frequencies', []))) if n % 2 == 1] * (1 / ureg.cm) - sec_vibrations.x_tinker_eigenvalues = [run.vibrate.eigenvalues[n] for n in range(len( - run.vibrate.get('eigenvalues', []))) if n % 2 == 1] + sec_vibrations.value = [ + run.vibrate.frequencies[n] + for n in range(len(run.vibrate.get("frequencies", []))) + if n % 2 == 1 + ] * (1 / ureg.cm) + sec_vibrations.x_tinker_eigenvalues = [ + run.vibrate.eigenvalues[n] + for n in range(len(run.vibrate.get("eigenvalues", []))) + if n % 2 == 1 + ] sec_scc.system_ref = sec_system if run.minimize is not None: @@ -437,32 +542,49 @@ def get_reference_filename(program): initial_system = self.parse_system(0, get_reference_filename(program)) # optimized structure - sec_system = self.parse_system(0, self._get_tinker_file('xyz')) + sec_system = self.parse_system(0, self._get_tinker_file("xyz")) - for n, step in enumerate(run.minimize.get('iteration', {}).get('step', [])): + for n, step in enumerate( + run.minimize.get("iteration", {}).get("step", []) + ): sec_scc = Calculation() sec_run.calculation.append(sec_scc) - sec_scc.energy = Energy(total=EnergyEntry( - value=step[0] * len(sec_system.atoms.positions) * ureg.J * 4184.0 / mol)) + sec_scc.energy = Energy( + total=EnergyEntry( + value=step[0] + * len(sec_system.atoms.positions) + * ureg.J + * 4184.0 + / mol + ) + ) if n == 0: sec_scc.system_ref = initial_system # only the optimized structure is printed, corresponding to the last scc sec_scc.system_ref = sec_system if run.dynamic is not None: - self.traj_parser.mainfile = self._get_tinker_file('arc') - self.traj_parser.options = dict(topology_format='ARC') - average_values = {value.step: value for value in run.dynamic.get('average_values', [])} + self.traj_parser.mainfile = self._get_tinker_file("arc") + self.traj_parser.options = dict(topology_format="ARC") + average_values = { + value.step: value for value in run.dynamic.get("average_values", []) + } # set up md parser # TODO this is not efficient as we need to load traj file to know if it exists traj_steps, thermo_steps, n_atoms, filenames = [], [], [], [] - for nframe, value in enumerate(run.dynamic.get('instantaneous_values', [])): - filename = os.path.join(self.maindir, value.get('coordinate_file', '')) - index = nframe if filename.endswith('.arc') else 0 + for nframe, value in enumerate( + run.dynamic.get("instantaneous_values", []) + ): + filename = os.path.join( + self.maindir, value.get("coordinate_file", "") + ) + index = nframe if filename.endswith(".arc") else 0 filenames.append((filename, index)) thermo_steps.append(value.step) self.traj_parser.mainfile = filename - self.traj_parser.options = dict(topology_format='ARC' if filename.endswith('.arc') else 'TXYZ') + self.traj_parser.options = dict( + topology_format="ARC" if filename.endswith(".arc") else "TXYZ" + ) trajectory = self.traj_parser.universe.trajectory[index] if trajectory is not None: traj_steps.append(value.step) @@ -473,54 +595,86 @@ def get_reference_filename(program): for n_frame, step in enumerate(self.thermodynamics_steps): self.traj_parser.mainfile = filenames[n_frame][0] - trajectory = self.traj_parser.universe.trajectory[filenames[n_frame][1]] + trajectory = self.traj_parser.universe.trajectory[ + filenames[n_frame][1] + ] if step in self.trajectory_steps: # sampled trajectory positions = trajectory.positions * ureg.angstrom - labels = [atom.name for atom in list(self.traj_parser.universe.atoms)] + labels = [ + atom.name for atom in list(self.traj_parser.universe.atoms) + ] lattice_vectors, periodic, velocities = None, None, None if trajectory.triclinic_dimensions is not None: - lattice_vectors = trajectory.triclinic_dimensions * ureg.angstrom + lattice_vectors = ( + trajectory.triclinic_dimensions * ureg.angstrom + ) periodic = [True, True, True] if trajectory.has_velocities: - velocities = trajectory.velocities * (ureg.angstrom / ureg.ps) - self.parse_trajectory_step(dict(atoms=dict( - positions=positions, labels=labels, lattice_vectors=lattice_vectors, - periodic=periodic, velocities=velocities))) + velocities = trajectory.velocities * ( + ureg.angstrom / ureg.ps + ) + self.parse_trajectory_step( + dict( + atoms=dict( + positions=positions, + labels=labels, + lattice_vectors=lattice_vectors, + periodic=periodic, + velocities=velocities, + ) + ) + ) # thermo properties instantaneous = run.dynamic.instantaneous_values[n_frame] n_atoms_step = n_atoms[traj_steps.index(step)] - energy = dict(total=dict( - value=(instantaneous.potential + instantaneous.kinetic) * n_atoms_step, - kinetic=instantaneous.kinetic * n_atoms_step, - potential=instantaneous.potential * n_atoms_step)) + energy = dict( + total=dict( + value=(instantaneous.potential + instantaneous.kinetic) + * n_atoms_step, + kinetic=instantaneous.kinetic * n_atoms_step, + potential=instantaneous.potential * n_atoms_step, + ) + ) data = dict(energy=energy, step=instantaneous.step) average = average_values.get(instantaneous.step) if average: - data['temperature'] = average.temperature - data['pressure'] = average.pressure + data["temperature"] = average.temperature + data["pressure"] = average.pressure if trajectory is not None and trajectory.has_forces: - data['forces'] = dict(total=dict(value=trajectory.forces * (ureg.kJ / ureg.angstrom))) + data["forces"] = dict( + total=dict( + value=trajectory.forces * (ureg.kJ / ureg.angstrom) + ) + ) self.parse_thermodynamics_step(data) # TODO add support for other tinker programs # control parameters # a key file can be specified optionally via the cli we assign this instead to get the parameters - self.key_parser.mainfile = os.path.join(self.maindir, program.get('key', '')) + self.key_parser.mainfile = os.path.join( + self.maindir, program.get("key", "") + ) if self.key_parser.mainfile is None: # if the key file is not specified in cli, read from either basename.key # or tinker.key - self.key_parser.mainfile = self._get_tinker_file('key') + self.key_parser.mainfile = self._get_tinker_file("key") - parameters = {key.lower(): val for key, val in self.key_parser.get('key_val', [])} + parameters = { + key.lower(): val for key, val in self.key_parser.get("key_val", []) + } sec_run.x_tinker_control_parameters = parameters # TODO should this be removed and only have a dictionary of control parameters sec_control = x_tinker_section_control_parameters() sec_run.x_tinker_section_control_parameters.append(sec_control) for key, val in parameters.items(): - key = key.replace('-', '_') - setattr(sec_control, f'x_tinker_inout_control_{key}', val if isinstance(val, bool) else str(val)) + key = key.replace("-", "_") + setattr( + sec_control, + f"x_tinker_inout_control_{key}", + val if isinstance(val, bool) else str(val), + ) self.parse_method() diff --git a/atomisticparsers/xtb/parser.py b/atomisticparsers/xtb/parser.py index d07caa4e..dce3286b 100644 --- a/atomisticparsers/xtb/parser.py +++ b/atomisticparsers/xtb/parser.py @@ -29,17 +29,25 @@ from runschema.method import Method, TB, xTB, Interaction from runschema.system import System, Atoms from runschema.calculation import ( - Calculation, ScfIteration, Energy, EnergyEntry, BandEnergies, Multipoles, MultipolesEntry + Calculation, + ScfIteration, + Energy, + EnergyEntry, + BandEnergies, + Multipoles, + MultipolesEntry, ) from simulationworkflowschema import ( - SinglePoint, GeometryOptimization, GeometryOptimizationMethod + SinglePoint, + GeometryOptimization, + GeometryOptimizationMethod, ) from atomisticparsers.utils import MDParser from atomisticparsers.xtb.metainfo import m_env # pylint: disable=unused-import -re_f = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?' -re_n = r'[\n\r]' +re_f = r"[-+]?\d+\.\d*(?:[Ee][-+]\d+)?" +re_n = r"[\n\r]" class OutParser(TextParser): @@ -47,12 +55,12 @@ def __init__(self, **kwargs): super().__init__(**kwargs) def init_quantities(self): - re_f = r'[\d\.E\+\-]+' + re_f = r"[\d\.E\+\-]+" def str_to_eigenvalues(val_in): occupations, energies = [], [] - for val in val_in.strip().split('\n'): - val = val.split('(')[0].split() + for val in val_in.strip().split("\n"): + val = val.split("(")[0].split() if not val[0].isdecimal(): continue occupations.append(float(val.pop(1)) if len(val) > 3 else 0.0) @@ -60,354 +68,477 @@ def str_to_eigenvalues(val_in): return occupations, energies * ureg.hartree def str_to_parameters(val_in): - val = [v.strip() for v in val_in.split(' ', 1)] + val = [v.strip() for v in val_in.split(" ", 1)] val[1] = val[1].split() return val def str_to_wall_time(val_in): - name, d, h, m, s = val_in.rsplit(' ', 4) - return name.strip(), 24 * 60 * 60 * float(d) + 60 * 60 * float(h) + 60 * float(m) + float(s) + name, d, h, m, s = val_in.rsplit(" ", 4) + return name.strip(), 24 * 60 * 60 * float(d) + 60 * 60 * float( + h + ) + 60 * float(m) + float(s) common_quantities = [ Quantity( - 'setup', - r'SETUP\s*:\s*([\s\S]+?\.+\n *\n)', - sub_parser=TextParser(quantities=[ - Quantity( - 'parameter', - r'\n +\: +(.+?\s{2,}[\w\.\-\+]+)', str_operation=lambda x: [ - v.strip() for v in x.split(' ', 1)], repeats=True - ) - ]) - ), - Quantity( - 'summary', - r'(SUMMARY[\s\S]+?\:\n *\n)', - sub_parser=TextParser(quantities=[ - Quantity( - 'energy_total', - rf':: total energy\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'x_xtb_gradient_norm', - rf':: gradient norm\s*({re_f})', - unit=ureg.hartree / ureg.angstrom, dtype=np.float64), - Quantity( - 'x_xtb_hl_gap', - rf':: HOMO-LUMO gap\s*({re_f})', - unit=ureg.eV, dtype=np.float64), - Quantity( - 'energy_x_xtb_scc', - rf':: SCC energy\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_isotropic_es', - rf':: \-\> isotropic ES\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_anisotropic_es', - rf':: \-\> anisotropic ES\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_anisotropic_xc', - rf':: \-\> anisotropic XC\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_dispersion', - rf':: \-\> dispersion\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_electrostatic', - rf':: \-\> electrostatic\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_repulsion', - rf':: repulsion energy\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_halogen_bond_corr', - rf':: halogen bond corr\.\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'energy_x_xtb_add_restraining', - rf':: repulsion energy\s*({re_f})', - unit=ureg.hartree, dtype=np.float64), - Quantity( - 'charge_total', - rf':: total charge\s*({re_f})', - unit=ureg.elementary_charge, dtype=np.float64 - ) - ]) - ) + "setup", + r"SETUP\s*:\s*([\s\S]+?\.+\n *\n)", + sub_parser=TextParser( + quantities=[ + Quantity( + "parameter", + r"\n +\: +(.+?\s{2,}[\w\.\-\+]+)", + str_operation=lambda x: [ + v.strip() for v in x.split(" ", 1) + ], + repeats=True, + ) + ] + ), + ), + Quantity( + "summary", + r"(SUMMARY[\s\S]+?\:\n *\n)", + sub_parser=TextParser( + quantities=[ + Quantity( + "energy_total", + rf":: total energy\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "x_xtb_gradient_norm", + rf":: gradient norm\s*({re_f})", + unit=ureg.hartree / ureg.angstrom, + dtype=np.float64, + ), + Quantity( + "x_xtb_hl_gap", + rf":: HOMO-LUMO gap\s*({re_f})", + unit=ureg.eV, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_scc", + rf":: SCC energy\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_isotropic_es", + rf":: \-\> isotropic ES\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_anisotropic_es", + rf":: \-\> anisotropic ES\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_anisotropic_xc", + rf":: \-\> anisotropic XC\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_dispersion", + rf":: \-\> dispersion\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_electrostatic", + rf":: \-\> electrostatic\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_repulsion", + rf":: repulsion energy\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_halogen_bond_corr", + rf":: halogen bond corr\.\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "energy_x_xtb_add_restraining", + rf":: repulsion energy\s*({re_f})", + unit=ureg.hartree, + dtype=np.float64, + ), + Quantity( + "charge_total", + rf":: total charge\s*({re_f})", + unit=ureg.elementary_charge, + dtype=np.float64, + ), + ] + ), + ), ] orbital_quantities = [ Quantity( - 'eigenvalues', - r'# +Occupation +Energy.+\s*\-+([\s\S]+?)\-+\n', - str_operation=str_to_eigenvalues), + "eigenvalues", + r"# +Occupation +Energy.+\s*\-+([\s\S]+?)\-+\n", + str_operation=str_to_eigenvalues, + ), Quantity( - 'hl_gap', - rf'HL\-Gap\s*({re_f})', dtype=np.float64, unit=ureg.hartree), + "hl_gap", rf"HL\-Gap\s*({re_f})", dtype=np.float64, unit=ureg.hartree + ), Quantity( - 'energy_fermi', - rf'Fermi\-level\s*({re_f})', dtype=np.float64, unit=ureg.hartree - ) + "energy_fermi", + rf"Fermi\-level\s*({re_f})", + dtype=np.float64, + unit=ureg.hartree, + ), ] property_quantities = orbital_quantities + [ Quantity( - 'dipole', - r'(dipole\:[\s\S]+?)molecular', - sub_parser=TextParser(quantities=[ - Quantity( - 'q', - rf'q only: +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.elementary_charge * ureg.bohr - ), - Quantity( - 'full', - rf'full: +({re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), unit=ureg.elementary_charge * ureg.bohr - ) - ]) + "dipole", + r"(dipole\:[\s\S]+?)molecular", + sub_parser=TextParser( + quantities=[ + Quantity( + "q", + rf"q only: +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.elementary_charge * ureg.bohr, + ), + Quantity( + "full", + rf"full: +({re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + unit=ureg.elementary_charge * ureg.bohr, + ), + ] + ), ), Quantity( - 'quadrupole', - r'(quadrupole \(traceless\):[\s\S]+?)\n *\n', - sub_parser=TextParser(quantities=[ - Quantity( - 'q', - r'q only:(.+)', - dtype=np.dtype(np.float64), unit=ureg.elementary_charge * ureg.bohr ** 2 - ), - Quantity( - 'full', - r'full:(.+)', - dtype=np.dtype(np.float64), unit=ureg.elementary_charge * ureg.bohr ** 2 - ), - Quantity( - 'q_dip', - r'q\+dip:(.+)', - dtype=np.dtype(np.float64), unit=ureg.elementary_charge * ureg.bohr ** 2 - ) - ]) - ) + "quadrupole", + r"(quadrupole \(traceless\):[\s\S]+?)\n *\n", + sub_parser=TextParser( + quantities=[ + Quantity( + "q", + r"q only:(.+)", + dtype=np.dtype(np.float64), + unit=ureg.elementary_charge * ureg.bohr**2, + ), + Quantity( + "full", + r"full:(.+)", + dtype=np.dtype(np.float64), + unit=ureg.elementary_charge * ureg.bohr**2, + ), + Quantity( + "q_dip", + r"q\+dip:(.+)", + dtype=np.dtype(np.float64), + unit=ureg.elementary_charge * ureg.bohr**2, + ), + ] + ), + ), ] geometry_quantities = [ - Quantity('file', r'optimized geometry written to:\s*(\S+)')] - - scf_quantities = common_quantities + orbital_quantities + [ - Quantity( - 'model', - r'((?:G F N \d+ \- x T B.+\s+\-+\s+|Reference)\s*[\s\S]+?\n *\n)', - sub_parser=TextParser(quantities=[ - Quantity('reference', r'Reference\s*(\S+)'), - Quantity( - 'contribution', - r'(\w+:\s*[\s\S]+?)(?:\*|\n *\n)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity('name', r'(\w+):'), - Quantity( - 'parameters', - r'\n +(\w.+? .+)', - str_operation=str_to_parameters, repeats=True - ) - ]) - ) - ]) - ), - Quantity( - 'scf_iteration', - r'iter\s*E\s*dE.+([\s\S]+?convergence.+)', - sub_parser=TextParser(quantities=[ - Quantity('step', r'(\d+ .+)', repeats=True), - Quantity( - 'converged', - r'(\*\*\* convergence criteria.+)', - str_operation=lambda x: 'satisfied' in x - ) - ]) - ), + Quantity("file", r"optimized geometry written to:\s*(\S+)") ] - optimization_quantities = [ - Quantity( - 'cycle', - r'CYCLE +\d([\s\S]+?\n *\n)', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'energy_total', - rf'total energy +: +({re_f}) Eh', - dtype=np.float64, unit=ureg.hartree + scf_quantities = ( + common_quantities + + orbital_quantities + + [ + Quantity( + "model", + r"((?:G F N \d+ \- x T B.+\s+\-+\s+|Reference)\s*[\s\S]+?\n *\n)", + sub_parser=TextParser( + quantities=[ + Quantity("reference", r"Reference\s*(\S+)"), + Quantity( + "contribution", + r"(\w+:\s*[\s\S]+?)(?:\*|\n *\n)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity("name", r"(\w+):"), + Quantity( + "parameters", + r"\n +(\w.+? .+)", + str_operation=str_to_parameters, + repeats=True, + ), + ] + ), + ), + ] ), - Quantity( - 'energy_change', - rf'change +({re_f}) Eh', - dtype=np.float64, unit=ureg.hartree + ), + Quantity( + "scf_iteration", + r"iter\s*E\s*dE.+([\s\S]+?convergence.+)", + sub_parser=TextParser( + quantities=[ + Quantity("step", r"(\d+ .+)", repeats=True), + Quantity( + "converged", + r"(\*\*\* convergence criteria.+)", + str_operation=lambda x: "satisfied" in x, + ), + ] ), - Quantity( - 'scf_iteration', - rf'\.+(\s+\d+\s+{re_f}[\s\S]+?)\*', - sub_parser=TextParser(quantities=[ - Quantity('step', rf'{re_n} +(\d+ +{re_f}.+)', repeats=True), - Quantity('time', rf'SCC iter\. +\.+ +(\d+) min, +({re_f}) sec') - ]) - ) - ]) + ), + ] + ) + + optimization_quantities = [ + Quantity( + "cycle", + r"CYCLE +\d([\s\S]+?\n *\n)", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "energy_total", + rf"total energy +: +({re_f}) Eh", + dtype=np.float64, + unit=ureg.hartree, + ), + Quantity( + "energy_change", + rf"change +({re_f}) Eh", + dtype=np.float64, + unit=ureg.hartree, + ), + Quantity( + "scf_iteration", + rf"\.+(\s+\d+\s+{re_f}[\s\S]+?)\*", + sub_parser=TextParser( + quantities=[ + Quantity( + "step", + rf"{re_n} +(\d+ +{re_f}.+)", + repeats=True, + ), + Quantity( + "time", + rf"SCC iter\. +\.+ +(\d+) min, +({re_f}) sec", + ), + ] + ), + ), + ] + ), ), Quantity( - 'converged', - r'(\*\*\* GEOMETRY OPTIMIZATION.+)', - str_operation=lambda x: 'CONVERGED' in x + "converged", + r"(\*\*\* GEOMETRY OPTIMIZATION.+)", + str_operation=lambda x: "CONVERGED" in x, ), Quantity( - 'final_structure', - r'final structure:([\s\S]+?\-+\s+\|)', - sub_parser=TextParser(quantities=[ - Quantity('atom_labels', r'([A-Z][a-z]?) ', repeats=True), - Quantity( - 'atom_positions', - rf'({re_f} +{re_f} +{re_f})', - unit=ureg.angstrom, dtype=np.dtype(np.float64) - ) - ]) + "final_structure", + r"final structure:([\s\S]+?\-+\s+\|)", + sub_parser=TextParser( + quantities=[ + Quantity("atom_labels", r"([A-Z][a-z]?) ", repeats=True), + Quantity( + "atom_positions", + rf"({re_f} +{re_f} +{re_f})", + unit=ureg.angstrom, + dtype=np.dtype(np.float64), + ), + ] + ), ), Quantity( - 'final_single_point', - r'(Final Singlepoint +\|[\s\S]+?::::::::::::)', - sub_parser=TextParser(quantities=scf_quantities) - ) + "final_single_point", + r"(Final Singlepoint +\|[\s\S]+?::::::::::::)", + sub_parser=TextParser(quantities=scf_quantities), + ), ] + common_quantities md_quantities = [ - Quantity('traj_file', r'trajectories on (.+?\.trj)'), - Quantity('x_xtb_md_time', rf'MD time /ps +: +({re_f})', dtype=np.float64, unit=ureg.ps), - Quantity('timestep', rf'dt /fs +: +({re_f})', dtype=np.float64, unit=ureg.fs), - Quantity('x_xtb_scc_accuracy', rf'SCC accuracy +: +({re_f})', dtype=np.float64), - Quantity('x_xtb_temperature', rf'temperature /K +: +({re_f})', dtype=np.float64, unit=ureg.K), - Quantity('x_xtb_max_steps', rf'max_steps +: +(\d+)', dtype=np.int32), - Quantity('x_xtb_block_length', rf'block length \(av\. \) +: +(\d+)', dtype=np.int32), - Quantity('x_xtb_dumpstep_trj', rf'dumpstep\(trj\) /fs +: +({re_f})', dtype=np.float64), - Quantity('x_xtb_dumpstep_coords', rf'dumpstep\(coords\) /fs +: +({re_f})', dtype=np.float64), - Quantity('x_xtb_h_atoms_mass', rf'H atoms mass \(amu\) +: +(\d+)', dtype=np.float64, unit=ureg.amu), - Quantity('x_xtb_n_degrees_freedom', rf' +: +(\d+)', dtype=np.float64), - Quantity('x_xtb_shake_bonds', rf'SHAKE on\. # bonds +: +(\d+)', dtype=np.float64), - Quantity('x_xtb_berendsen', rf'Berendsen THERMOSTAT (\S+)', str_operation=lambda x: x == 'on'), - Quantity( - 'cycle', - rf'{re_n} +(\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})', - dtype=np.dtype(np.float64), repeats=True - ) + Quantity("traj_file", r"trajectories on (.+?\.trj)"), + Quantity( + "x_xtb_md_time", + rf"MD time /ps +: +({re_f})", + dtype=np.float64, + unit=ureg.ps, + ), + Quantity( + "timestep", rf"dt /fs +: +({re_f})", dtype=np.float64, unit=ureg.fs + ), + Quantity( + "x_xtb_scc_accuracy", rf"SCC accuracy +: +({re_f})", dtype=np.float64 + ), + Quantity( + "x_xtb_temperature", + rf"temperature /K +: +({re_f})", + dtype=np.float64, + unit=ureg.K, + ), + Quantity("x_xtb_max_steps", rf"max_steps +: +(\d+)", dtype=np.int32), + Quantity( + "x_xtb_block_length", + rf"block length \(av\. \) +: +(\d+)", + dtype=np.int32, + ), + Quantity( + "x_xtb_dumpstep_trj", + rf"dumpstep\(trj\) /fs +: +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_xtb_dumpstep_coords", + rf"dumpstep\(coords\) /fs +: +({re_f})", + dtype=np.float64, + ), + Quantity( + "x_xtb_h_atoms_mass", + rf"H atoms mass \(amu\) +: +(\d+)", + dtype=np.float64, + unit=ureg.amu, + ), + Quantity("x_xtb_n_degrees_freedom", rf" +: +(\d+)", dtype=np.float64), + Quantity( + "x_xtb_shake_bonds", rf"SHAKE on\. # bonds +: +(\d+)", dtype=np.float64 + ), + Quantity( + "x_xtb_berendsen", + rf"Berendsen THERMOSTAT (\S+)", + str_operation=lambda x: x == "on", + ), + Quantity( + "cycle", + rf"{re_n} +(\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})", + dtype=np.dtype(np.float64), + repeats=True, + ), ] self._quantities = [ - Quantity('program_version', r'\* xtb version ([\d\.]+)'), + Quantity("program_version", r"\* xtb version ([\d\.]+)"), Quantity( - 'date_start', - r'started run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)', - dtype=str, flatten=False + "date_start", + r"started run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)", + dtype=str, + flatten=False, ), Quantity( - 'date_end', - r'finished run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)', - dtype=str, flatten=False + "date_end", + r"finished run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)", + dtype=str, + flatten=False, ), Quantity( - 'calculation_setup', - r'Calculation Setup +\|\s*\-+\s*([\s\S]+?)\-+\s+\|', - sub_parser=TextParser(quantities=[ - Quantity( - 'parameter', r'([\w ]+:.+)', - str_operation=lambda x: [v.strip() for v in x.split(':')], repeats=True - ) - ]) + "calculation_setup", + r"Calculation Setup +\|\s*\-+\s*([\s\S]+?)\-+\s+\|", + sub_parser=TextParser( + quantities=[ + Quantity( + "parameter", + r"([\w ]+:.+)", + str_operation=lambda x: [v.strip() for v in x.split(":")], + repeats=True, + ) + ] + ), ), Quantity( - 'gfnff', - r'(G F N - F F[\s\S]+?::::::::::::\n *\n)', - sub_parser=TextParser(quantities=scf_quantities) + "gfnff", + r"(G F N - F F[\s\S]+?::::::::::::\n *\n)", + sub_parser=TextParser(quantities=scf_quantities), ), Quantity( - 'gfn1', - r'(G F N 1 - x T B[\s\S]+?::::::::::::\n *\n)', - sub_parser=TextParser(quantities=scf_quantities) + "gfn1", + r"(G F N 1 - x T B[\s\S]+?::::::::::::\n *\n)", + sub_parser=TextParser(quantities=scf_quantities), ), Quantity( - 'gfn2', - r'(G F N 2 - x T B[\s\S]+?::::::::::::\n *\n)', - sub_parser=TextParser(quantities=scf_quantities) + "gfn2", + r"(G F N 2 - x T B[\s\S]+?::::::::::::\n *\n)", + sub_parser=TextParser(quantities=scf_quantities), ), Quantity( - 'ancopt', - r'(A N C O P T +\|[\s\S]+?::::::::::::\n *\n)', - sub_parser=TextParser(quantities=optimization_quantities) + "ancopt", + r"(A N C O P T +\|[\s\S]+?::::::::::::\n *\n)", + sub_parser=TextParser(quantities=optimization_quantities), ), Quantity( - 'md', - r'(Molecular Dynamics +\|[\s\S]+?exit of md)', - sub_parser=TextParser(quantities=md_quantities) + "md", + r"(Molecular Dynamics +\|[\s\S]+?exit of md)", + sub_parser=TextParser(quantities=md_quantities), ), Quantity( - 'property', - r'(Property Printout +\|[\s\S]+?\-+\s+\|)', - sub_parser=TextParser(quantities=property_quantities) + "property", + r"(Property Printout +\|[\s\S]+?\-+\s+\|)", + sub_parser=TextParser(quantities=property_quantities), ), Quantity( - 'geometry', - r'(Geometry Summary +\|[\s\S]+?\-+\s+\|)', - sub_parser=TextParser(quantities=geometry_quantities) + "geometry", + r"(Geometry Summary +\|[\s\S]+?\-+\s+\|)", + sub_parser=TextParser(quantities=geometry_quantities), ), Quantity( - 'energy_total', rf'\| TOTAL ENERGY\s*({re_f})', - dtype=np.float64, unit=ureg.hartree + "energy_total", + rf"\| TOTAL ENERGY\s*({re_f})", + dtype=np.float64, + unit=ureg.hartree, ), Quantity( - 'gradient_norm', - rf'\| GRADIENT NORM\s*({re_f})', - dtype=np.float64, unit=ureg.hartree / ureg.angstrom + "gradient_norm", + rf"\| GRADIENT NORM\s*({re_f})", + dtype=np.float64, + unit=ureg.hartree / ureg.angstrom, ), Quantity( - 'hl_gap', - rf'\| HOMO-LUMO GAP\s*({re_f})', - dtype=np.float64, unit=ureg.eV + "hl_gap", + rf"\| HOMO-LUMO GAP\s*({re_f})", + dtype=np.float64, + unit=ureg.eV, ), - Quantity( - 'topo_file', - r'Writing topology from bond orders to (.+\.mol)' + Quantity("topo_file", r"Writing topology from bond orders to (.+\.mol)"), + Quantity( + "footer", + r"(\* finished run on [\s\S]+?\Z)", + sub_parser=TextParser( + quantities=[ + Quantity( + "end_time", r"finished run on (\S+) at (\S+)", flatten=False + ), + Quantity( + "wall_time", + r"(.+):\s+\* +wall-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec", + repeats=True, + str_operation=str_to_wall_time, + ), + Quantity( + "cpu_time", + r"\* +cpu-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec", + repeats=True, + ), + ] + ), ), - Quantity( - 'footer', - r'(\* finished run on [\s\S]+?\Z)', - sub_parser=TextParser(quantities=[ - Quantity( - 'end_time', - r'finished run on (\S+) at (\S+)', flatten=False - ), - Quantity( - 'wall_time', - r'(.+):\s+\* +wall-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec', - repeats=True, str_operation=str_to_wall_time - ), - Quantity( - 'cpu_time', - r'\* +cpu-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec', - repeats=True - ) - ]) - ) ] def get_time(self, section=None, index=0): start_time = 0 section_index = 0 - for time in self.get('footer', {}).get('wall_time', []): + for time in self.get("footer", {}).get("wall_time", []): if time[0] == section or section is None: if index == section_index: return start_time, time[1] section_index += 1 - if time[0] != 'total': + if time[0] != "total": start_time += time[1] return start_time, None @@ -417,57 +548,64 @@ def __init__(self): super().__init__() def init_quantities(self): - re_f = r'[\d\.\-]+' + re_f = r"[\d\.\-]+" self._quantities = [ - Quantity('coord_unit', r'\$coord(.+)'), + Quantity("coord_unit", r"\$coord(.+)"), Quantity( - 'positions_labels', - rf'({re_f} +{re_f} +{re_f} +[A-Za-z]+\s+)', repeats=True + "positions_labels", + rf"({re_f} +{re_f} +{re_f} +[A-Za-z]+\s+)", + repeats=True, ), - Quantity('periodic', r'\$periodic(.+)'), - Quantity('lattice_unit', r'\$lattice(.+)'), + Quantity("periodic", r"\$periodic(.+)"), + Quantity("lattice_unit", r"\$lattice(.+)"), Quantity( - 'lattice', - rf'({re_f} +{re_f} +{re_f}) *\n', repeats=True, dtype=np.dtype(np.float64) + "lattice", + rf"({re_f} +{re_f} +{re_f}) *\n", + repeats=True, + dtype=np.dtype(np.float64), ), - Quantity('cell_unit', r'\$cell(.+)'), + Quantity("cell_unit", r"\$cell(.+)"), Quantity( - 'cell', - rf'({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}) *\n', - dtype=np.dtype(np.float64) - ) + "cell", + rf"({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}) *\n", + dtype=np.dtype(np.float64), + ), ] def get_atoms(self): - positions = self.get('positions_labels') + positions = self.get("positions_labels") if positions is None: return - lattice_unit = self.get('lattice_unit', '').strip() - lattice_unit = ureg.angstrom if lattice_unit.startswith('angs') else ureg.bohr - lattice = self.get('lattice') - lattice = (lattice * lattice_unit).to('angstrom').magnitude if lattice is not None else lattice + lattice_unit = self.get("lattice_unit", "").strip() + lattice_unit = ureg.angstrom if lattice_unit.startswith("angs") else ureg.bohr + lattice = self.get("lattice") + lattice = ( + (lattice * lattice_unit).to("angstrom").magnitude + if lattice is not None + else lattice + ) - cell = self.get('cell') + cell = self.get("cell") if cell is not None: - cell_unit = self.get('cell_unit') + cell_unit = self.get("cell_unit") cell_unit = ureg.angstrom if cell_unit is not None else ureg.bohr - cell_abc = (cell[:3] * cell_unit).to('angstrom').magnitude + cell_abc = (cell[:3] * cell_unit).to("angstrom").magnitude lattice = list(cell_abc) + list(cell[3:]) labels = [p[-1].title() for p in positions] positions = [p[:3] for p in positions] - coord_unit = self.get('coord_unit', '').strip() - if coord_unit.startswith('frac') and lattice is not None: + coord_unit = self.get("coord_unit", "").strip() + if coord_unit.startswith("frac") and lattice is not None: positions = np.dot(positions, lattice) - elif coord_unit.startswith('angs'): + elif coord_unit.startswith("angs"): positions = positions * ureg.angstrom else: positions = positions * ureg.bohr - positions = positions.to('angstrom').magnitude + positions = positions.to("angstrom").magnitude - pbc = ([True] * int(self.get('periodic', 0))) + [False] * 3 + pbc = ([True] * int(self.get("periodic", 0))) + [False] * 3 return aseAtoms(symbols=labels, positions=positions, cell=lattice, pbc=pbc[:3]) @@ -477,29 +615,33 @@ def __init__(self): super().__init__() def init_quantities(self): - re_f = r'[\d\.\-]+' + re_f = r"[\d\.\-]+" self._quantities = [ Quantity( - 'frame', - r'energy\:([\s\S]+?(?:\Z|\n *\d+ *\n))', - repeats=True, sub_parser=TextParser(quantities=[ - Quantity( - 'positions', - rf'({re_f} +{re_f} +{re_f})', - repeats=True, dtype=np.dtype(np.float64) - ), - Quantity('labels', r'\n *([A-Za-z]{1,2}) +', repeats=True) - ]) + "frame", + r"energy\:([\s\S]+?(?:\Z|\n *\d+ *\n))", + repeats=True, + sub_parser=TextParser( + quantities=[ + Quantity( + "positions", + rf"({re_f} +{re_f} +{re_f})", + repeats=True, + dtype=np.dtype(np.float64), + ), + Quantity("labels", r"\n *([A-Za-z]{1,2}) +", repeats=True), + ] + ), ) ] def get_atoms(self, n_frame): - frames = self.get('frame', []) + frames = self.get("frame", []) if n_frame >= len(frames): return - frame = self.get('frame')[n_frame] - labels = [label.title() for label in frame.get('labels', [])] + frame = self.get("frame")[n_frame] + labels = [label.title() for label in frame.get("labels", [])] # TODO verify if trajectory positions are always printed out in angstroms return aseAtoms(symbols=labels, positions=frame.positions) @@ -511,24 +653,22 @@ def __init__(self): self.traj_parser = TrajParser() self.calculation_type = None self._metainfo_map = { - 'optimization level': 'optimization_level', 'max. optcycles': 'max_opt_cycles', - 'ANC micro-cycles': 'anc_micro_cycles', 'degrees of freedom': 'n_degrees_freedom', - 'RF solver': 'rf_solver', 'linear?': 'linear', 'Hlow (freq-cutoff)': 'hlow', - 'Hmax (freq-cutoff)': 'hmax', 'S6 in model hess.': 's6' + "optimization level": "optimization_level", + "max. optcycles": "max_opt_cycles", + "ANC micro-cycles": "anc_micro_cycles", + "degrees of freedom": "n_degrees_freedom", + "RF solver": "rf_solver", + "linear?": "linear", + "Hlow (freq-cutoff)": "hlow", + "Hmax (freq-cutoff)": "hmax", + "S6 in model hess.": "s6", } super().__init__() - def init_parser(self): - self.out_parser.mainfile = self.filepath - self.out_parser.logger = self.logger - self.coord_parser.logger = self.logger - self.traj_parser.logger = self.logger - self.calculation_type = None - def parse_system(self, source): if isinstance(source, int): atoms = self.traj_parser.get_atoms(source) - elif source.endswith('.xyz') or source.endswith('.poscar'): + elif source.endswith(".xyz") or source.endswith(".poscar"): atoms = aseread(os.path.join(self.maindir, source)) else: self.coord_parser.mainfile = os.path.join(self.maindir, source) @@ -560,58 +700,67 @@ def parse_calculation(self, source): sec_energy.change = source.energy_change # scf - for step in source.get('scf_iteration', {}).get('step', []): + for step in source.get("scf_iteration", {}).get("step", []): sec_scf = ScfIteration() sec_calc.scf_iteration.append(sec_scf) sec_scf.energy = Energy( total=EnergyEntry(value=step[1] * ureg.hartree), - change=step[2] * ureg.hartree + change=step[2] * ureg.hartree, ) # summary of calculated properties - summary = source.get('summary', {}) + summary = source.get("summary", {}) for key, val in summary.items(): - if key.startswith('energy_') and val is not None: - setattr(sec_energy, key.replace('energy_', ''), EnergyEntry(value=val)) + if key.startswith("energy_") and val is not None: + setattr(sec_energy, key.replace("energy_", ""), EnergyEntry(value=val)) # eigenvalues if source.eigenvalues is not None: sec_eigs = BandEnergies() sec_calc.eigenvalues.append(sec_eigs) - sec_eigs.occupations = np.reshape(source.eigenvalues[0], (1, 1, len(source.eigenvalues[0]))) - sec_eigs.energies = np.reshape(source.eigenvalues[1], (1, 1, len(source.eigenvalues[1]))) + sec_eigs.occupations = np.reshape( + source.eigenvalues[0], (1, 1, len(source.eigenvalues[0])) + ) + sec_eigs.energies = np.reshape( + source.eigenvalues[1], (1, 1, len(source.eigenvalues[1])) + ) sec_eigs.kpoints = np.zeros((1, 3)) return sec_calc def parse_method(self, section): - model = self.out_parser.get(section, {}).get('model') + model = self.out_parser.get(section, {}).get("model") if model is None: return sec_method = Method() self.archive.run[-1].method.append(sec_method) - parameters = {p[0]: p[1] for p in self.out_parser.get(section, {}).get('setup', {}).get('parameter', [])} + parameters = { + p[0]: p[1] + for p in self.out_parser.get(section, {}) + .get("setup", {}) + .get("parameter", []) + } sec_tb = TB() sec_method.tb = sec_tb - sec_tb.name = 'xTB' + sec_tb.name = "xTB" sec_tb.x_xtb_setup = parameters sec_xtb = xTB() sec_tb.xtb = sec_xtb sec_xtb.name = section - if model.get('reference') is not None: + if model.get("reference") is not None: sec_xtb.reference = model.reference - for contribution in model.get('contribution', []): + for contribution in model.get("contribution", []): name = contribution.name.lower() - if name == 'hamiltonian': + if name == "hamiltonian": sec_interaction = Interaction() sec_xtb.hamiltonian.append(sec_interaction) - elif name == 'coulomb': + elif name == "coulomb": sec_interaction = Interaction() sec_xtb.coulomb.append(sec_interaction) - elif name == 'repulsion': + elif name == "repulsion": sec_interaction = Interaction() sec_xtb.repulsion.append(sec_interaction) else: @@ -619,7 +768,9 @@ def parse_method(self, section): sec_xtb.contributions.append(sec_interaction) sec_interaction.type = name sec_interaction.parameters = { - p[0]: p[1].tolist() if isinstance(p[1], np.ndarray) else p[1] for p in contribution.parameters} + p[0]: p[1].tolist() if isinstance(p[1], np.ndarray) else p[1] + for p in contribution.parameters + } def parse_single_point(self, source, section): if source is None: @@ -627,10 +778,12 @@ def parse_single_point(self, source, section): total_time = None # determine file extension of input structure file - coord_file = self.archive.run[-1].x_xtb_calculation_setup.get('coordinate file', 'coord') - if section == 'final_single_point': - extension = 'coord' if coord_file == 'coord' else coord_file.split('.')[-1] - coord_file = f'xtbopt.{extension}' + coord_file = self.archive.run[-1].x_xtb_calculation_setup.get( + "coordinate file", "coord" + ) + if section == "final_single_point": + extension = "coord" if coord_file == "coord" else coord_file.split(".")[-1] + coord_file = f"xtbopt.{extension}" else: self._run_index += 1 start_time, total_time = self.out_parser.get_time(index=self._run_index) @@ -656,11 +809,15 @@ def parse_opt(self, section): self._run_index += 1 - start_time, total_time = self.out_parser.get_time(section='ANC optimizer') - time_per_step = total_time / (len(module.get('cycle')) + 1) if total_time is not None else None - self.traj_parser.mainfile = os.path.join(self.maindir, 'xtbopt.log') + start_time, total_time = self.out_parser.get_time(section="ANC optimizer") + time_per_step = ( + total_time / (len(module.get("cycle")) + 1) + if total_time is not None + else None + ) + self.traj_parser.mainfile = os.path.join(self.maindir, "xtbopt.log") - for n, cycle in enumerate(module.get('cycle', [])): + for n, cycle in enumerate(module.get("cycle", [])): self.parse_system(n) sec_scc = self.parse_calculation(cycle) if sec_scc is not None and time_per_step is not None: @@ -668,23 +825,33 @@ def parse_opt(self, section): sec_scc.time_calculation = time_per_step # final single point - sec_scc = self.parse_single_point(module.get('final_single_point'), 'final_single_point') + sec_scc = self.parse_single_point( + module.get("final_single_point"), "final_single_point" + ) if sec_scc is not None and time_per_step is not None: - sec_scc.time_physical = start_time + time_per_step * (len(module.get('cycle', [])) + 1) + sec_scc.time_physical = start_time + time_per_step * ( + len(module.get("cycle", [])) + 1 + ) sec_scc.time_calculation = time_per_step # workflow parameters workflow = GeometryOptimization(method=GeometryOptimizationMethod()) - for key, val in module.get('setup', {}).get('parameter', []): + for key, val in module.get("setup", {}).get("parameter", []): name = self._metainfo_map.get(key) - if key == 'energy convergence': - workflow.method.convergence_tolerance_energy_difference = val * ureg.hartree - elif key == 'grad. convergence': - workflow.method.convergence_tolerance_force_maximum = val * ureg.hartree / ureg.bohr - elif key == 'maximium RF displ.': - workflow.method.convergence_tolerance_displacement_maximum = val * ureg.bohr + if key == "energy convergence": + workflow.method.convergence_tolerance_energy_difference = ( + val * ureg.hartree + ) + elif key == "grad. convergence": + workflow.method.convergence_tolerance_force_maximum = ( + val * ureg.hartree / ureg.bohr + ) + elif key == "maximium RF displ.": + workflow.method.convergence_tolerance_displacement_maximum = ( + val * ureg.bohr + ) elif name is not None: - setattr(workflow, f'x_xtb_{name}', val) + setattr(workflow, f"x_xtb_{name}", val) self.archive.workflow2 = workflow def parse_md(self, section): @@ -692,77 +859,103 @@ def parse_md(self, section): if module is None: return - self.traj_parser.mainfile = os.path.join(self.maindir, 'xtb.trj') + self.traj_parser.mainfile = os.path.join(self.maindir, "xtb.trj") # get trj dump frequency to determine which frame to parse in trajectory file - trj_freq = module.get('x_xtb_dumpstep_trj', 1) + trj_freq = module.get("x_xtb_dumpstep_trj", 1) - traj_steps = [n * int(trj_freq) for n in range(len(self.traj_parser.get('frame', [])))] - self.n_atoms = self.archive.run[-1].x_xtb_calculation_setup.get('number of atoms', 0) + traj_steps = [ + n * int(trj_freq) for n in range(len(self.traj_parser.get("frame", []))) + ] + self.n_atoms = self.archive.run[-1].x_xtb_calculation_setup.get( + "number of atoms", 0 + ) self.trajectory_steps = [-1] + traj_steps - self.thermodynamics_steps = [int(cycle[0]) for cycle in module.get('cycle', [])] + self.thermodynamics_steps = [int(cycle[0]) for cycle in module.get("cycle", [])] for step in self.trajectory_steps: if step < 0: continue atoms = self.traj_parser.get_atoms(traj_steps.index(step)) - data = dict(labels=atoms.get_chemical_symbols(), positions=atoms.get_positions() * ureg.angstrom) + data = dict( + labels=atoms.get_chemical_symbols(), + positions=atoms.get_positions() * ureg.angstrom, + ) lattice_vectors = np.array(atoms.get_cell()) if np.count_nonzero(lattice_vectors) > 0: - data['lattice_vectors'] = lattice_vectors * ureg.angstrom + data["lattice_vectors"] = lattice_vectors * ureg.angstrom self.parse_trajectory_step(dict(atoms=data)) - time_start, time_calc = self.out_parser.get_time(section='MD') - time_step = time_calc / (max(self.thermodynamics_steps) + 1) if time_calc is not None else None + time_start, time_calc = self.out_parser.get_time(section="MD") + time_step = ( + time_calc / (max(self.thermodynamics_steps) + 1) + if time_calc is not None + else None + ) for n_frame, step in enumerate(self.thermodynamics_steps): - cycle = module.get('cycle')[n_frame] + cycle = module.get("cycle")[n_frame] data = dict( - step=step, time_physical=cycle[1] * ureg.ps, energy=dict(total=dict( - potential=cycle[2] * ureg.hartree, kinetic=cycle[3] * ureg.hartree, - value=cycle[6] * ureg.hartree)), - temperature=cycle[5] * ureg.kelvin) + step=step, + time_physical=cycle[1] * ureg.ps, + energy=dict( + total=dict( + potential=cycle[2] * ureg.hartree, + kinetic=cycle[3] * ureg.hartree, + value=cycle[6] * ureg.hartree, + ) + ), + temperature=cycle[5] * ureg.kelvin, + ) if time_step is not None: - data['time_physical'] = time_start + time_step * (step + 1) - data['time_calculation'] = time_step + data["time_physical"] = time_start + time_step * (step + 1) + data["time_calculation"] = time_step self.parse_thermodynamics_step(data) # workflow parameters - self.parse_md_workflow({key: val for key, val in module.items() if key.startswith('x_xtb')}) + self.parse_md_workflow( + {key: val for key, val in module.items() if key.startswith("x_xtb")} + ) - def parse(self, filepath, archive, logger): - self.filepath = os.path.abspath(filepath) - self.archive = archive - self.maindir = os.path.dirname(self.filepath) - self.logger = logger if logger is not None else logging - - self.init_parser() + def write_to_archive(self) -> None: + self.out_parser.mainfile = self.mainfile + self.out_parser.logger = self.logger + self.coord_parser.logger = self.logger + self.traj_parser.logger = self.logger + self.calculation_type = None + self.maindir = os.path.dirname(self.mainfile) self._run_index = 0 # run parameters sec_run = Run() self.archive.run.append(sec_run) - sec_run.program = Program(name='xTB', version=self.out_parser.get('program_version')) + sec_run.program = Program( + name="xTB", version=self.out_parser.get("program_version") + ) sec_run.x_xtb_calculation_setup = { - p[0]: p[1] for p in self.out_parser.get('calculation_setup', {}).get('parameter', []) + p[0]: p[1] + for p in self.out_parser.get("calculation_setup", {}).get("parameter", []) } if self.out_parser.date_start is not None: - sec_run.time_run = TimeRun(date_start=datetime.strptime( - self.out_parser.date_start, '%Y/%m/%d %H:%M:%S.%f').timestamp() + sec_run.time_run = TimeRun( + date_start=datetime.strptime( + self.out_parser.date_start, "%Y/%m/%d %H:%M:%S.%f" + ).timestamp() ) if self.out_parser.date_end is not None: sec_run.time_run.date_end = datetime.strptime( - self.out_parser.date_end, '%Y/%m/%d %H:%M:%S.%f').timestamp() + self.out_parser.date_end, "%Y/%m/%d %H:%M:%S.%f" + ).timestamp() # modules - self.parse_gfn('gfnff') - self.parse_gfn('gfn1') - self.parse_gfn('gfn2') - self.parse_opt('ancopt') - self.parse_md('md') + self.parse_gfn("gfnff") + self.parse_gfn("gfn1") + self.parse_gfn("gfn2") + self.parse_opt("ancopt") + self.parse_md("md") # output properties - properties = self.out_parser.get('property') + properties = self.out_parser.get("property") if properties.dipole is not None: if sec_run.calculation: sec_calc = sec_run.calculation[-1] @@ -772,13 +965,15 @@ def parse(self, filepath, archive, logger): sec_multipoles = Multipoles() sec_calc.multipoles.append(sec_multipoles) sec_multipoles.dipole = MultipolesEntry( - total=properties.dipole.full.to('C * m').magnitude, - x_xtb_q_only=properties.dipole.q.to('C * m').magnitude + total=properties.dipole.full.to("C * m").magnitude, + x_xtb_q_only=properties.dipole.q.to("C * m").magnitude, ) if properties.quadrupole is not None: sec_multipoles.quadrupole = MultipolesEntry( - total=properties.quadrupole.full.to('C * m**2').magnitude, - x_xtb_q_only=properties.quadrupole.q.to('C * m**2').magnitude, - x_xtb_q_plus_dip=properties.quadrupole.q_dip.to('C * m**2').magnitude + total=properties.quadrupole.full.to("C * m**2").magnitude, + x_xtb_q_only=properties.quadrupole.q.to("C * m**2").magnitude, + x_xtb_q_plus_dip=properties.quadrupole.q_dip.to( + "C * m**2" + ).magnitude, ) # TODO implement vibrational properties