From e3cce49a5caf34ccbed340fe0da2875d0b8c40fb Mon Sep 17 00:00:00 2001 From: Sean Kavanagh Date: Sat, 2 Nov 2024 16:33:17 -0400 Subject: [PATCH] Use `doped` structure matching (~2 order of magnitude speed increase) --- README.md | 4 +- docs/index.rst | 6 +- shakenbreak/analysis.py | 149 +++++---------------- shakenbreak/energy_lowering_distortions.py | 65 ++++----- shakenbreak/input.py | 7 +- tests/test_analysis.py | 41 +++--- tests/test_cli.py | 16 ++- tests/test_energy_lowering_distortions.py | 28 +++- tests/test_input.py | 21 --- tests/test_io.py | 26 +++- 10 files changed, 146 insertions(+), 217 deletions(-) diff --git a/README.md b/README.md index 10f57e69..b6662b68 100644 --- a/README.md +++ b/README.md @@ -135,13 +135,13 @@ Automatic testing is run on the master and develop branches using Github Actions - B. E. Murdock et al. **_Li-Site Defects Induce Formation of Li-Rich Impurity Phases: Implications for Charge Distribution and Performance of LiNi0.5-xMxMn1.5O4 Cathodes (M = Fe and Mg; x = 0.05–0.2)_** [_Advanced Materials_](https://doi.org/10.1002/adma.202400343) 2024 - Y. Fu & H. Lohan et al. **_Factors Enabling Delocalized Charge-Carriers in Pnictogen-Based Solar Absorbers: In-depth Investigation into CuSbSe2_** [_arXiv_](https://doi.org/10.48550/arXiv.2401.02257) 2024 -- S. Hachmioune et al. **_Exploring the Thermoelectric Potential of MgB4: Electronic Band Structure, Transport Properties, and Defect Chemistry_** [_Chemistry of Materials_](https://doi.org/10.1021/acs.chemmater.4c00584) 2024 -- J. Hu et al. **_Enabling ionic transport in Li3AlP2 the roles of defects and disorder_** [_ChemRxiv_](https://doi.org/10.26434/chemrxiv-2024-3s0kh) 2024 - A. G. Squires et al. **_Oxygen dimerization as a defect-driven process in bulk LiNiO22_** [_ACS Energy Letters_](https://pubs.acs.org/doi/10.1021/acsenergylett.4c01307) 2024 - X. Wang et al. **_Upper efficiency limit of Sb2Se3 solar cells_** [_Joule_](https://doi.org/10.1016/j.joule.2024.05.004) 2024 - I. Mosquera-Lois et al. **_Machine-learning structural reconstructions for accelerated point defect calculations_** [_npj Computational Materials_](https://doi.org/10.1038/s41524-024-01303-9) 2024 - S. R. Kavanagh et al. **_doped: Python toolkit for robust and repeatable charged defect supercell calculations_** [_Journal of Open Source Software_](https://doi.org/10.21105/joss.06433) 2024 - K. Li et al. **_Computational Prediction of an Antimony-based n-type Transparent Conducting Oxide: F-doped Sb2O5_** [_Chemistry of Materials_](https://doi.org/10.1021/acs.chemmater.3c03257) 2024 +- S. Hachmioune et al. **_Exploring the Thermoelectric Potential of MgB4: Electronic Band Structure, Transport Properties, and Defect Chemistry_** [_Chemistry of Materials_](https://doi.org/10.1021/acs.chemmater.4c00584) 2024 +- J. Hu et al. **_Enabling ionic transport in Li3AlP2 the roles of defects and disorder_** [_ChemRxiv_](https://doi.org/10.26434/chemrxiv-2024-3s0kh) 2024 - X. Wang et al. **_Four-electron negative-U vacancy defects in antimony selenide_** [_Physical Review B_](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.108.134102) 2023 - Y. Kumagai et al. **_Alkali Mono-Pnictides: A New Class of Photovoltaic Materials by Element Mutation_** [_PRX Energy_](http://dx.doi.org/10.1103/PRXEnergy.2.043002) 2023 - A. T. J. Nicolson et al. **_Cu2SiSe3 as a promising solar absorber: harnessing cation dissimilarity to avoid killer antisites_** [_Journal of Materials Chemistry A_](https://doi.org/10.1039/D3TA02429F) 2023 diff --git a/docs/index.rst b/docs/index.rst index 3d7fc288..785a9772 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -218,13 +218,13 @@ Studies using ``ShakeNBreak`` - Z\. Yuan & G. Hautier **First-principles study of defects and doping limits in CaO** `Applied Physics Letters `_ 2024 - B\. E. Murdock et al. **Li-Site Defects Induce Formation of Li-Rich Impurity Phases: Implications for Charge Distribution and Performance of LiNi** :sub:`0.5-x` **M** :sub:`x` **Mn** :sub:`1.5` **O₄ Cathodes (M = Fe and Mg; x = 0.05–0.2)** `Advanced Materials `_ 2024 - A\. G. Squires et al. **Oxygen dimerization as a defect-driven process in bulk LiNiO₂** `ACS Energy Letters `__ 2024 -- Y\. Fu & H. Lohan et al. **Factors Enabling Delocalized Charge-Carriers in Pnictogen-Based Solar Absorbers: In-depth Investigation into CuSbSe2** `arXiv `_ 2024 -- S\. Hachmioune et al. **Exploring the Thermoelectric Potential of MgB₄: Electronic Band Structure, Transport Properties, and Defect Chemistry** `Chemistry of Materials `_ 2024 -- J\. Hu et al. **Enabling ionic transport in Li₃AlP₂ the roles of defects and disorder** `ChemRxiv `_ 2024 +- Y\. Fu & H. Lohan et al. **Factors Enabling Delocalized Charge-Carriers in Pnictogen-Based Solar Absorbers: In-depth Investigation into CuSbSe₂** `arXiv `_ 2024 - X\. Wang et al. **Upper efficiency limit of Sb₂Se₃ solar cells** `Joule `_ 2024 - I\. Mosquera-Lois et al. **Machine-learning structural reconstructions for accelerated point defect calculations** `npj Computational Materials `_ 2024 - S\. R. Kavanagh et al. **doped: Python toolkit for robust and repeatable charged defect supercell calculations** `Journal of Open Source Software `_ 2024 - K\. Li et al. **Computational Prediction of an Antimony-based n-type Transparent Conducting Oxide: F-doped Sb₂O₅** `Chemistry of Materials `_ 2024 +- S\. Hachmioune et al. **Exploring the Thermoelectric Potential of MgB₄: Electronic Band Structure, Transport Properties, and Defect Chemistry** `Chemistry of Materials `_ 2024 +- J\. Hu et al. **Enabling ionic transport in Li₃AlP₂ the roles of defects and disorder** `ChemRxiv `_ 2024 - X\. Wang et al. **Four-electron negative-U vacancy defects in antimony selenide** `Physical Review B `_ 2023 - Y\. Kumagai et al. **Alkali Mono-Pnictides: A New Class of Photovoltaic Materials by Element Mutation** `PRX Energy `__ 2023 - J\. Willis, K. B. Spooner, D. O. Scanlon. **On the possibility of p-type doping in barium stannate** `Applied Physics Letters `__ 2023 diff --git a/shakenbreak/analysis.py b/shakenbreak/analysis.py index 506d51bb..ccc993b2 100644 --- a/shakenbreak/analysis.py +++ b/shakenbreak/analysis.py @@ -12,10 +12,10 @@ import numpy as np import pandas as pd +from doped.utils.configurations import _scan_sm_stol_till_match from doped.utils.parsing import get_outcar from monty.serialization import loadfn from pymatgen.analysis.local_env import CrystalNN -from pymatgen.analysis.structure_matcher import StructureMatcher from pymatgen.core.composition import Composition, Element from pymatgen.core.structure import IStructure, PeriodicSite, Structure from pymatgen.io.vasp.outputs import Outcar @@ -545,80 +545,48 @@ def get_energies( def _cached_calculate_atomic_disp( struct1: Structure, struct2: Structure, - stol: float = 0.5, - ltol: float = 0.3, - angle_tol: float = 5, -) -> tuple: - """ - ``_calculate_atomic_disp`` but with lru-caching. - - Should only really be used internally in ``ShakeNBreak`` - as it relies on the ``Structure`` hashes, which in ``pymatgen`` - is just the composition which is unusable here, but this is - monkey-patched in ``calculate_struct_comparison`` below for - fast internal usage. - """ - return _calculate_atomic_disp(struct1, struct2, stol, ltol, angle_tol) - - -def _calculate_atomic_disp( - struct1: Structure, - struct2: Structure, - stol: float = 0.5, - ltol: float = 0.3, - angle_tol: float = 5, + **sm_kwargs, ) -> tuple: """ Calculate root-mean-square displacement and atomic displacements, normalized by the free length per atom ((Vol/Nsites)^(1/3)) between two structures. + Should only really be used internally in ``ShakeNBreak`` + as the caching in this function relies on the ``Structure`` hashes, + which in ``pymatgen`` is just the composition which is unusable here, + but this is monkey-patched in ``calculate_struct_comparison`` in + ``shakenbreak.analysis`` for fast internal usage. + Args: struct1 (:obj:`Structure`): Structure to compare to struct2. struct2 (:obj:`Structure`): Structure to compare to struct1. - stol (:obj:`float`): - Site tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`), as a fraction of the - average free length per atom := ( V / Nsites ) ** (1/3). If - output contains too many 'NaN' values, this likely needs to - be increased. - (Default: 0.5) - ltol (:obj:`float`): - Length tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`). - (Default: 0.3) - angle_tol (:obj:`float`): - Angle tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`). - (Default: 5) + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`tuple`: Tuple of normalized root mean squared displacements and normalized displacements between the two structures. """ - # StructureMatcher._cart_dists() is the performance bottleneck for large supercells here. It could + # ``StructureMatcher._cart_dists()`` is the performance bottleneck for large supercells here. It could # likely be made faster using Cython/numba (see - # https://github.com/materialsproject/pymatgen/issues/2593), caching and/or multiprocessing - sm = StructureMatcher(ltol=ltol, stol=stol, angle_tol=angle_tol, primitive_cell=False, scale=True) - struct1, struct2 = sm._process_species([struct1, struct2]) - struct1, struct2, fu, s1_supercell = sm._preprocess(struct1, struct2) - match = sm._match(struct1, struct2, fu, s1_supercell, use_rms=True, break_on_match=False) - - return None if match is None else (match[0], match[1]) + # https://github.com/materialsproject/pymatgen/issues/2593), caching and/or multiprocessing, but + # ``doped`` efficiency improvements have made it multiple orders of magnitude faster: + return _scan_sm_stol_till_match(struct1, struct2, func_name="_get_atomic_disps", **sm_kwargs) def calculate_struct_comparison( defect_structures_dict: dict, metric: str = "max_dist", ref_structure: Union[str, float, Structure] = "Unperturbed", - stol: float = 0.5, - ltol: float = 0.3, - angle_tol: float = 5, min_dist: float = 0.1, verbose: bool = False, + **sm_kwargs, ) -> dict: """ Calculate either the summed atomic displacement, with metric = "disp", @@ -646,26 +614,15 @@ def calculate_struct_comparison( or a pymatgen Structure object (to compare with a specific external structure). (Default: "Unperturbed") - stol (:obj:`float`): - Site tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`), as a fraction of the - average free length per atom := ( V / Nsites ) ** (1/3). If - output contains too many 'NaN' values, this likely needs to - be increased. - (Default: 0.5) - ltol (:obj:`float`): - Length tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`). - (Default: 0.3) - angle_tol (:obj:`float`): - Angle tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`). - (Default: 5) min_dist (:obj:`float`): Minimum atomic displacement threshold to include in atomic displacements sum (in Å, default 0.1 Å). verbose (:obj:`bool`): Whether to print information message about structures being compared. + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`dict`: @@ -724,9 +681,7 @@ def calculate_struct_comparison( _, norm_dist = _cached_calculate_atomic_disp( struct1=ref_structure, struct2=defect_structures_dict[distortion], - stol=stol, - ltol=ltol, - angle_tol=angle_tol, + **sm_kwargs, ) if metric == "disp": disp_dict[distortion] = ( @@ -737,9 +692,9 @@ def calculate_struct_comparison( disp_dict[distortion] = max(norm_dist) / normalization # Remove normalization else: raise ValueError(f"Invalid metric '{metric}'. Must be one of 'disp' or 'max_dist'.") - except TypeError: - disp_dict[distortion] = None # algorithm couldn't match lattices. Set comparison - # metric to None + except TypeError as exc: + raise exc + disp_dict[distortion] = None # algorithm couldn't match lattices. Set metric to None # warnings.warn( # f"pymatgen StructureMatcher could not match lattices between " # f"{ref_name} and {distortion} structures." @@ -752,11 +707,11 @@ def compare_structures( defect_structures_dict: dict, defect_energies_dict: dict, ref_structure: Union[str, float, Structure] = "Unperturbed", - stol: float = 0.5, units: str = "eV", min_dist: float = 0.1, display_df: bool = True, verbose: bool = True, + **sm_kwargs, ) -> Union[None, pd.DataFrame]: """ Compare final bond-distorted structures with either 'Unperturbed' or @@ -777,13 +732,6 @@ def compare_structures( or a pymatgen Structure object (to compare with a specific external structure). (Default: "Unperturbed") - stol (:obj:`float`): - Site tolerance used for structural comparison (via - `pymatgen`'s `StructureMatcher`), as a fraction of the - average free length per atom := ( V / Nsites ) ** (1/3). If - structure comparison output contains too many 'NaN' values, - this likely needs to be increased. - (Default: 0.5) units (:obj:`str`): Energy units label for outputs (either 'eV' or 'meV'). Should be the same as the units in `defect_energies_dict`, @@ -797,6 +745,10 @@ def compare_structures( interactively in Jupyter/Ipython (Default: True). verbose (:obj:`bool`): Whether to print information message about structures being compared. + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`pd.DataFrame`: @@ -808,44 +760,17 @@ def compare_structures( warnings.warn("All structures in defect_structures_dict are not converged. Returning None.") return None df_list = [] - disp_dict = calculate_struct_comparison( - defect_structures_dict, - metric="disp", - ref_structure=ref_structure, - stol=stol, - min_dist=min_dist, - verbose=verbose, - ) - max_dist_dict = calculate_struct_comparison( - defect_structures_dict, - metric="max_dist", - ref_structure=ref_structure, - stol=stol, - verbose=False, # only print "Comparing to..." once - ) - # Check if too many 'NaN' values in disp_dict, if so, try with higher stol - number_of_nan = len([value for value in disp_dict.values() if value is None]) - if number_of_nan > len(disp_dict.values()) // 3: - warnings.warn( - f"The specified tolerance {stol} seems to be too tight as" - " too many lattices could not be matched. Will retry with" - f" larger tolerance ({stol+0.4})." - ) - max_dist_dict = calculate_struct_comparison( + disp_dict, max_dist_dict = ( + calculate_struct_comparison( # cached to avoid redundant calculation time defect_structures_dict, - metric="max_dist", + metric=i, ref_structure=ref_structure, - stol=stol + 0.4, - verbose=False, - ) - disp_dict = calculate_struct_comparison( - defect_structures_dict, - metric="disp", - ref_structure=ref_structure, - stol=stol + 0.4, min_dist=min_dist, - verbose=False, + verbose=verbose if i == "disp" else False, # only print "Comparing to..." once + **sm_kwargs, ) + for i in ["disp", "max_dist"] + ) for distortion in defect_energies_dict["distortions"]: try: diff --git a/shakenbreak/energy_lowering_distortions.py b/shakenbreak/energy_lowering_distortions.py index c857347e..1c8a263d 100644 --- a/shakenbreak/energy_lowering_distortions.py +++ b/shakenbreak/energy_lowering_distortions.py @@ -99,9 +99,9 @@ def _compare_distortion( gs_distortion: float, gs_struct: Structure, low_energy_defects: dict, - stol: float = 0.5, min_dist: float = 0.2, verbose: bool = False, + **sm_kwargs, ) -> dict: """ Compare the ground state distortion (`gs_distortion`) to the other @@ -125,17 +125,16 @@ def _compare_distortion( pymatgen Structure object of the ground state configuration. low_energy_defects (:obj:`dict): Dictionary storing all unique, energy-lowering distortions. - stol (:obj:`float`): - Site-matching tolerance for structure matching. Site - tolerance defined as the fraction of the average free length - per atom := ( V / Nsites ) ** (1/3). - (Default: 0.5) min_dist (:obj:`float`): Minimum atomic displacement threshold between structures, in order to consider them not matching (in Å, default = 0.2 Å). verbose (:obj:`bool`): Whether to print information message about structures being compared. (Default: False) + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`dict` @@ -153,9 +152,9 @@ def _compare_distortion( ], # just select the first structure in # each list as these structures have already been # found to match - stol=stol, min_dist=min_dist, verbose=verbose, + **sm_kwargs, ) comparison_dicts_dict[i] = struct_comparison_dict @@ -210,9 +209,9 @@ def _prune_dict_across_charges( code: str = "VASP", structure_filename: str = "CONTCAR", output_path: str = ".", - stol: float = 0.5, min_dist: float = 0.2, verbose: bool = False, + **sm_kwargs, ) -> dict: """ Screen through defects to check if any lower-energy distorted structures @@ -237,11 +236,6 @@ def _prune_dict_across_charges( structure_filename (:obj:`str`, optional): Name of the file containing the structure. (Default: CONTCAR) - stol (:obj:`float`): - Site-matching tolerance for structure matching. Site - tolerance defined as the fraction of the average free length - per atom := ( V / Nsites ) ** (1/3). - (Default: 0.5) min_dist (:obj:`float`): Minimum atomic displacement threshold between structures, in order to consider them not matching (in Å, default = 0.2 Å). @@ -249,6 +243,10 @@ def _prune_dict_across_charges( Whether to print verbose information about parsed defect structures for energy-lowering distortions, if found. (Default: False) + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`dict` @@ -286,9 +284,9 @@ def _prune_dict_across_charges( output_path, code=code, structure_filename=structure_filename, - stol=stol, min_dist=min_dist, verbose=verbose, + **sm_kwargs, ) if comparison_results[0] is not None: break @@ -329,11 +327,11 @@ def get_energy_lowering_distortions( code: str = "vasp", structure_filename: str = "CONTCAR", min_e_diff: float = 0.05, - stol: float = 0.5, min_dist: float = 0.2, verbose: bool = False, write_input_files: bool = False, metastable: bool = False, + **sm_kwargs, ) -> dict: """ Convenience function to identify defect species undergoing @@ -367,11 +365,6 @@ def get_energy_lowering_distortions( defect structure, relative to the `Unperturbed` structure, to consider it as having found a new energy-lowering distortion. Default is 0.05 eV. - stol (:obj:`float`): - Site-matching tolerance for structure matching. Site - tolerance defined as the fraction of the average free length - per atom := ( V / Nsites ) ** (1/3). - (Default: 0.5) min_dist (:obj:`float`): Minimum atomic displacement threshold between structures, in order to consider them not matching (in Å, default = 0.2 Å). @@ -387,6 +380,10 @@ def get_energy_lowering_distortions( energy-lowering distortions, as these can become ground-state distortions for other charge states. (Default: False) + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`dict`: @@ -493,9 +490,9 @@ def get_energy_lowering_distortions( gs_distortion=gs_distortion, gs_struct=gs_struct, low_energy_defects=low_energy_defects, - stol=stol, min_dist=min_dist, verbose=verbose, + **sm_kwargs, ) else: # if defect not in dict, add it @@ -565,9 +562,9 @@ def get_energy_lowering_distortions( gs_distortion=distortion, gs_struct=struct, low_energy_defects=low_energy_defects, - stol=stol, min_dist=min_dist, verbose=verbose, + **sm_kwargs, ) else: # if defect not in dict, add it @@ -621,8 +618,8 @@ def get_energy_lowering_distortions( code=code, structure_filename=structure_filename, output_path=output_path, - stol=stol, min_dist=min_dist, + **sm_kwargs, ) # Write input files for the identified distortions @@ -642,9 +639,9 @@ def compare_struct_to_distortions( output_path: str = ".", code: str = "vasp", structure_filename: str = "CONTCAR", - stol: float = 0.5, min_dist: float = 0.2, verbose: bool = False, + **sm_kwargs, ) -> tuple: """ Compares the ground-state structure found for a certain defect charge @@ -669,17 +666,16 @@ def compare_struct_to_distortions( structure_filename (:obj:`str`, optional): Name of the file containing the structure. (Default: CONTCAR) - stol (:obj:`float`): - Site-matching tolerance for structure matching. Site - tolerance defined as thefraction of the average free length - per atom := ( V / Nsites ) ** (1/3). - (Default: 0.5) min_dist (:obj:`float`): Minimum atomic displacement threshold between structures, in orderto consider them not matching (in Å, default = 0.2 Å). verbose (:obj:`bool`): Whether to print information message about structures being compared. (Default: False) + **sm_kwargs: + Additional keyword arguments to pass to ``_scan_sm_stol_till_match`` + in ``doped`` (used for ultra-fast structure matching), such as + ``min_stol``, ``max_stol``, ``stol_factor`` etc. Returns: :obj:`tuple`: @@ -708,10 +704,10 @@ def compare_struct_to_distortions( defect_structures_dict=defect_structures_dict, defect_energies_dict=defect_energies_dict, ref_structure=distorted_struct, - stol=stol, min_dist=min_dist, display_df=False, verbose=verbose, + **sm_kwargs, ) if struct_comparison_df is None: # no converged structures found for # defect_species @@ -802,13 +798,8 @@ def compare_struct_to_distortions( struc_key, ) - # no matches - return ( - False, - None, - None, - None, - ) # T/F, matching structure, energy_diff, distortion factor + # no matches; T/F, matching structure, energy_diff, distortion factor + return (False, None, None, None) def write_retest_inputs( diff --git a/shakenbreak/input.py b/shakenbreak/input.py index 8c384438..b6ac7155 100644 --- a/shakenbreak/input.py +++ b/shakenbreak/input.py @@ -21,8 +21,8 @@ from doped.core import Defect, DefectEntry, guess_and_set_oxi_states_with_timeout from doped.generation import DefectsGenerator, name_defect_entries from doped.utils.parsing import ( - get_defect_site_idxs_and_unrelaxed_structure, get_defect_type_and_composition_diff, + get_defect_type_site_idxs_and_unrelaxed_structure, ) from doped.vasp import DefectDictSet from monty.json import MontyDecoder @@ -959,12 +959,11 @@ def _remove_matching_sites(bulk_site_list, defect_site_list): try: ( + _defect_type, auto_matching_bulk_site_index, auto_matching_defect_site_index, unrelaxed_defect_structure, - ) = get_defect_site_idxs_and_unrelaxed_structure( - bulk_structure, defect_structure, defect_type, comp_diff - ) + ) = get_defect_type_site_idxs_and_unrelaxed_structure(bulk_structure, defect_structure) except Exception as exc: # failed auto-site matching, rely on user input or raise error if no user input diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 478b9a0f..c913f305 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -59,6 +59,7 @@ def setUp(self): self.V_Cd_unperturbed = Structure.from_file( os.path.join(self.VASP_CDTE_DATA_DIR, "vac_1_Cd_0/Unperturbed/CONTCAR") ) + self.dense_bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) def tearDown(self): # restore the original file (after 'no unperturbed' tests): @@ -77,6 +78,10 @@ def tearDown(self): if_present_rm("v_Ca_s0_0.png") if_present_rm(f"{self.VASP_TIO2_DATA_DIR}/Bond_Distortion_20.0%") + for i in os.listdir(os.path.join(self.VASP_DIR, "v_Ca_s0_0")): + if os.path.isdir(os.path.join(self.VASP_DIR, "v_Ca_s0_0", i)): + shutil.rmtree(os.path.join(self.VASP_DIR, "v_Ca_s0_0", i)) + def copy_v_Ti_OUTCARs(self): """ Copy the OUTCAR files from the `v_Ti_0` `example_results` directory to the `vac_1_Ti_0` `vasp` @@ -348,7 +353,7 @@ def test_get_structures(self): defect_species="vac_1_Cd_0", output_path=self.VASP_CDTE_DATA_DIR ) self.assertEqual(len(defect_structures_dict), 26) - bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) + bond_distortions = self.dense_bond_distortions self.assertEqual( set(defect_structures_dict.keys()), set(bond_distortions + ["Unperturbed"]) ) @@ -409,7 +414,7 @@ def test_get_structures(self): defect_species="vac_1_Cd_0", output_path=self.VASP_CDTE_DATA_DIR ) self.assertEqual(len(defect_structures_dict), 26) - bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) + bond_distortions = self.dense_bond_distortions self.assertEqual( set(defect_structures_dict.keys()), set(bond_distortions + ["Unperturbed"]) ) @@ -434,7 +439,7 @@ def test_get_energies(self): ) energies_dict_keys_dict = {"distortions": None, "Unperturbed": None} self.assertEqual(defect_energies_dict.keys(), energies_dict_keys_dict.keys()) - bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) + bond_distortions = self.dense_bond_distortions self.assertEqual( list(defect_energies_dict["distortions"].keys()), bond_distortions ) @@ -460,7 +465,7 @@ def test_get_energies(self): self.assertEqual( defect_energies_dict.keys(), energies_dict_keys_dict.keys() ) - bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) + bond_distortions = self.dense_bond_distortions self.assertEqual( list(defect_energies_dict["distortions"].keys()), bond_distortions ) @@ -517,7 +522,7 @@ def test_get_energies(self): self.assertEqual( defect_energies_dict.keys(), energies_dict_keys_dict.keys() ) - bond_distortions = list(np.around(np.arange(-0.6, 0.001, 0.025), 3)) + bond_distortions = self.dense_bond_distortions self.assertEqual( list(defect_energies_dict["distortions"].keys()), bond_distortions ) @@ -552,11 +557,9 @@ def test_calculate_struct_comparison(self): self.assertEqual( max_dist_dict.keys(), defect_structures_dict.keys() ) # one for each - np.testing.assert_almost_equal(max_dist_dict[-0.4], 0.8082011457587672) - np.testing.assert_almost_equal(max_dist_dict[-0.2], 0.02518600797944396) - np.testing.assert_almost_equal( - max_dist_dict["Unperturbed"], 1.7500286730158273e-15 - ) + assert np.isclose(max_dist_dict[-0.4], 0.8082011457587672, atol=1e-2) + assert np.isclose(max_dist_dict[-0.2], 0.024, atol=1e-2) + assert np.isclose(max_dist_dict["Unperturbed"], 0) # V_Cd_0 with 'disp' (reading from `vac_1_Cd_0` and `distortion_metadata.json`): disp_dict = analysis.calculate_struct_comparison(defect_structures_dict, "disp") @@ -605,7 +608,6 @@ def test_calculate_struct_comparison(self): ) # spot check: self.assertEqual(round(max_dist_dict[-0.2], 3), 0.025) - self.assertIsNone(max_dist_dict[-0.4]) np.testing.assert_almost_equal(max_dist_dict["Unperturbed"], 0) disp_dict = analysis.calculate_struct_comparison( @@ -613,7 +615,6 @@ def test_calculate_struct_comparison(self): ) # spot check: self.assertEqual(round(disp_dict[-0.2], 3), 0.121) - self.assertIsNone(disp_dict[-0.4]) self.assertTrue(np.isclose(disp_dict["Unperturbed"], 0)) # test error catching: @@ -651,7 +652,7 @@ def test_calculate_struct_comparison(self): with self.assertRaises(ValueError) as e: wrong_metric_error = ValueError( - f"Invalid metric 'metwhat'. Must be one of 'disp' or 'max_dist'." + "Invalid metric 'metwhat'. Must be one of 'disp' or 'max_dist'." ) # https://youtu.be/DmH1prySUpA analysis.calculate_struct_comparison( defect_structures_dict, metric="metwhat" @@ -689,7 +690,7 @@ def test_compare_structures(self): ) # spot check: self.assertEqual( - struct_comparison_df.iloc[16].to_list(), [-0.2, 0.0, 0.025, 0.0] + struct_comparison_df.iloc[16].to_list(), [-0.2, 0.0, 0.024, 0.0] ) self.assertEqual( struct_comparison_df.iloc[8].to_list(), [-0.4, 5.760, 0.808, -0.75] @@ -751,16 +752,6 @@ def test_compare_structures(self): self.assertEqual( struct_comparison_df.iloc[16].to_list(), [-0.2, 0.121, 0.025, 0.0] ) - # When a too tight stol is used, check the code retries with larger stol - warning_message = ( - f"The specified tolerance {0.01} seems to be too tight as" - " too many lattices could not be matched. Will retry with" - f" larger tolerance ({0.01+0.4})." - ) - # self.assertEqual(w[-1].category, UserWarning) - self.assertTrue( - any([warning_message in str(warning.message) for warning in w]) - ) self.assertEqual( struct_comparison_df.iloc[8].to_list(), [-0.4, 8.31, 0.808, -0.75] ) @@ -774,7 +765,7 @@ def test_compare_structures(self): "Bond Distortion", "\u03A3{Displacements} (\u212B)", # Sigma and Angstrom "Max Distance (\u212B)", # Angstrom - f"\u0394 Energy (meV)", # Delta + "\u0394 Energy (meV)", # Delta ], ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 5db1d248..e1797d48 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -209,6 +209,18 @@ def tearDown(self): if os.path.exists(f"{v_Ti_0_distortion_10pct_dir}_High_Energy"): os.rename(f"{v_Ti_0_distortion_10pct_dir}_High_Energy", v_Ti_0_distortion_10pct_dir) + if_present_rm(f"{self.VASP_DIR}/vac_1_Ti_1") + if os.path.exists(f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_-60.0%/prev_otcr"): + shutil.move( + f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_-60.0%/prev_otcr", + f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_-60.0%/OUTCAR" + ) + if os.path.exists(f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_50.0%_High_Energy"): + os.rename( + f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_50.0%_High_Energy", + f"{self.VASP_DIR}/v_Ge_s16_0/Bond_Distortion_50.0%" + ) + def copy_v_Ti_OUTCARs(self): """ Copy the OUTCAR files from the `v_Ti_0` `example_results` directory to the `vac_1_Ti_0` `vasp` @@ -1139,7 +1151,9 @@ def test_snb_generate_config(self): self.assertEqual(kpoints.kpts, [(1, 1, 1)]) if _potcars_available(): - assert filecmp.cmp(f"{defect_name}_0/Bond_Distortion_-50.0%/INCAR", self.V_Cd_INCAR_file) + assert Incar.from_file(f"{defect_name}_0/Bond_Distortion_-50.0%/INCAR") == Incar.from_file( + self.V_Cd_INCAR_file + ) # check if POTCARs have been written: potcar = Potcar.from_file(f"{defect_name}_0/Bond_Distortion_-50.0%/POTCAR") diff --git a/tests/test_energy_lowering_distortions.py b/tests/test_energy_lowering_distortions.py index 5c50570e..f378cebd 100644 --- a/tests/test_energy_lowering_distortions.py +++ b/tests/test_energy_lowering_distortions.py @@ -7,10 +7,22 @@ import ase import numpy as np from monty.serialization import dumpfn, loadfn -from pymatgen.core.structure import Structure +from pymatgen.core.structure import Structure, Composition, IStructure, PeriodicSite from shakenbreak import analysis, distortions, energy_lowering_distortions, io +# use doped efficiency functions for speed (speeds up structure matching dramatically): +from doped.utils.efficiency import Composition as doped_Composition +from doped.utils.efficiency import IStructure as doped_IStructure +from doped.utils.efficiency import PeriodicSite as doped_PeriodicSite + +Composition.__instances__ = {} +Composition.__eq__ = doped_Composition.__eq__ +PeriodicSite.__eq__ = doped_PeriodicSite.__eq__ +PeriodicSite.__hash__ = doped_PeriodicSite.__hash__ +IStructure.__instances__ = {} +IStructure.__eq__ = doped_IStructure.__eq__ + def if_present_rm(path): if os.path.exists(path): @@ -119,6 +131,12 @@ def tearDown(self): if file not in orig_files: if_present_rm(os.path.join(data_dir, "vac_1_Cd_0", "Bond_Distortion_30.0%", file)) + if os.path.exists(f"{self.VASP_CDTE_DATA_DIR}/vac_1_Cd_0/Bond_Distortion_-10.0%/CONTCAR_original"): + shutil.move( + f"{self.VASP_CDTE_DATA_DIR}/vac_1_Cd_0/Bond_Distortion_-10.0%/CONTCAR_original", + f"{self.VASP_CDTE_DATA_DIR}/vac_1_Cd_0/Bond_Distortion_-10.0%/CONTCAR", + ) + def test__format_distortion_directory_name(self): self.assertEqual( "my_output_path/my_defect_species/Unperturbed_from_+2", @@ -1008,7 +1026,7 @@ def test_write_retest_inputs(self): "vac_1_Cd_-1/Bond_Distortion_-55.0%_from_0/structure.cif", ) ) - self.assertTrue(analysis._calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) + self.assertTrue(analysis._cached_calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) # Test copying over Quantum Espresso input files shutil.move( # avoid overwriting yaml file @@ -1058,7 +1076,7 @@ def test_write_retest_inputs(self): ) as f: atoms = ase.io.espresso.read_espresso_in(f) struct = Structure.from_ase_atoms(atoms) - self.assertTrue(analysis._calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) + self.assertTrue(analysis._cached_calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) # Test copying over FHI-aims input files when the input files are only # present in one distortion directory (different from Unperturbed) @@ -1107,7 +1125,7 @@ def test_write_retest_inputs(self): "vac_1_Cd_-1/Bond_Distortion_-55.0%_from_0/geometry.in", ) ) - self.assertTrue(analysis._calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) + self.assertTrue(analysis._cached_calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) # Test CASTEP input files shutil.move( # avoid overwriting yaml file @@ -1161,7 +1179,7 @@ def test_write_retest_inputs(self): ) ) ) - self.assertTrue(analysis._calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) + self.assertTrue(analysis._cached_calculate_atomic_disp(struct, self.V_Cd_minus_0pt55_structure)[0] < 0.01) if __name__ == "__main__": diff --git a/tests/test_input.py b/tests/test_input.py index ff9fd824..6a66d4ee 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -3725,27 +3725,6 @@ def test_from_structures(self): list([0.8125, 0.1875, 0.8125]), ) - # test defect_coords working even when significantly off (~2.2 Å) correct site, - # with rattled bulk - rattled_bulk = rattle(self.CdTe_bulk_struc) - with patch("builtins.print") as mock_print: - dist = input.Distortions.from_structures( - [(self.V_Cd_struc, [0, 0, 0])], bulk=rattled_bulk - ) - for charge in [0, -1, -2]: - self.assertEqual( - [ - i.defect - for i in dist.defects_dict[list(dist.defects_dict.keys())[0]] - if i.charge_state == charge - ][0], - [ - i.defect - for i in self.cdte_defects["vac_1_Cd"] - if i.charge_state == charge - ][0], - ) - # Test wrong type for defect index/coords with warnings.catch_warnings(record=True) as w: dist = input.Distortions.from_structures( diff --git a/tests/test_io.py b/tests/test_io.py index 2483b28b..0aeacef9 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -2,9 +2,9 @@ import unittest import warnings -from pymatgen.core.structure import Structure +from pymatgen.core.structure import Structure, Composition, IStructure, PeriodicSite -from shakenbreak.analysis import _calculate_atomic_disp +from shakenbreak.analysis import _cached_calculate_atomic_disp from shakenbreak.io import ( parse_fhi_aims_input, parse_qe_input, @@ -15,9 +15,21 @@ read_vasp_structure, ) +# use doped efficiency functions for speed (speeds up structure matching dramatically): +from doped.utils.efficiency import Composition as doped_Composition +from doped.utils.efficiency import IStructure as doped_IStructure +from doped.utils.efficiency import PeriodicSite as doped_PeriodicSite + +Composition.__instances__ = {} +Composition.__eq__ = doped_Composition.__eq__ +PeriodicSite.__eq__ = doped_PeriodicSite.__eq__ +PeriodicSite.__hash__ = doped_PeriodicSite.__hash__ +IStructure.__instances__ = {} +IStructure.__eq__ = doped_IStructure.__eq__ + class IoTestCase(unittest.TestCase): - """ "Test functions in shakenbreak.io. + """Test functions in shakenbreak.io. Note that io.parse_energies is tested via test_cli.py""" def setUp(self): @@ -80,7 +92,7 @@ def test_read_espresso_structure(self): ) ) self.assertTrue( - _calculate_atomic_disp(structure_from_cif, structure_from_espresso_output)[ + _cached_calculate_atomic_disp(structure_from_cif, structure_from_espresso_output)[ 0 ] < 0.01 @@ -100,7 +112,7 @@ def test_read_castep_structure(self): test_structure = Structure.from_file( os.path.join(self.DATA_DIR, "castep/POSCAR") ) - self.assertTrue(_calculate_atomic_disp(test_structure, structure)[0] < 0.01) + self.assertTrue(_cached_calculate_atomic_disp(test_structure, structure)[0] < 0.01) def test_read_fhi_aims_structure(self): "Test read_cp2k_structure() function." @@ -110,14 +122,14 @@ def test_read_fhi_aims_structure(self): test_structure = Structure.from_file( os.path.join(self.DATA_DIR, "fhi_aims/POSCAR") ) - self.assertTrue(_calculate_atomic_disp(test_structure, structure)[0] < 0.01) + self.assertTrue(_cached_calculate_atomic_disp(test_structure, structure)[0] < 0.01) # Test parsing geometry.in file path = os.path.join(self.DATA_DIR, "fhi_aims/geometry.in") structure = read_fhi_aims_structure(path, format="aims") test_structure = Structure.from_file( os.path.join(self.DATA_DIR, "fhi_aims/POSCAR_geom") ) - self.assertTrue(_calculate_atomic_disp(test_structure, structure)[0] < 0.01) + self.assertTrue(_cached_calculate_atomic_disp(test_structure, structure)[0] < 0.01) def test_parse_qe_input(self): "Test parse_qe_input() function."