diff --git a/pymatgen/io/cif.py b/pymatgen/io/cif.py
index c94c114d6d5..c3b6083d83f 100644
--- a/pymatgen/io/cif.py
+++ b/pymatgen/io/cif.py
@@ -323,9 +323,9 @@ def __init__(
         """
         Args:
             filename (PathLike): CIF file, gzipped or bzipped CIF files are fine too.
-            occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the
-                occupancies will be scaled down to 1.
-            site_tolerance (float): This tolerance is used to determine if two sites are in the same position,
+            occupancy_tolerance (float): If total occupancy of a site is between
+                1 and occupancy_tolerance, it will be scaled down to 1.
+            site_tolerance (float): This tolerance is used to determine if two sites are at the same position,
                 in which case they will be combined to a single disordered site. Defaults to 1e-4.
             frac_tolerance (float): This tolerance is used to determine is a coordinate should be rounded to an ideal
                 value. e.g. 0.6667 is rounded to 2/3. This is desired if symmetry operations are going to be applied.
@@ -1027,11 +1027,11 @@ def get_matching_coord(
 
             # Get occupancy
             try:
-                occu = str2float(data["_atom_site_occupancy"][idx])
+                occu: float = str2float(data["_atom_site_occupancy"][idx])
             except (KeyError, ValueError):
                 occu = 1
 
-            # If check_occu is True or the occupancy is greater than 0, create comp_d
+            # If check_occu is False or the occupancy is greater than 0, create comp_dict
             if not check_occu or occu > 0:
                 # Create site coordinate
                 coord: Vector3D = (
@@ -1073,7 +1073,7 @@ def get_matching_coord(
 
         if any(occu > 1 for occu in _sum_occupancies):
             msg = (
-                f"Some occupancies ({_sum_occupancies}) sum to > 1! If they are within "
+                f"Some occupancies ({[occu for occu in _sum_occupancies if occu > 1]}) sum to > 1! If they are within "
                 "the occupancy_tolerance, they will be rescaled. "
                 f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}"
             )
@@ -1149,7 +1149,10 @@ def get_matching_coord(
         all_species_noedit = all_species.copy()  # save copy before scaling in case of check_occu=False, used below
         for idx, species in enumerate(all_species):
             total_occu = sum(species.values())
-            if 1 < total_occu <= self._occupancy_tolerance:
+            if check_occu and total_occu > self._occupancy_tolerance:
+                raise ValueError(f"Occupancy {total_occu} exceeded tolerance.")
+
+            if total_occu > 1:
                 all_species[idx] = species / total_occu
 
         if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms):
@@ -1198,6 +1201,7 @@ def get_matching_coord(
                         all_coords[idx],
                         lattice,
                         properties=site_properties,
+                        label=all_labels[idx],
                         skip_checks=True,
                     )
 
@@ -1278,8 +1282,6 @@ def parse_structures(
                 "in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.",
                 UserWarning,
             )
-        if not check_occu:  # added in https://github.com/materialsproject/pymatgen/pull/2836
-            warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")
 
         if primitive and symmetrized:
             raise ValueError(
diff --git a/tests/io/test_cif.py b/tests/io/test_cif.py
index 5f0a897f0bf..87627ee9a7c 100644
--- a/tests/io/test_cif.py
+++ b/tests/io/test_cif.py
@@ -731,17 +731,28 @@ def test_empty(self):
         cb2 = CifBlock.from_str(str(cb))
         assert cb == cb2
 
-    def test_bad_cif(self):
+    def test_bad_occu(self):
         filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif"
         parser = CifParser(filepath)
         with pytest.raises(
-            ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
+            ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance."
         ):
             parser.parse_structures(on_error="raise")
         parser = CifParser(filepath, occupancy_tolerance=2)
         struct = parser.parse_structures()[0]
         assert struct[0].species["Al3+"] == approx(0.778)
 
+    def test_not_check_occu(self):
+        # Test large occupancy with check_occu turned off
+        with open(f"{TEST_FILES_DIR}/cif/site_type_symbol_test.cif", encoding="utf-8") as cif_file:
+            cif_str = cif_file.read()
+        cif_str = cif_str.replace("Te Te 1.0000", "Te_label Te 10.0", 1)
+
+        structs = CifParser.from_str(cif_str).parse_structures(check_occu=False)
+
+        assert len(structs) > 0
+        assert set(structs[0].labels) == {"Te_label", "Ge"}
+
     def test_one_line_symm(self):
         cif_file = f"{TEST_FILES_DIR}/cif/OneLineSymmP1.cif"
         parser = CifParser(cif_file)