Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Breaking] Fix valence electron configuration parsing for PotcarSingle.electron_configuration #4278

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
13 changes: 8 additions & 5 deletions dev_scripts/potcar_scrambler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

class PotcarScrambler:
"""
Takes a POTCAR and replaces its values with completely random values
Takes a POTCAR and replaces its values with completely random values.
Does type matching and attempts precision matching on floats to ensure
file is read correctly by Potcar and PotcarSingle classes.

Expand All @@ -40,14 +40,15 @@ class PotcarScrambler:

def __init__(self, potcars: Potcar | PotcarSingle) -> None:
self.PSP_list = [potcars] if isinstance(potcars, PotcarSingle) else potcars
self.scrambled_potcars_str = ""
self.scrambled_potcars_str: str = ""
for psp in self.PSP_list:
scrambled_potcar_str = self.scramble_single_potcar(psp)
self.scrambled_potcars_str += scrambled_potcar_str

def _rand_float_from_str_with_prec(self, input_str: str, bloat: float = 1.5) -> float:
n_prec = len(input_str.split(".")[1])
bd = max(1, bloat * abs(float(input_str))) # ensure we don't get 0
"""Generate a random float from str to replace true values."""
n_prec: int = len(input_str.split(".")[1])
bd: float = max(1.0, bloat * abs(float(input_str))) # ensure we don't get 0
return round(bd * np.random.default_rng().random(), n_prec)

def _read_fortran_str_and_scramble(self, input_str: str, bloat: float = 1.5):
Expand Down Expand Up @@ -124,14 +125,16 @@ def scramble_single_potcar(self, potcar: PotcarSingle) -> str:
return scrambled_potcar_str

def to_file(self, filename: str) -> None:
"""Write scrambled POTCAR to file."""
with zopen(filename, mode="wt", encoding="utf-8") as file:
file.write(self.scrambled_potcars_str)

@classmethod
def from_file(cls, input_filename: str, output_filename: str | None = None) -> Self:
"""Read a POTCAR from file and generate a scrambled version."""
psp = Potcar.from_file(input_filename)
psp_scrambled = cls(psp)
if output_filename:
if output_filename is not None:
psp_scrambled.to_file(output_filename)
return psp_scrambled

Expand Down
66 changes: 41 additions & 25 deletions src/pymatgen/core/periodic_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@

from pymatgen.util.typing import SpeciesLike

# Load element data from JSON file
# Load element data (periodic table) from JSON file
with open(Path(__file__).absolute().parent / "periodic_table.json", encoding="utf-8") as ptable_json:
_pt_data = json.load(ptable_json)
_PT_DATA: dict = json.load(ptable_json)

_pt_row_sizes = (2, 8, 8, 18, 18, 32, 32)
_PT_ROW_SIZES: tuple[int, ...] = (2, 8, 8, 18, 18, 32, 32)

_madelung = [
# Madelung energy ordering rule (lower to higher energy)
_MADELUNG: list[tuple[int, str]] = [
(1, "s"),
(2, "s"),
(2, "p"),
Expand Down Expand Up @@ -137,21 +138,21 @@ def __init__(self, symbol: SpeciesLike) -> None:
Solid State Communications, 1984.
"""
self.symbol = str(symbol)
data = _pt_data[symbol]
data = _PT_DATA[symbol]

# Store key variables for quick access
self.Z = data["Atomic no"]

self._is_named_isotope = data.get("Is named isotope", False)
if self._is_named_isotope:
for sym in _pt_data:
if _pt_data[sym]["Atomic no"] == self.Z and not _pt_data[sym].get("Is named isotope", False):
for sym, info in _PT_DATA.items():
if info["Atomic no"] == self.Z and not info.get("Is named isotope", False):
self.symbol = sym
break
# For specified/named isotopes, treat the same as named element
# (the most common isotope). Then we pad the data block with the
# entries for the named element.
data = {**_pt_data[self.symbol], **data}
data = {**_PT_DATA[self.symbol], **data}

at_r = data.get("Atomic radius", "no data")
if str(at_r).startswith("no data"):
Expand Down Expand Up @@ -452,33 +453,48 @@ def icsd_oxidation_states(self) -> tuple[int, ...]:

@property
def full_electronic_structure(self) -> list[tuple[int, str, int]]:
"""Full electronic structure as list of tuples, in order of increasing
"""Full electronic structure in order of increasing
energy level (according to the Madelung rule). Therefore, the final
element in the list gives the electronic structure of the valence shell.

For example, the electronic structure for Fe is represented as:
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
(4, "s", 2), (3, "d", 6)].
For example, the full electronic structure for Fe is:
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
(4, "s", 2), (3, "d", 6)].

References:
Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2023). NIST
Atomic Spectra Database (ver. 5.11). https://physics.nist.gov/asd [2024,
June 3]. National Institute of Standards and Technology, Gaithersburg,
MD. DOI: https://doi.org/10.18434/T4W30F

Returns:
list[tuple[int, str, int]]: A list of tuples representing each subshell,
where each tuple contains:
- `n` (int): Principal quantum number.
- `orbital_type` (str): Orbital type (e.g., "s", "p", "d", "f").
- `electron_count` (int): Number of electrons in the subshell.
"""
e_str = self.electronic_structure
e_str: str = self.electronic_structure

def parse_orbital(orb_str):
def parse_orbital(orb_str: str) -> str | tuple[int, str, int]:
"""Parse orbital information from split electron configuration string."""
# Parse valence subshell notation (e.g., "3d6" -> (3, "d", 6))
if match := re.match(r"(\d+)([spdfg]+)(\d+)", orb_str):
return int(match[1]), match[2], int(match[3])

# Return core-electron configuration as-is (e.g. "[Ar]")
return orb_str

data = [parse_orbital(s) for s in e_str.split(".")]
if data[0][0] == "[":
sym = data[0].replace("[", "").replace("]", "")
# Split e_str (e.g. for Fe "[Ar].3d6.4s2" into ["[Ar]", "3d6", "4s2"])
data: list = [parse_orbital(s) for s in e_str.split(".")]

# Fully expand core-electron configuration (replace noble gas notation string)
if isinstance(data[0], str):
sym: str = data[0].replace("[", "").replace("]", "")
data = list(Element(sym).full_electronic_structure) + data[1:]
# sort the final electronic structure by increasing energy level
return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))

# Sort the final electronic structure by increasing energy level
return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))

@property
def n_electrons(self) -> int:
Expand Down Expand Up @@ -563,7 +579,7 @@ def ground_state_term_symbol(self) -> str:
L_symbols = "SPDFGHIKLMNOQRTUVWXYZ"

term_symbols = self.term_symbols
term_symbol_flat = { # type: ignore[var-annotated]
term_symbol_flat: dict = {
term: {
"multiplicity": int(term[0]),
"L": L_symbols.index(term[1]),
Expand Down Expand Up @@ -595,7 +611,7 @@ def from_Z(Z: int, A: int | None = None) -> Element:
Returns:
Element with atomic number Z.
"""
for sym, data in _pt_data.items():
for sym, data in _PT_DATA.items():
atomic_mass_num = data.get("Atomic mass no") if A else None
if data["Atomic no"] == Z and atomic_mass_num == A:
return Element(sym)
Expand All @@ -616,7 +632,7 @@ def from_name(name: str) -> Element:
uk_to_us = {"aluminium": "aluminum", "caesium": "cesium"}
name = uk_to_us.get(name.lower(), name)

for sym, data in _pt_data.items():
for sym, data in _PT_DATA.items():
if data["Name"] == name.capitalize():
return Element(sym)

Expand All @@ -643,7 +659,7 @@ def from_row_and_group(row: int, group: int) -> Element:
Note:
The 18 group number system is used, i.e. noble gases are group 18.
"""
for sym in _pt_data:
for sym in _PT_DATA:
el = Element(sym)
if 57 <= el.Z <= 71:
el_pseudo_row = 8
Expand Down Expand Up @@ -683,7 +699,7 @@ def row(self) -> int:
return 6
if 89 <= z <= 103:
return 7
for idx, size in enumerate(_pt_row_sizes, start=1):
for idx, size in enumerate(_PT_ROW_SIZES, start=1):
total += size
if total >= z:
return idx
Expand Down Expand Up @@ -1187,7 +1203,7 @@ def parse_orbital(orb_str):
sym = data[0].replace("[", "").replace("]", "")
data = list(Element(sym).full_electronic_structure) + data[1:]
# sort the final electronic structure by increasing energy level
return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))
return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))

# NOTE - copied exactly from Element. Refactoring / inheritance may improve
# robustness
Expand Down
74 changes: 51 additions & 23 deletions src/pymatgen/io/vasp/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2127,24 +2127,52 @@ def __repr__(self) -> str:
return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})"

@property
def electron_configuration(self) -> list[tuple[int, str, int]] | None:
"""Electronic configuration of the PotcarSingle."""
if not self.nelectrons.is_integer():
warnings.warn(
"POTCAR has non-integer charge, electron configuration not well-defined.",
stacklevel=2,
)
return None

el = Element.from_Z(self.atomic_no)
full_config = el.full_electronic_structure
nelect = self.nelectrons
config = []
while nelect > 0:
e = full_config.pop(-1)
config.append(e)
nelect -= e[-1]
return config
def electron_configuration(self) -> list[tuple[int, str, float]]:
"""Valence electronic configuration corresponding to the ZVAL,
read from the "Atomic configuration" section of POTCAR.

Returns:
list[tuple[int, str, float]]: A list of tuples containing:
- n (int): Principal quantum number.
- subshell (str): Subshell notation (s, p, d, f).
- occ (float): Occupation number, limited to ZVAL.
"""
# Find "Atomic configuration" section
match = re.search(r"Atomic configuration", self.data)
if match is None:
raise RuntimeError("Cannot find atomic configuration section in POTCAR.")

start_idx: int = self.data[: match.start()].count("\n")

lines = self.data.splitlines()

# Extract all subshells
match_entries = re.search(r"(\d+)\s+entries", lines[start_idx + 1])
if match_entries is None:
raise RuntimeError("Cannot find entries in POTCAR.")
num_entries: int = int(match_entries.group(1))

l_map: dict[int, str] = {0: "s", 1: "p", 2: "d", 3: "f", 4: "g", 5: "h"}
all_config: list[tuple[int, str, float]] = []
for line in lines[start_idx + 3 : start_idx + 3 + num_entries]:
parts = line.split()
n, ang_moment, _j, _E, occ = int(parts[0]), int(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])

all_config.append((n, l_map[ang_moment], occ))

# Get valence electron configuration (defined by ZVAL)
valence_config: list[tuple[int, str, float]] = []
total_electrons = 0.0

for n, subshell, occ in reversed(all_config):
if occ >= 0.01: # TODO: hard-coded occupancy cutoff
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO tag: hard-coded cutoff

valence_config.append((n, subshell, occ))
total_electrons += occ

if total_electrons >= self.zval:
break

return list(reversed(valence_config))

@property
def element(self) -> str:
Expand Down Expand Up @@ -2763,7 +2791,7 @@ def _gen_potcar_summary_stats(
}
)

if summary_stats_filename:
if summary_stats_filename is not None:
dumpfn(new_summary_stats, summary_stats_filename)

return new_summary_stats
Expand Down Expand Up @@ -2892,16 +2920,16 @@ def set_symbols(
functional (str): The functional to use. If None, the setting
PMG_DEFAULT_FUNCTIONAL in .pmgrc.yaml is used, or if this is
not set, it will default to PBE.
sym_potcar_map (dict): A map of symbol:raw POTCAR string. If
sym_potcar_map (dict): A map of symbol to raw POTCAR string. If
sym_potcar_map is specified, POTCARs will be generated from
the given map data rather than the config file location.
"""
del self[:]

if sym_potcar_map:
self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)
else:
if sym_potcar_map is None:
self.extend(PotcarSingle.from_symbol_and_functional(el, functional) for el in symbols)
else:
self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)


class UnknownPotcarWarning(UserWarning):
Expand Down
3 changes: 2 additions & 1 deletion src/pymatgen/io/vasp/sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3460,7 +3460,8 @@ def _combine_kpoints(*kpoints_objects: Kpoints | None) -> Kpoints:
_kpoints: list[Sequence[Kpoint]] = []
_weights = []

for kpoints_object in filter(None, kpoints_objects): # type: ignore[var-annotated]
kpoints_object: Kpoints
for kpoints_object in filter(None, kpoints_objects):
if kpoints_object.style != Kpoints.supported_modes.Reciprocal:
raise ValueError("Can only combine kpoints with style=Kpoints.supported_modes.Reciprocal")
if kpoints_object.labels is None:
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading