materialsproject · DanielYang59 · Feb 1, 2025 · Feb 1, 2025 · Feb 1, 2025 · Feb 1, 2025
@@ -21,7 +21,7 @@
 
 class PotcarScrambler:
     """
-    Takes a POTCAR and replaces its values with completely random values
+    Takes a POTCAR and replaces its values with completely random values.
     Does type matching and attempts precision matching on floats to ensure
     file is read correctly by Potcar and PotcarSingle classes.
 
@@ -40,14 +40,15 @@ class PotcarScrambler:
 
     def __init__(self, potcars: Potcar | PotcarSingle) -> None:
         self.PSP_list = [potcars] if isinstance(potcars, PotcarSingle) else potcars
-        self.scrambled_potcars_str = ""
+        self.scrambled_potcars_str: str = ""
         for psp in self.PSP_list:
             scrambled_potcar_str = self.scramble_single_potcar(psp)
             self.scrambled_potcars_str += scrambled_potcar_str
 
     def _rand_float_from_str_with_prec(self, input_str: str, bloat: float = 1.5) -> float:
-        n_prec = len(input_str.split(".")[1])
-        bd = max(1, bloat * abs(float(input_str)))  # ensure we don't get 0
+        """Generate a random float from str to replace true values."""
+        n_prec: int = len(input_str.split(".")[1])
+        bd: float = max(1.0, bloat * abs(float(input_str)))  # ensure we don't get 0
         return round(bd * np.random.default_rng().random(), n_prec)
 
     def _read_fortran_str_and_scramble(self, input_str: str, bloat: float = 1.5):
@@ -124,14 +125,16 @@ def scramble_single_potcar(self, potcar: PotcarSingle) -> str:
         return scrambled_potcar_str
 
     def to_file(self, filename: str) -> None:
+        """Write scrambled POTCAR to file."""
         with zopen(filename, mode="wt", encoding="utf-8") as file:
             file.write(self.scrambled_potcars_str)
 
     @classmethod
     def from_file(cls, input_filename: str, output_filename: str | None = None) -> Self:
+        """Read a POTCAR from file and generate a scrambled version."""
         psp = Potcar.from_file(input_filename)
         psp_scrambled = cls(psp)
-        if output_filename:
+        if output_filename is not None:
             psp_scrambled.to_file(output_filename)
         return psp_scrambled
 

@@ -33,13 +33,14 @@
 
     from pymatgen.util.typing import SpeciesLike
 
-# Load element data from JSON file
+# Load element data (periodic table) from JSON file
 with open(Path(__file__).absolute().parent / "periodic_table.json", encoding="utf-8") as ptable_json:
-    _pt_data = json.load(ptable_json)
+    _PT_DATA: dict = json.load(ptable_json)
 
-_pt_row_sizes = (2, 8, 8, 18, 18, 32, 32)
+_PT_ROW_SIZES: tuple[int, ...] = (2, 8, 8, 18, 18, 32, 32)
 
-_madelung = [
+# Madelung energy ordering rule (lower to higher energy)
+_MADELUNG: list[tuple[int, str]] = [
     (1, "s"),
     (2, "s"),
     (2, "p"),
@@ -137,21 +138,21 @@ def __init__(self, symbol: SpeciesLike) -> None:
             Solid State Communications, 1984.
         """
         self.symbol = str(symbol)
-        data = _pt_data[symbol]
+        data = _PT_DATA[symbol]
 
         # Store key variables for quick access
         self.Z = data["Atomic no"]
 
         self._is_named_isotope = data.get("Is named isotope", False)
         if self._is_named_isotope:
-            for sym in _pt_data:
-                if _pt_data[sym]["Atomic no"] == self.Z and not _pt_data[sym].get("Is named isotope", False):
+            for sym, info in _PT_DATA.items():
+                if info["Atomic no"] == self.Z and not info.get("Is named isotope", False):
                     self.symbol = sym
                     break
             # For specified/named isotopes, treat the same as named element
             # (the most common isotope). Then we pad the data block with the
             # entries for the named element.
-            data = {**_pt_data[self.symbol], **data}
+            data = {**_PT_DATA[self.symbol], **data}
 
         at_r = data.get("Atomic radius", "no data")
         if str(at_r).startswith("no data"):
@@ -452,33 +453,48 @@ def icsd_oxidation_states(self) -> tuple[int, ...]:
 
     @property
     def full_electronic_structure(self) -> list[tuple[int, str, int]]:
-        """Full electronic structure as list of tuples, in order of increasing
+        """Full electronic structure in order of increasing
         energy level (according to the Madelung rule). Therefore, the final
         element in the list gives the electronic structure of the valence shell.
 
-        For example, the electronic structure for Fe is represented as:
-        [(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
-        (4, "s", 2), (3, "d", 6)].
+        For example, the full electronic structure for Fe is:
+            [(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
+            (4, "s", 2), (3, "d", 6)].
 
         References:
             Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2023). NIST
             Atomic Spectra Database (ver. 5.11). https://physics.nist.gov/asd [2024,
             June 3]. National Institute of Standards and Technology, Gaithersburg,
             MD. DOI: https://doi.org/10.18434/T4W30F
+
+        Returns:
+            list[tuple[int, str, int]]: A list of tuples representing each subshell,
+            where each tuple contains:
+                - `n` (int): Principal quantum number.
+                - `orbital_type` (str): Orbital type (e.g., "s", "p", "d", "f").
+                - `electron_count` (int): Number of electrons in the subshell.
         """
-        e_str = self.electronic_structure
+        e_str: str = self.electronic_structure
 
-        def parse_orbital(orb_str):
+        def parse_orbital(orb_str: str) -> str | tuple[int, str, int]:
+            """Parse orbital information from split electron configuration string."""
+            # Parse valence subshell notation (e.g., "3d6" -> (3, "d", 6))
             if match := re.match(r"(\d+)([spdfg]+)(\d+)", orb_str):
                 return int(match[1]), match[2], int(match[3])
+
+            # Return core-electron configuration as-is (e.g. "[Ar]")
             return orb_str
 
-        data = [parse_orbital(s) for s in e_str.split(".")]
-        if data[0][0] == "[":
-            sym = data[0].replace("[", "").replace("]", "")
+        # Split e_str (e.g. for Fe "[Ar].3d6.4s2" into ["[Ar]", "3d6", "4s2"])
+        data: list = [parse_orbital(s) for s in e_str.split(".")]
+
+        # Fully expand core-electron configuration (replace noble gas notation string)
+        if isinstance(data[0], str):
+            sym: str = data[0].replace("[", "").replace("]", "")
             data = list(Element(sym).full_electronic_structure) + data[1:]
-        # sort the final electronic structure by increasing energy level
-        return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))
+
+        # Sort the final electronic structure by increasing energy level
+        return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))
 
     @property
     def n_electrons(self) -> int:
@@ -563,7 +579,7 @@ def ground_state_term_symbol(self) -> str:
         L_symbols = "SPDFGHIKLMNOQRTUVWXYZ"
 
         term_symbols = self.term_symbols
-        term_symbol_flat = {  # type: ignore[var-annotated]
+        term_symbol_flat: dict = {
             term: {
                 "multiplicity": int(term[0]),
                 "L": L_symbols.index(term[1]),
@@ -595,7 +611,7 @@ def from_Z(Z: int, A: int | None = None) -> Element:
         Returns:
             Element with atomic number Z.
         """
-        for sym, data in _pt_data.items():
+        for sym, data in _PT_DATA.items():
             atomic_mass_num = data.get("Atomic mass no") if A else None
             if data["Atomic no"] == Z and atomic_mass_num == A:
                 return Element(sym)
@@ -616,7 +632,7 @@ def from_name(name: str) -> Element:
         uk_to_us = {"aluminium": "aluminum", "caesium": "cesium"}
         name = uk_to_us.get(name.lower(), name)
 
-        for sym, data in _pt_data.items():
+        for sym, data in _PT_DATA.items():
             if data["Name"] == name.capitalize():
                 return Element(sym)
 
@@ -643,7 +659,7 @@ def from_row_and_group(row: int, group: int) -> Element:
         Note:
             The 18 group number system is used, i.e. noble gases are group 18.
         """
-        for sym in _pt_data:
+        for sym in _PT_DATA:
             el = Element(sym)
             if 57 <= el.Z <= 71:
                 el_pseudo_row = 8
@@ -683,7 +699,7 @@ def row(self) -> int:
             return 6
         if 89 <= z <= 103:
             return 7
-        for idx, size in enumerate(_pt_row_sizes, start=1):
+        for idx, size in enumerate(_PT_ROW_SIZES, start=1):
             total += size
             if total >= z:
                 return idx
@@ -1187,7 +1203,7 @@ def parse_orbital(orb_str):
             sym = data[0].replace("[", "").replace("]", "")
             data = list(Element(sym).full_electronic_structure) + data[1:]
         # sort the final electronic structure by increasing energy level
-        return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))
+        return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))
 
     # NOTE - copied exactly from Element. Refactoring / inheritance may improve
     # robustness

@@ -2127,24 +2127,52 @@ def __repr__(self) -> str:
         return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})"
 
     @property
-    def electron_configuration(self) -> list[tuple[int, str, int]] | None:
-        """Electronic configuration of the PotcarSingle."""
-        if not self.nelectrons.is_integer():
-            warnings.warn(
-                "POTCAR has non-integer charge, electron configuration not well-defined.",
-                stacklevel=2,
-            )
-            return None
-
-        el = Element.from_Z(self.atomic_no)
-        full_config = el.full_electronic_structure
-        nelect = self.nelectrons
-        config = []
-        while nelect > 0:
-            e = full_config.pop(-1)
-            config.append(e)
-            nelect -= e[-1]
-        return config
+    def electron_configuration(self) -> list[tuple[int, str, float]]:
+        """Valence electronic configuration corresponding to the ZVAL,
+        read from the "Atomic configuration" section of POTCAR.
+
+        Returns:
+            list[tuple[int, str, float]]: A list of tuples containing:
+                - n (int): Principal quantum number.
+                - subshell (str): Subshell notation (s, p, d, f).
+                - occ (float): Occupation number, limited to ZVAL.
+        """
+        # Find "Atomic configuration" section
+        match = re.search(r"Atomic configuration", self.data)
+        if match is None:
+            raise RuntimeError("Cannot find atomic configuration section in POTCAR.")
+
+        start_idx: int = self.data[: match.start()].count("\n")
+
+        lines = self.data.splitlines()
+
+        # Extract all subshells
+        match_entries = re.search(r"(\d+)\s+entries", lines[start_idx + 1])
+        if match_entries is None:
+            raise RuntimeError("Cannot find entries in POTCAR.")
+        num_entries: int = int(match_entries.group(1))
+
+        l_map: dict[int, str] = {0: "s", 1: "p", 2: "d", 3: "f", 4: "g", 5: "h"}
+        all_config: list[tuple[int, str, float]] = []
+        for line in lines[start_idx + 3 : start_idx + 3 + num_entries]:
+            parts = line.split()
+            n, ang_moment, _j, _E, occ = int(parts[0]), int(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])
+
+            all_config.append((n, l_map[ang_moment], occ))
+
+        # Get valence electron configuration (defined by ZVAL)
+        valence_config: list[tuple[int, str, float]] = []
+        total_electrons = 0.0
+
+        for n, subshell, occ in reversed(all_config):
+            if occ >= 0.01:  # TODO: hard-coded occupancy cutoff
+                valence_config.append((n, subshell, occ))
+                total_electrons += occ
+
+            if total_electrons >= self.zval:
+                break
+
+        return list(reversed(valence_config))
 
     @property
     def element(self) -> str:
@@ -2763,7 +2791,7 @@ def _gen_potcar_summary_stats(
                 }
             )
 
-    if summary_stats_filename:
+    if summary_stats_filename is not None:
         dumpfn(new_summary_stats, summary_stats_filename)
 
     return new_summary_stats
@@ -2892,16 +2920,16 @@ def set_symbols(
             functional (str): The functional to use. If None, the setting
                 PMG_DEFAULT_FUNCTIONAL in .pmgrc.yaml is used, or if this is
                 not set, it will default to PBE.
-            sym_potcar_map (dict): A map of symbol:raw POTCAR string. If
+            sym_potcar_map (dict): A map of symbol to raw POTCAR string. If
                 sym_potcar_map is specified, POTCARs will be generated from
                 the given map data rather than the config file location.
         """
         del self[:]
 
-        if sym_potcar_map:
-            self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)
-        else:
+        if sym_potcar_map is None:
             self.extend(PotcarSingle.from_symbol_and_functional(el, functional) for el in symbols)
+        else:
+            self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)
 
 
 class UnknownPotcarWarning(UserWarning):

@@ -3460,7 +3460,8 @@ def _combine_kpoints(*kpoints_objects: Kpoints | None) -> Kpoints:
     _kpoints: list[Sequence[Kpoint]] = []
     _weights = []
 
-    for kpoints_object in filter(None, kpoints_objects):  # type: ignore[var-annotated]
+    kpoints_object: Kpoints
+    for kpoints_object in filter(None, kpoints_objects):
         if kpoints_object.style != Kpoints.supported_modes.Reciprocal:
             raise ValueError("Can only combine kpoints with style=Kpoints.supported_modes.Reciprocal")
         if kpoints_object.labels is None:

diff --git a/tests/files/io/vasp/inputs/fake_potcars/POT_GGA_PAW_PBE_54/POTCAR.K_sv.gz b/tests/files/io/vasp/inputs/fake_potcars/POT_GGA_PAW_PBE_54/POTCAR.K_sv.gz