[pre-commit.ci] auto fixes from pre-commit.com hooks
For more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jan 13, 2025
1 parent 7e1c3b5 commit ae1ed21
Showing 28 changed files with 123 additions and 122 deletions.
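
Every hunk below applies one of two mechanical rewrites from the updated formatter: spaces around binary operators inside f-string replacement fields, and long assert messages wrapped in their own parentheses instead of parenthesizing the asserted condition. A minimal sketch of both patterns follows (variable names are made up for illustration; the exact hook versions behind this run aren't shown on this page, though the style matches Ruff's 2025 formatter conventions):

# Illustrative sketch of the two rewrite patterns in this commit (names made up).
n_bad, n_total = 3, 120

# Pattern 1: spaces around operators inside f-string replacement fields.
# Before: f"{n_bad/n_total:.1%} of records"
message = f"{n_bad / n_total:.1%} of records"

# Pattern 2: the assert message moves into trailing parentheses; the condition
# itself is no longer wrapped.
# Before:
#     assert (
#         n_bad < n_total
#     ), f"unexpected counts: {n_bad}, {n_total}"
assert n_bad < n_total, (
    f"unexpected counts: {n_bad}, {n_total}"
)

print(message)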
10 changes: 5 additions & 5 deletions src/pudl/analysis/allocate_gen_fuel.py
@@ -1041,7 +1041,7 @@ def _allocate_unassociated_pm_records(
)
logger.info(
f"Associating and allocating {len(eia_generators_unassociated)} "
f"({len(eia_generators_unassociated)/len(gen_assoc):.1%}) records with "
f"({len(eia_generators_unassociated) / len(gen_assoc):.1%}) records with "
f"unexpected {col_w_unexpected_codes}."
)

@@ -1579,7 +1579,7 @@ def assign_plant_year(df):
]

logger.info(
f"Distributing {len(annual_reporters)/len(reporters):.1%} annually reported"
f"Distributing {len(annual_reporters) / len(reporters):.1%} annually reported"
" records to months."
)
# first convert the december month to january bc expand_timeseries expands from
@@ -1965,7 +1965,7 @@ def calc_net_gen_diff(gen_pm_fuel, idx):
& (gen_pm_fuel_test.net_generation_mwh_diff.notnull())
]
logger.info(
f"{len(bad_diff)/len(gen_pm_fuel):.03%} of records have are partially "
f"{len(bad_diff) / len(gen_pm_fuel):.03%} of records have are partially "
"off from their 'IDX_PM_ESC' group"
)
no_cap_gen = gen_pm_fuel_test[
@@ -1978,11 +1978,11 @@ def calc_net_gen_diff(gen_pm_fuel, idx):
fuel_net_gen = gf[gf.plant_id_eia != "99999"].net_generation_mwh.sum()
logger.info(
"gen v fuel table net gen diff: "
f"{(gen.net_generation_mwh.sum())/fuel_net_gen:.1%}"
f"{(gen.net_generation_mwh.sum()) / fuel_net_gen:.1%}"
)
logger.info(
"new v fuel table net gen diff: "
f"{(gen_pm_fuel_test.net_generation_mwh.sum())/fuel_net_gen:.1%}"
f"{(gen_pm_fuel_test.net_generation_mwh.sum()) / fuel_net_gen:.1%}"
)

gen_pm_fuel_test = gen_pm_fuel_test.drop(
6 changes: 4 additions & 2 deletions src/pudl/analysis/plant_parts_eia.py
@@ -555,7 +555,7 @@ def label_operating_gens(self, gen_df: pd.DataFrame) -> pd.DataFrame:
)

logger.info(
f"Labeled {len(gen_df.loc[~existing_mask])/len(gen_df):.02%} of "
f"Labeled {len(gen_df.loc[~existing_mask]) / len(gen_df):.02%} of "
"generators as non-operative."
)
return gen_df
@@ -726,7 +726,9 @@ def add_one_to_many(
)
]

assert double_df.empty, f"The following record ids have >1 faked part. Double-check these records or move them to the eia_ferc1_null.csv: {one_to_many.loc[one_to_many.gen_id.isin(orig_ids.record_id_eia), 'record_id_ferc1'].drop_duplicates().tolist()}"
assert double_df.empty, (
f"The following record ids have >1 faked part. Double-check these records or move them to the eia_ferc1_null.csv: {one_to_many.loc[one_to_many.gen_id.isin(orig_ids.record_id_eia), 'record_id_ferc1'].drop_duplicates().tolist()}"
)

return pd.concat([plant_parts_eia, part_df])

6 changes: 3 additions & 3 deletions src/pudl/analysis/record_linkage/eia_ferc1_record_linkage.py
@@ -262,9 +262,9 @@ def get_best_matches(
f" True positives: {true_pos}\n"
f" False positives: {false_pos}\n"
f" False negatives: {false_neg}\n"
f" Precision: {true_pos/(true_pos + false_pos):.03}\n"
f" Recall: {true_pos/(true_pos + false_neg):.03}\n"
f" Accuracy: {true_pos/len(train_df):.03}\n"
f" Precision: {true_pos / (true_pos + false_pos):.03}\n"
f" Recall: {true_pos / (true_pos + false_neg):.03}\n"
f" Accuracy: {true_pos / len(train_df):.03}\n"
"Precision = of the training data FERC records that the model predicted a match for, this percentage was correct.\n"
"A measure of accuracy when the model makes a prediction.\n"
"Recall = of all of the training data FERC records, the model predicted a match for this percentage.\n"
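
As a side note on the metrics logged in the hunk above, here is a tiny worked sketch of the same precision / recall / accuracy formulas, using made-up counts rather than anything from the PUDL training data:

# Hypothetical confusion counts for a record-linkage model; illustrative only.
true_pos, false_pos, false_neg = 90, 10, 20
n_train = 120  # total training-data FERC records

precision = true_pos / (true_pos + false_pos)  # 0.9   -> right when it predicts a match
recall = true_pos / (true_pos + false_neg)     # ~0.818 -> share of real matches it finds
accuracy = true_pos / n_train                  # 0.75  -> share of training records matched correctly

print(f"Precision: {precision:.03}\nRecall: {recall:.03}\nAccuracy: {accuracy:.03}")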
10 changes: 6 additions & 4 deletions src/pudl/analysis/record_linkage/eia_ferc1_train.py
@@ -399,9 +399,9 @@ def _check_id_consistency(
"""
logger.debug(f"Checking {id_col} consistency for {error_message}")

assert (
len(bad_ids := df[~df[id_col].isin(actual_ids)][id_col].to_list()) == 0
), f"{id_col} {error_message}: {bad_ids}"
assert len(bad_ids := df[~df[id_col].isin(actual_ids)][id_col].to_list()) == 0, (
f"{id_col} {error_message}: {bad_ids}"
)


def check_if_already_in_training(training_data, validated_connections):
@@ -511,8 +511,10 @@ def validate_override_fixes(
]
)
== 0
), f"Found record_id_eia_override_1 duplicates: \
), (
f"Found record_id_eia_override_1 duplicates: \
{override_dups.record_id_eia_override_1.unique()}"
)

if not allow_mismatched_utilities:
# Make sure the EIA utility id from the override matches the PUDL id from the
8 changes: 4 additions & 4 deletions src/pudl/etl/glue_assets.py
@@ -328,7 +328,7 @@ def core_epa__assn_eia_epacamd_subplant_ids(
]
logger.info(
"Edited subplant_ids after update_subplant_ids: "
f"{len(subplant_id_diff)/len(subplant_ids_updated):.1}%"
f"{len(subplant_id_diff) / len(subplant_ids_updated):.1}%"
)
# overwrite the subplant ids and apply mannual update
subplant_ids_updated = (
@@ -456,9 +456,9 @@ def _subplant_ids_from_prepped_crosswalk(prepped: pd.DataFrame) -> pd.DataFrame:
)
for i, node_set in enumerate(nx.connected_components(graph)):
subgraph = graph.subgraph(node_set)
assert nx.algorithms.bipartite.is_bipartite(
subgraph
), f"non-bipartite: i={i}, node_set={node_set}"
assert nx.algorithms.bipartite.is_bipartite(subgraph), (
f"non-bipartite: i={i}, node_set={node_set}"
)
nx.set_edge_attributes(subgraph, name="global_subplant_id", values=i)
return nx.to_pandas_edgelist(graph)

6 changes: 3 additions & 3 deletions src/pudl/extract/extractor.py
@@ -396,9 +396,9 @@ def partitions_from_settings(context) -> DynamicOutput:
for date_partition in ["years", "half_years", "year_quarters"]
)
]
assert (
len(partition) == 1
), f"Only one working partition is supported: {partition}."
assert len(partition) == 1, (
f"Only one working partition is supported: {partition}."
)
partition = partition[0]
parts = getattr(data_settings, partition) # Get the actual values
# In Zenodo we use "year", "half_year" as the partition, but in our settings
14 changes: 7 additions & 7 deletions src/pudl/helpers.py
@@ -218,7 +218,7 @@ def add_fips_ids(

logger.info(
f"Assigned state FIPS codes for "
f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records."
f"{len(df[df.state_id_fips.notnull()]) / len(df):.2%} of records."
)
if county_col:
df["county_id_fips"] = df.apply(
@@ -234,7 +234,7 @@ def add_fips_ids(
df = df.astype({"county_id_fips": pd.StringDtype()})
logger.info(
f"Assigned county FIPS codes for "
f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records."
f"{len(df[df.county_id_fips.notnull()]) / len(df):.2%} of records."
)
return df

@@ -1378,7 +1378,7 @@ def zero_pad_numeric_string(col: pd.Series, n_digits: int) -> pd.Series:
# Replace anything that's not entirely digits with NA
.replace(r"[^\d]+", pd.NA, regex=True)
# Set any string longer than n_digits to NA
.replace(f"[\\d]{{{n_digits+1},}}", pd.NA, regex=True)
.replace(f"[\\d]{{{n_digits + 1},}}", pd.NA, regex=True)
# Pad the numeric string with leading zeroes to n_digits length
.str.zfill(n_digits)
# All-zero ZIP & FIPS codes are invalid.
@@ -1575,7 +1575,7 @@ def standardize_percentages_ratio(
frac_df.loc[dates, col] /= 100
if frac_df[col].max() > 1:
raise AssertionError(
f"{col}: Values >100pct observed: {frac_df.loc[frac_df[col]>1][col].unique()}"
f"{col}: Values >100pct observed: {frac_df.loc[frac_df[col] > 1][col].unique()}"
)
return frac_df

@@ -2152,9 +2152,9 @@ def check_tables_have_metadata(
not bool(value) for value in tables_missing_metadata_results.values()
)

assert (
has_no_missing_tables_with_missing_metadata
), f"These tables are missing datasette metadata: {tables_missing_metadata_results}"
assert has_no_missing_tables_with_missing_metadata, (
f"These tables are missing datasette metadata: {tables_missing_metadata_results}"
)


def retry(
3 changes: 1 addition & 2 deletions src/pudl/metadata/classes.py
@@ -606,8 +606,7 @@ def _check_encoder(cls, value, info: ValidationInfo):
dtype = info.data["type"]
if dtype not in ["string", "integer"]:
errors.append(
"Encoding only supported for string and integer fields, found "
f"{dtype}"
f"Encoding only supported for string and integer fields, found {dtype}"
)
if errors:
raise ValueError(format_errors(*errors, pydantic=True))
3 changes: 1 addition & 2 deletions src/pudl/metadata/sources.py
@@ -206,8 +206,7 @@
"title": "EIA Form 861 -- Annual Electric Power Industry Report",
"path": "https://www.eia.gov/electricity/data/eia861",
"description": (
"EIA Form 861 Annual Electric Power Industry Report, detailed "
"data files."
"EIA Form 861 Annual Electric Power Industry Report, detailed data files."
),
"field_namespace": "eia",
"working_partitions": {
4 changes: 2 additions & 2 deletions src/pudl/output/eia.py
@@ -422,7 +422,7 @@ def add_consistent_ba_code_column(plants: pd.DataFrame) -> pd.DataFrame:
)
plants_w_ba_codes = plants[plants.balancing_authority_code_eia_consistent.notnull()]
logger.info(
f"{len(plants_w_ba_codes)/len(plants):.1%} of plant records have consistently "
f"{len(plants_w_ba_codes) / len(plants):.1%} of plant records have consistently "
"reported BA Codes"
)
return plants
@@ -472,7 +472,7 @@ def log_current_ba_code_nulls(plants: pd.DataFrame, method_str: str) -> None:
"""
currently_null_len = len(plants[plants.balancing_authority_code_eia.isnull()])
logger.info(
f"{method_str}. {currently_null_len/len(plants):.1%} of records have no BA codes"
f"{method_str}. {currently_null_len / len(plants):.1%} of records have no BA codes"
)

# add a column for each of our backfilling options
7 changes: 3 additions & 4 deletions src/pudl/transform/classes.py
@@ -748,7 +748,7 @@ def correct_units(df: pd.DataFrame, params: UnitCorrections) -> pd.DataFrame:
na_after = sum(selected.isna())
total_nullified = na_after - na_before
logger.info(
f"{total_nullified}/{len(selected)} ({total_nullified/len(selected):.2%}) "
f"{total_nullified}/{len(selected)} ({total_nullified / len(selected):.2%}) "
"of records could not be corrected and were set to NA."
)
# Combine our cleaned up values with the other values we didn't select.
@@ -859,7 +859,7 @@ def drop_invalid_rows(df: pd.DataFrame, params: InvalidRows) -> pd.DataFrame:
# Mask the input dataframe and make a copy to avoid returning a slice.
df_out = df[mask].copy()
logger.info(
f"{1 - (len(df_out)/pre_drop_len):.1%} of records ({pre_drop_len-len(df_out)} "
f"{1 - (len(df_out) / pre_drop_len):.1%} of records ({pre_drop_len - len(df_out)} "
f"rows) contain only {params.invalid_values} values in required columns. "
"Dropped these 💩💩💩 records."
)
@@ -1067,8 +1067,7 @@ def _wrapper(self: AbstractTableTransformer, *args, **kwargs) -> pd.DataFrame:
)
if self.cache_dfs:
logger.debug(
f"{self.table_id.value}: Caching df to {key=} "
f"in {func.__name__}()"
f"{self.table_id.value}: Caching df to {key=} in {func.__name__}()"
)
self._cached_dfs[key] = df.copy()
return df
8 changes: 4 additions & 4 deletions src/pudl/transform/eia861.py
@@ -582,7 +582,7 @@ def add_backfilled_ba_code_column(df, by_cols: list[str]) -> pd.DataFrame:
start_nas = len(df.loc[df.balancing_authority_code_eia.isnull()])
logger.info(
f"Started with {start_nas} missing BA Codes out of {start_len} "
f"records ({start_nas/start_len:.2%})"
f"records ({start_nas / start_len:.2%})"
)
ba_ids = (
df[by_cols + ["balancing_authority_code_eia", "report_date"]]
@@ -606,7 +606,7 @@ def add_backfilled_ba_code_column(df, by_cols: list[str]) -> pd.DataFrame:
)
logger.info(
f"Ended with {end_nas} missing BA Codes out of {end_len} "
f"records ({end_nas/end_len:.2%})"
f"records ({end_nas / end_len:.2%})"
)
return ba_eia861_filled

@@ -768,9 +768,9 @@ def _drop_dupes(df, df_name, subset):
deduped_df = df.drop_duplicates(subset=subset)
deduped_nrows = len(df)
logger.info(
f"Dropped {tidy_nrows-deduped_nrows} duplicate records from EIA 861 "
f"Dropped {tidy_nrows - deduped_nrows} duplicate records from EIA 861 "
f"{df_name} table, out of a total of {tidy_nrows} records "
f"({(tidy_nrows-deduped_nrows)/tidy_nrows:.4%} of all records). "
f"({(tidy_nrows - deduped_nrows) / tidy_nrows:.4%} of all records). "
)
return deduped_df

30 changes: 15 additions & 15 deletions src/pudl/transform/eia923.py
@@ -79,13 +79,13 @@ def _get_plant_nuclear_unit_id_map(nuc_fuel: pd.DataFrame) -> dict[int, str]:
plant_to_nuc_id = plant_to_nuc_id.explode()

# check there is one nuclear unit per plant.
assert (
plant_to_nuc_id.index.is_unique
), "Found multiple nuclear units in plant_to_nuc_id mapping."
assert plant_to_nuc_id.index.is_unique, (
"Found multiple nuclear units in plant_to_nuc_id mapping."
)
# Check there are no missing nuclear unit ids.
assert (
~plant_to_nuc_id.isna()
).all(), "Found missing nuclear_unit_ids in plant_to_nuc_id mappings."
assert (~plant_to_nuc_id.isna()).all(), (
"Found missing nuclear_unit_ids in plant_to_nuc_id mappings."
)

plant_to_nuc_id = plant_to_nuc_id.astype("string")

@@ -154,13 +154,13 @@ def _get_plant_prime_mover_map(gen_fuel: pd.DataFrame) -> dict[int, str]:
plant_to_prime_mover = plant_to_prime_mover.explode()

# check there is one prime mover per plant.
assert (
plant_to_prime_mover.index.is_unique
), "Found multiple plants in plant_to_prime_mover mapping."
assert plant_to_prime_mover.index.is_unique, (
"Found multiple plants in plant_to_prime_mover mapping."
)
# Check there are no missing prime mover codes.
assert (
plant_to_prime_mover.notnull()
).all(), "Found missing prime_mover_codes in plant_to_prime_mover mappings."
assert (plant_to_prime_mover.notnull()).all(), (
"Found missing prime_mover_codes in plant_to_prime_mover mappings."
)

return dict(plant_to_prime_mover)

@@ -201,9 +201,9 @@ def _backfill_prime_mover_code(gen_fuel: pd.DataFrame) -> pd.DataFrame:
missing_prime_movers = gen_fuel.prime_mover_code.isna()
gen_fuel.loc[missing_prime_movers, "prime_mover_code"] = "UNK"

assert (
gen_fuel.prime_mover_code.notna().all()
), "generation_fuel_923.prime_mover_code has missing values after backfill."
assert gen_fuel.prime_mover_code.notna().all(), (
"generation_fuel_923.prime_mover_code has missing values after backfill."
)
return gen_fuel


6 changes: 3 additions & 3 deletions src/pudl/transform/eia_bulk_elec.py
@@ -77,9 +77,9 @@ def _map_key_codes_to_readable_values(compound_keys: pd.DataFrame) -> pd.DataFra
}
for col_name, mapping in mappings.items():
keys.loc[:, col_name] = keys.loc[:, col_name].map(mapping)
assert (
keys.loc[:, col_name].notnull().all()
), f"{col_name} contains an unmapped category."
assert keys.loc[:, col_name].notnull().all(), (
f"{col_name} contains an unmapped category."
)

keys = keys.astype("category")
return keys
