[pre-commit.ci] auto fixes from pre-commit.com hooks
For more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jan 13, 2025
1 parent 7e1c3b5 commit ae1ed21
Showing 28 changed files with 123 additions and 122 deletions.
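
Every hunk below applies one of two mechanical rewrites from the updated formatter: spaces around binary operators inside f-string replacement fields, and long assert messages wrapped in their own parentheses instead of parenthesizing the asserted condition. A minimal sketch of both patterns follows (variable names are made up for illustration; the exact hook versions behind this run aren't shown on this page, though the style matches Ruff's 2025 formatter conventions):

# Illustrative sketch of the two rewrite patterns in this commit (names made up).
n_bad, n_total = 3, 120

# Pattern 1: spaces around operators inside f-string replacement fields.
# Before: f"{n_bad/n_total:.1%} of records"
message = f"{n_bad / n_total:.1%} of records"

# Pattern 2: the assert message moves into trailing parentheses; the condition
# itself is no longer wrapped.
# Before:
#     assert (
#         n_bad < n_total
#     ), f"unexpected counts: {n_bad}, {n_total}"
assert n_bad < n_total, (
    f"unexpected counts: {n_bad}, {n_total}"
)

print(message)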
10 changes: 5 additions & 5 deletions src/pudl/analysis/allocate_gen_fuel.py
@@ -1041,7 +1041,7 @@ def _allocate_unassociated_pm_records(
)
logger.info(
f"Associating and allocating {len(eia_generators_unassociated)} "
f"({len(eia_generators_unassociated)/len(gen_assoc):.1%}) records with "
f"({len(eia_generators_unassociated) / len(gen_assoc):.1%}) records with "
f"unexpected {col_w_unexpected_codes}."
)

@@ -1579,7 +1579,7 @@ def assign_plant_year(df):
]

logger.info(
f"Distributing {len(annual_reporters)/len(reporters):.1%} annually reported"
f"Distributing {len(annual_reporters) / len(reporters):.1%} annually reported"
" records to months."
)
# first convert the december month to january bc expand_timeseries expands from
@@ -1965,7 +1965,7 @@ def calc_net_gen_diff(gen_pm_fuel, idx):
& (gen_pm_fuel_test.net_generation_mwh_diff.notnull())
]
logger.info(
f"{len(bad_diff)/len(gen_pm_fuel):.03%} of records have are partially "
f"{len(bad_diff) / len(gen_pm_fuel):.03%} of records have are partially "
"off from their 'IDX_PM_ESC' group"
)
no_cap_gen = gen_pm_fuel_test[
@@ -1978,11 +1978,11 @@ def calc_net_gen_diff(gen_pm_fuel, idx):
fuel_net_gen = gf[gf.plant_id_eia != "99999"].net_generation_mwh.sum()
logger.info(
"gen v fuel table net gen diff: "
f"{(gen.net_generation_mwh.sum())/fuel_net_gen:.1%}"
f"{(gen.net_generation_mwh.sum()) / fuel_net_gen:.1%}"
)
logger.info(
"new v fuel table net gen diff: "
f"{(gen_pm_fuel_test.net_generation_mwh.sum())/fuel_net_gen:.1%}"
f"{(gen_pm_fuel_test.net_generation_mwh.sum()) / fuel_net_gen:.1%}"
)

gen_pm_fuel_test = gen_pm_fuel_test.drop(
6 changes: 4 additions & 2 deletions src/pudl/analysis/plant_parts_eia.py
@@ -555,7 +555,7 @@ def label_operating_gens(self, gen_df: pd.DataFrame) -> pd.DataFrame:
)

logger.info(
f"Labeled {len(gen_df.loc[~existing_mask])/len(gen_df):.02%} of "
f"Labeled {len(gen_df.loc[~existing_mask]) / len(gen_df):.02%} of "
"generators as non-operative."
)
return gen_df
@@ -726,7 +726,9 @@ def add_one_to_many(
)
]

assert double_df.empty, f"The following record ids have >1 faked part. Double-check these records or move them to the eia_ferc1_null.csv: {one_to_many.loc[one_to_many.gen_id.isin(orig_ids.record_id_eia), 'record_id_ferc1'].drop_duplicates().tolist()}"
assert double_df.empty, (
f"The following record ids have >1 faked part. Double-check these records or move them to the eia_ferc1_null.csv: {one_to_many.loc[one_to_many.gen_id.isin(orig_ids.record_id_eia), 'record_id_ferc1'].drop_duplicates().tolist()}"
)

return pd.concat([plant_parts_eia, part_df])

6 changes: 3 additions & 3 deletions src/pudl/analysis/record_linkage/eia_ferc1_record_linkage.py
@@ -262,9 +262,9 @@ def get_best_matches(
f" True positives: {true_pos}\n"
f" False positives: {false_pos}\n"
f" False negatives: {false_neg}\n"
f" Precision: {true_pos/(true_pos + false_pos):.03}\n"
f" Recall: {true_pos/(true_pos + false_neg):.03}\n"
f" Accuracy: {true_pos/len(train_df):.03}\n"
f" Precision: {true_pos / (true_pos + false_pos):.03}\n"
f" Recall: {true_pos / (true_pos + false_neg):.03}\n"
f" Accuracy: {true_pos / len(train_df):.03}\n"
"Precision = of the training data FERC records that the model predicted a match for, this percentage was correct.\n"
"A measure of accuracy when the model makes a prediction.\n"
"Recall = of all of the training data FERC records, the model predicted a match for this percentage.\n"
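
As a side note on the metrics logged in the hunk above, here is a tiny worked sketch of the same precision / recall / accuracy formulas, using made-up counts rather than anything from the PUDL training data:

# Hypothetical confusion counts for a record-linkage model; illustrative only.
true_pos, false_pos, false_neg = 90, 10, 20
n_train = 120  # total training-data FERC records

precision = true_pos / (true_pos + false_pos)  # 0.9   -> right when it predicts a match
recall = true_pos / (true_pos + false_neg)     # ~0.818 -> share of real matches it finds
accuracy = true_pos / n_train                  # 0.75  -> share of training records matched correctly

print(f"Precision: {precision:.03}\nRecall: {recall:.03}\nAccuracy: {accuracy:.03}")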
10 changes: 6 additions & 4 deletions src/pudl/analysis/record_linkage/eia_ferc1_train.py
@@ -399,9 +399,9 @@ def _check_id_consistency(
"""
logger.debug(f"Checking {id_col} consistency for {error_message}")

assert (
len(bad_ids := df[~df[id_col].isin(actual_ids)][id_col].to_list()) == 0
), f"{id_col} {error_message}: {bad_ids}"
assert len(bad_ids := df[~df[id_col].isin(actual_ids)][id_col].to_list()) == 0, (
f"{id_col} {error_message}: {bad_ids}"
)


def check_if_already_in_training(training_data, validated_connections):
@@ -511,8 +511,10 @@ def validate_override_fixes(
]
)
== 0
), f"Found record_id_eia_override_1 duplicates: \
), (
f"Found record_id_eia_override_1 duplicates: \
{override_dups.record_id_eia_override_1.unique()}"
)

if not allow_mismatched_utilities:
# Make sure the EIA utility id from the override matches the PUDL id from the
8 changes: 4 additions & 4 deletions src/pudl/etl/glue_assets.py
@@ -328,7 +328,7 @@ def core_epa__assn_eia_epacamd_subplant_ids(
]
logger.info(
"Edited subplant_ids after update_subplant_ids: "
f"{len(subplant_id_diff)/len(subplant_ids_updated):.1}%"
f"{len(subplant_id_diff) / len(subplant_ids_updated):.1}%"
)
# overwrite the subplant ids and apply mannual update
subplant_ids_updated = (
@@ -456,9 +456,9 @@ def _subplant_ids_from_prepped_crosswalk(prepped: pd.DataFrame) -> pd.DataFrame:
)
for i, node_set in enumerate(nx.connected_components(graph)):
subgraph = graph.subgraph(node_set)
assert nx.algorithms.bipartite.is_bipartite(
subgraph
), f"non-bipartite: i={i}, node_set={node_set}"
assert nx.algorithms.bipartite.is_bipartite(subgraph), (
f"non-bipartite: i={i}, node_set={node_set}"
)
nx.set_edge_attributes(subgraph, name="global_subplant_id", values=i)
return nx.to_pandas_edgelist(graph)

6 changes: 3 additions & 3 deletions src/pudl/extract/extractor.py
@@ -396,9 +396,9 @@ def partitions_from_settings(context) -> DynamicOutput:
for date_partition in ["years", "half_years", "year_quarters"]
)
]
assert (
len(partition) == 1
), f"Only one working partition is supported: {partition}."
assert len(partition) == 1, (
f"Only one working partition is supported: {partition}."
)
partition = partition[0]
parts = getattr(data_settings, partition) # Get the actual values
# In Zenodo we use "year", "half_year" as the partition, but in our settings
14 changes: 7 additions & 7 deletions src/pudl/helpers.py
@@ -218,7 +218,7 @@ def add_fips_ids(

logger.info(
f"Assigned state FIPS codes for "
f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records."
f"{len(df[df.state_id_fips.notnull()]) / len(df):.2%} of records."
)
if county_col:
df["county_id_fips"] = df.apply(
@@ -234,7 +234,7 @@ def add_fips_ids(
df = df.astype({"county_id_fips": pd.StringDtype()})
logger.info(
f"Assigned county FIPS codes for "
f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records."
f"{len(df[df.county_id_fips.notnull()]) / len(df):.2%} of records."
)
return df

@@ -1378,7 +1378,7 @@ def zero_pad_numeric_string(col: pd.Series, n_digits: int) -> pd.Series:
# Replace anything that's not entirely digits with NA
.replace(r"[^\d]+", pd.NA, regex=True)
# Set any string longer than n_digits to NA
.replace(f"[\\d]{{{n_digits+1},}}", pd.NA, regex=True)
.replace(f"[\\d]{{{n_digits + 1},}}", pd.NA, regex=True)
# Pad the numeric string with leading zeroes to n_digits length
.str.zfill(n_digits)
# All-zero ZIP & FIPS codes are invalid.
@@ -1575,7 +1575,7 @@ def standardize_percentages_ratio(
frac_df.loc[dates, col] /= 100
if frac_df[col].max() > 1:
raise AssertionError(
f"{col}: Values >100pct observed: {frac_df.loc[frac_df[col]>1][col].unique()}"
f"{col}: Values >100pct observed: {frac_df.loc[frac_df[col] > 1][col].unique()}"
)
return frac_df

@@ -2152,9 +2152,9 @@ def check_tables_have_metadata(
not bool(value) for value in tables_missing_metadata_results.values()
)

assert (
has_no_missing_tables_with_missing_metadata
), f"These tables are missing datasette metadata: {tables_missing_metadata_results}"
assert has_no_missing_tables_with_missing_metadata, (
f"These tables are missing datasette metadata: {tables_missing_metadata_results}"
)


def retry(
3 changes: 1 addition & 2 deletions src/pudl/metadata/classes.py
@@ -606,8 +606,7 @@ def _check_encoder(cls, value, info: ValidationInfo):
dtype = info.data["type"]
if dtype not in ["string", "integer"]:
errors.append(
"Encoding only supported for string and integer fields, found "
f"{dtype}"
f"Encoding only supported for string and integer fields, found {dtype}"
)
if errors:
raise ValueError(format_errors(*errors, pydantic=True))
3 changes: 1 addition & 2 deletions src/pudl/metadata/sources.py
@@ -206,8 +206,7 @@
"title": "EIA Form 861 -- Annual Electric Power Industry Report",
"path": "https://www.eia.gov/electricity/data/eia861",
"description": (
"EIA Form 861 Annual Electric Power Industry Report, detailed "
"data files."
"EIA Form 861 Annual Electric Power Industry Report, detailed data files."
),
"field_namespace": "eia",
"working_partitions": {
4 changes: 2 additions & 2 deletions src/pudl/output/eia.py
@@ -422,7 +422,7 @@ def add_consistent_ba_code_column(plants: pd.DataFrame) -> pd.DataFrame:
)
plants_w_ba_codes = plants[plants.balancing_authority_code_eia_consistent.notnull()]
logger.info(
f"{len(plants_w_ba_codes)/len(plants):.1%} of plant records have consistently "
f"{len(plants_w_ba_codes) / len(plants):.1%} of plant records have consistently "
"reported BA Codes"
)
return plants
@@ -472,7 +472,7 @@ def log_current_ba_code_nulls(plants: pd.DataFrame, method_str: str) -> None:
"""
currently_null_len = len(plants[plants.balancing_authority_code_eia.isnull()])
logger.info(
f"{method_str}. {currently_null_len/len(plants):.1%} of records have no BA codes"
f"{method_str}. {currently_null_len / len(plants):.1%} of records have no BA codes"
)

# add a column for each of our backfilling options
7 changes: 3 additions & 4 deletions src/pudl/transform/classes.py
@@ -748,7 +748,7 @@ def correct_units(df: pd.DataFrame, params: UnitCorrections) -> pd.DataFrame:
na_after = sum(selected.isna())
total_nullified = na_after - na_before
logger.info(
f"{total_nullified}/{len(selected)} ({total_nullified/len(selected):.2%}) "
f"{total_nullified}/{len(selected)} ({total_nullified / len(selected):.2%}) "
"of records could not be corrected and were set to NA."
)
# Combine our cleaned up values with the other values we didn't select.
@@ -859,7 +859,7 @@ def drop_invalid_rows(df: pd.DataFrame, params: InvalidRows) -> pd.DataFrame:
# Mask the input dataframe and make a copy to avoid returning a slice.
df_out = df[mask].copy()
logger.info(
f"{1 - (len(df_out)/pre_drop_len):.1%} of records ({pre_drop_len-len(df_out)} "
f"{1 - (len(df_out) / pre_drop_len):.1%} of records ({pre_drop_len - len(df_out)} "
f"rows) contain only {params.invalid_values} values in required columns. "
"Dropped these 💩💩💩 records."
)
@@ -1067,8 +1067,7 @@ def _wrapper(self: AbstractTableTransformer, *args, **kwargs) -> pd.DataFrame:
)
if self.cache_dfs:
logger.debug(
f"{self.table_id.value}: Caching df to {key=} "
f"in {func.__name__}()"
f"{self.table_id.value}: Caching df to {key=} in {func.__name__}()"
)
self._cached_dfs[key] = df.copy()
return df
8 changes: 4 additions & 4 deletions src/pudl/transform/eia861.py
@@ -582,7 +582,7 @@ def add_backfilled_ba_code_column(df, by_cols: list[str]) -> pd.DataFrame:
start_nas = len(df.loc[df.balancing_authority_code_eia.isnull()])
logger.info(
f"Started with {start_nas} missing BA Codes out of {start_len} "
f"records ({start_nas/start_len:.2%})"
f"records ({start_nas / start_len:.2%})"
)
ba_ids = (
df[by_cols + ["balancing_authority_code_eia", "report_date"]]
@@ -606,7 +606,7 @@ def add_backfilled_ba_code_column(df, by_cols: list[str]) -> pd.DataFrame:
)
logger.info(
f"Ended with {end_nas} missing BA Codes out of {end_len} "
f"records ({end_nas/end_len:.2%})"
f"records ({end_nas / end_len:.2%})"
)
return ba_eia861_filled

@@ -768,9 +768,9 @@ def _drop_dupes(df, df_name, subset):
deduped_df = df.drop_duplicates(subset=subset)
deduped_nrows = len(df)
logger.info(
f"Dropped {tidy_nrows-deduped_nrows} duplicate records from EIA 861 "
f"Dropped {tidy_nrows - deduped_nrows} duplicate records from EIA 861 "
f"{df_name} table, out of a total of {tidy_nrows} records "
f"({(tidy_nrows-deduped_nrows)/tidy_nrows:.4%} of all records). "
f"({(tidy_nrows - deduped_nrows) / tidy_nrows:.4%} of all records). "
)
return deduped_df

30 changes: 15 additions & 15 deletions src/pudl/transform/eia923.py
@@ -79,13 +79,13 @@ def _get_plant_nuclear_unit_id_map(nuc_fuel: pd.DataFrame) -> dict[int, str]:
plant_to_nuc_id = plant_to_nuc_id.explode()

# check there is one nuclear unit per plant.
assert (
plant_to_nuc_id.index.is_unique
), "Found multiple nuclear units in plant_to_nuc_id mapping."
assert plant_to_nuc_id.index.is_unique, (
"Found multiple nuclear units in plant_to_nuc_id mapping."
)
# Check there are no missing nuclear unit ids.
assert (
~plant_to_nuc_id.isna()
).all(), "Found missing nuclear_unit_ids in plant_to_nuc_id mappings."
assert (~plant_to_nuc_id.isna()).all(), (
"Found missing nuclear_unit_ids in plant_to_nuc_id mappings."
)

plant_to_nuc_id = plant_to_nuc_id.astype("string")

@@ -154,13 +154,13 @@ def _get_plant_prime_mover_map(gen_fuel: pd.DataFrame) -> dict[int, str]:
plant_to_prime_mover = plant_to_prime_mover.explode()

# check there is one prime mover per plant.
assert (
plant_to_prime_mover.index.is_unique
), "Found multiple plants in plant_to_prime_mover mapping."
assert plant_to_prime_mover.index.is_unique, (
"Found multiple plants in plant_to_prime_mover mapping."
)
# Check there are no missing prime mover codes.
assert (
plant_to_prime_mover.notnull()
).all(), "Found missing prime_mover_codes in plant_to_prime_mover mappings."
assert (plant_to_prime_mover.notnull()).all(), (
"Found missing prime_mover_codes in plant_to_prime_mover mappings."
)

return dict(plant_to_prime_mover)

@@ -201,9 +201,9 @@ def _backfill_prime_mover_code(gen_fuel: pd.DataFrame) -> pd.DataFrame:
missing_prime_movers = gen_fuel.prime_mover_code.isna()
gen_fuel.loc[missing_prime_movers, "prime_mover_code"] = "UNK"

assert (
gen_fuel.prime_mover_code.notna().all()
), "generation_fuel_923.prime_mover_code has missing values after backfill."
assert gen_fuel.prime_mover_code.notna().all(), (
"generation_fuel_923.prime_mover_code has missing values after backfill."
)
return gen_fuel


6 changes: 3 additions & 3 deletions src/pudl/transform/eia_bulk_elec.py
@@ -77,9 +77,9 @@ def _map_key_codes_to_readable_values(compound_keys: pd.DataFrame) -> pd.DataFra
}
for col_name, mapping in mappings.items():
keys.loc[:, col_name] = keys.loc[:, col_name].map(mapping)
assert (
keys.loc[:, col_name].notnull().all()
), f"{col_name} contains an unmapped category."
assert keys.loc[:, col_name].notnull().all(), (
f"{col_name} contains an unmapped category."
)

keys = keys.astype("category")
return keys
