diff --git a/workflow/scripts/weighted_distances.py b/workflow/scripts/weighted_distances.py index 27dbd8f..2091ed9 100644 --- a/workflow/scripts/weighted_distances.py +++ b/workflow/scripts/weighted_distances.py @@ -78,9 +78,9 @@ def calc_fst_weir_cockerham(hs: float, ht: float) -> float: return (ht - hs) / ht if ht != 0 else 0 -def build_cache(variant_table: pd.DataFrame, reference: Seq): +def build_cache(variant_table: pd.DataFrame, samples: List[str], reference: Seq): cache = {"freq": {}, "hz": {}} - for sample_name in variant_table["REGION"].unique(): + for sample_name in set(samples): for position in variant_table["POS"].astype("Int64").unique(): if sample_name not in cache["freq"]: cache["freq"][sample_name] = {} @@ -120,7 +120,7 @@ def calculate_sample_distances(positions: List[int], sample_name: str, samples: def calculate_distance_matrix(variant_table: pd.DataFrame, samples: List[str], reference: Seq) -> pd.DataFrame: positions = variant_table["POS"].astype("Int64").unique().tolist() - cache = build_cache(variant_table, reference) + cache = build_cache(variant_table, samples, reference) distance_matrix = {} for sample_name in samples: distance_matrix[sample_name] = calculate_sample_distances(positions, sample_name, samples, cache)