Skip to content

Commit

Permalink
Actually fix the thing
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmig committed Oct 20, 2023
1 parent 1eff69c commit 18775d5
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions workflow/scripts/weighted_distances.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,7 @@ def build_ancestor_variant_table(ancestor: Seq, reference: Seq, reference_name:
pos = []
alt = []
for i in range(1, len(ancestor) + 1):
- if i not in masked_positions:
+ if i not in masked_positions and ancestor[i-1] != reference[i-1]:
pos.append(i)
alt.append(reference[i-1])
df = pd.DataFrame({"POS": pos, "ALT": alt})
Expand Down Expand Up @@ -78,9 +78,9 @@ def calc_fst_weir_cockerham(hs: float, ht: float) -> float:
return (ht - hs) / ht if ht != 0 else 0


- def build_cache(variant_table: pd.DataFrame, reference: Seq):
+ def build_cache(variant_table: pd.DataFrame, samples: List[str], reference: Seq):
cache = {"freq": {}, "hz": {}}
- for sample_name in variant_table["REGION"].unique():
+ for sample_name in set(samples):
for position in variant_table["POS"].astype("Int64").unique():
if sample_name not in cache["freq"]:
cache["freq"][sample_name] = {}
Expand Down Expand Up @@ -120,7 +120,7 @@ def calculate_sample_distances(positions: List[int], sample_name: str, samples:

def calculate_distance_matrix(variant_table: pd.DataFrame, samples: List[str], reference: Seq) -> pd.DataFrame:
positions = variant_table["POS"].astype("Int64").unique().tolist()
- cache = build_cache(variant_table, reference)
+ cache = build_cache(variant_table, samples, reference)
distance_matrix = {}
for sample_name in samples:
distance_matrix[sample_name] = calculate_sample_distances(positions, sample_name, samples, cache)
Expand Down

0 comments on commit 18775d5

Please sign in to comment.