Skip to content

Commit

Permalink
feat(repeats): improvements in repeat check.
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasprlic committed Sep 19, 2024
1 parent bb7eb3c commit fed5af4
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions src/hgvs/repeats.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self, fs: VariantCoords, reverse:bool=False) -> None:

if len(self.repeat_units_ref) == 0 and len(self.repeat_units_alt) ==0 :
return

# check longest repeat blocks:
# we only look at ref to determine if there are repeats
# If ref has no repeat, we don't call this a repeat variant, even if alt would have a repetitive unit
Expand Down Expand Up @@ -98,9 +98,8 @@ def detect_repetitive_block_lengths(sequence: str, longest_ref_unit:RepeatUnit|N
ru = RepeatUnit(repeat_count, longest_ref_unit.repeat_unit, len(block), block)
result.append(ru)
shuffleable_bases = sequence[repeat_count*len(ru.repeat_unit):]
for b in shuffleable_bases:
ru = RepeatUnit(1, b, 1, b)
result.append(ru)
rus = detect_repetitive_block_lengths(shuffleable_bases, reverse = reverse)
result.extend(rus)
return result


Expand Down Expand Up @@ -236,9 +235,16 @@ def assemble_repeat_string(sequence: str, repeat_units: list[RepeatUnit], revers
break
if not found_unit:
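# no matching repeat unit: strip characters from the end of seq and emit them as one char[count] entry
# NOTE(review): seq_char is read from seq[0] although characters are removed from the end — presumably seq[-1] was intended; confirm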
count = 1
seq_char = seq[0]
seq = seq[:-1]
return_str = f"{seq_char}[1]" + return_str

# count consecutive repeating chars from the end
while seq and seq[-1] == seq_char:
count += 1
seq = seq[:-1]

return_str = f"{seq_char}[{count}]" + return_str

else: # forward direction
while len(seq) > 0:
Expand All @@ -251,9 +257,16 @@ def assemble_repeat_string(sequence: str, repeat_units: list[RepeatUnit], revers
break
if not found_unit:
# no matching repeat unit: consume the leading character plus any immediately following copies of it, emitted as one char[count] entry
count = 1
seq_char = seq[0]
seq = seq[1:]
return_str += f"{seq_char}[1]"

# count consecutive repeating chars
while seq and seq[0] == seq_char:
count += 1
seq = seq[1:]

return_str += f"{seq_char}[{count}]"


return return_str
Expand Down

0 comments on commit fed5af4

Please sign in to comment.