Skip to content

Commit

Permalink
Fix #95
Browse files Browse the repository at this point in the history
  • Loading branch information
veghp committed Jan 13, 2025
1 parent fb6afc5 commit 7f7bb09
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 17 deletions.
30 changes: 13 additions & 17 deletions dnachisel/builtin_specifications/UniquifyAllKmers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ def extract_kmer(i):
@lru_cache(maxsize=1)
def get_kmer_extractor_cached(sequence, include_reverse_complement=True, k=1):
"""Kmer extractor with memoization.
This globally cached method enables much faster computations when
several UniquifyAllKmers functions with equal k are used.
several UniquifyAllKmers functions with equal k are used.
"""
L = len(sequence)
if include_reverse_complement:
Expand Down Expand Up @@ -89,7 +89,7 @@ class UniquifyAllKmers(Specification):
----------
k
Minimal length of sequences to be considered repeats
reference
The default None indicates that the specification's location should have
no homologies anywhere in the whole sequence. If reference="here", then
Expand Down Expand Up @@ -117,7 +117,7 @@ class UniquifyAllKmers(Specification):

best_possible_score = 0
use_cache = True
shorthand_name = 'all_unique_kmers'
shorthand_name = "all_unique_kmers"

def __init__(
self,
Expand Down Expand Up @@ -176,12 +176,8 @@ def local_evaluation(self, problem):
nonunique_locations += indices
location_variable_kmers = set(variable_kmers["location"].keys())
extended_variable_kmers = set(variable_kmers["extended"].keys())
fixed_location_kmers = self.localization_data["location"][
"fixed_kmers"
]
extended_fixed_kmers = self.localization_data["extended"][
"fixed_kmers"
]
fixed_location_kmers = self.localization_data["location"]["fixed_kmers"]
extended_fixed_kmers = self.localization_data["extended"]["fixed_kmers"]

for c in [
extended_variable_kmers,
Expand All @@ -200,9 +196,7 @@ def local_evaluation(self, problem):
for kmer in extended_variable_kmers.intersection(c)
for i in variable_kmers["extended"][kmer]
]
nonunique_locations = [
Location(i, i + self.k) for i in nonunique_locations
]
nonunique_locations = [Location(i, i + self.k) for i in nonunique_locations]
return SpecEvaluation(
self,
problem,
Expand All @@ -227,7 +221,7 @@ def global_evaluation(self, problem):
extract_kmer = self.get_kmer_extractor(problem.sequence)
kmers_locations = defaultdict(lambda: [])
start, end = self.reference.start, self.reference.end
for i in range(start, end - self.k):
for i in range(start, end - self.k + 1):
location = (i, i + self.k)
kmer_sequence = extract_kmer(i)
kmers_locations[kmer_sequence].append(location)
Expand All @@ -238,7 +232,7 @@ def global_evaluation(self, problem):
for locations_list in kmers_locations.values()
for start_, end_ in locations_list
if len(locations_list) > 1
and (self.location.start <= start_ < end_ < self.location.end)
and (self.location.start <= start_ < end_ <= self.location.end)
],
key=lambda l: l.start,
)
Expand Down Expand Up @@ -299,14 +293,16 @@ def shifted(self, shift):
new_location = None if self.location is None else self.location + shift
reference = None if self.reference is None else self.reference + shift
return self.copy_with_changes(
location=new_location, reference=reference, derived_from=self,
location=new_location,
reference=reference,
derived_from=self,
)

def label_parameters(self):
    """Return the (name, value) pairs describing this specification.

    Only ``k`` is listed; its value is converted to a string.
    """
    k_text = str(self.k)
    return [("k", k_text)]

def short_label(self):
    """Return a compact label naming the k-mer size, e.g. "All 9bp unique"."""
    template = "All %dbp unique"
    return template % self.k

def breach_label(self):
    """Return the label used for breaches, e.g. "9bp homologies"."""
    template = "%dbp homologies"
    return template % self.k
10 changes: 10 additions & 0 deletions tests/builtin_specifications/test_UniquifyAllKmers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
)
import numpy


# Note: we are not providing a location for AvoidChanges: it applies globally
def test_UniquifyAllKmers_as_constraint():
numpy.random.seed(123)
Expand Down Expand Up @@ -55,3 +56,12 @@ def test_UniquifyAllKmers_include_reverse_complement_false():
constraint = UniquifyAllKmers(10, include_reverse_complement=False)
problem = DnaOptimizationProblem(sequence=40 * "A", constraints=[constraint])
problem.constraints_text_summary()


# issue 95
def test_UniquifyAllKmers_at_ends():
    """Check that a 5-mer repeated at both sequence ends fails the constraint.

    The sequence starts and ends with the same 5bp word, so a
    UniquifyAllKmers(k=5) constraint is expected not to pass.
    """
    repeated_kmer = "AGTTC"
    uniquify = UniquifyAllKmers(k=5)
    problem = DnaOptimizationProblem(
        sequence=repeated_kmer + "CCGGTC" + repeated_kmer,
        constraints=[uniquify],
    )
    assert not problem.all_constraints_pass()

0 comments on commit 7f7bb09

Please sign in to comment.