From b4f859a3b8de0abb8de7d4e10b843ee5c3b55c3a Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:22 -0500 Subject: [PATCH 1/8] implement `double_runs` property --- opr/primer.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/opr/primer.py b/opr/primer.py index 8d0ccbf..21c2f4a 100644 --- a/opr/primer.py +++ b/opr/primer.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- """OPR primer.""" +import itertools from enum import Enum from warnings import warn from .errors import OPRBaseError @@ -41,6 +42,7 @@ def __init__(self, sequence): self._gc_content = None self._gc_clamp = None self._single_runs = None + self._double_runs = None self._melting_temperature = { MeltingTemperature.BASIC: None, MeltingTemperature.SALT_ADJUSTED: None, @@ -213,6 +215,28 @@ def single_runs(self): self._single_runs[base] = single_run_length(self._sequence, base) return self._single_runs + @property + def double_runs(self): + """ + Calculate Double Runs of the primer. + + It refers to how many times each 2-base pair occurs consecutively in the primer. + + :return: Dictionary of double runs (2-base pairs) and their counts in the primer + """ + if self._double_runs is None: + bases = ['A', 'T', 'G', 'C'] + pairs = [''.join(pair) for pair in itertools.product(bases, repeat=2) if pair[0] != pair[1]] + counts = {pair: 0 for pair in pairs} + for i in range(len(self.sequence) - 1): + if self.sequence[i:i+2] in counts: + counts[self.sequence[i:i+2]] += 1 + for pair in counts: + if counts[pair] == 1: + counts[pair] = 0 + self._double_runs = counts + return self._double_runs + def melting_temperature(self, method=MeltingTemperature.BASIC): """ Calculate(if needed) the melting temperature. 
From 8420d43da30d970edcaa023c1854d97ba88ebe19 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:34 -0500 Subject: [PATCH 2/8] add testcase for double_runs --- tests/test_calculations.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_calculations.py b/tests/test_calculations.py index ebbe542..2fb2f03 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -66,3 +66,13 @@ def test_single_runs_3(): # Reference: https://www.oligoevaluator.com/OligoCalc oprimer = Primer("AAAAATTCGGGGATCCCCG") runs = oprimer.single_runs assert runs['A'] == 5 and runs['T'] == 2 and runs['C'] == 4 and runs['G'] == 4 + + +def test_double_runs(): + p1 = Primer("ATATCGAACACACACACA") + double_runs = p1.double_runs + print(double_runs) + true_answer = {'GT': 0, 'CA': 5, 'AT': 2, 'TA': 0, 'GC': 0, 'GA': 0, 'AG': 0, 'TG': 0, 'CG': 0, 'TC': 0, 'AC': 5, 'CT': 0} + assert len(true_answer) == len(double_runs) and all(double_runs[pair] == true_answer[pair] for pair in double_runs) + + From 295306136b40f85796c615f329a69e0723971964 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:44 -0500 Subject: [PATCH 3/8] implement `repeats` method --- opr/primer.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/opr/primer.py b/opr/primer.py index 21c2f4a..67d5e42 100644 --- a/opr/primer.py +++ b/opr/primer.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- """OPR primer.""" +import re import itertools from enum import Enum from warnings import warn @@ -237,6 +238,26 @@ def double_runs(self): self._double_runs = counts return self._double_runs + def repeats(self, sequence, consecutive=False): + """ + Count occurrences of a subsequence in the primer sequence. + + :param sequence: The subsequence to search for. + :type sequence: str + :param consecutive: Whether to count only consecutive repeats. + :type consecutive: bool + :return: The count of occurrences, or, if consecutive, the length of the longest back-to-back run (0 if no repeat). 
+ """ + if consecutive: + pattern = f"(?:{re.escape(sequence)})+" + matches = re.findall(f"({pattern})+", self.sequence) + result = max((len(match) // len(sequence) for match in matches), default=0) + if result == 1: + result = 0 + return result + else: + return self.sequence.count(sequence) + def melting_temperature(self, method=MeltingTemperature.BASIC): """ Calculate(if needed) the melting temperature. From f7f803c20a6292f643a2e561ad5c5b1b6fe288ce Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:49 -0500 Subject: [PATCH 4/8] add testcase for repeats function --- tests/test_calculations.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_calculations.py b/tests/test_calculations.py index 2fb2f03..fe4bf71 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -76,3 +76,25 @@ def test_double_runs(): assert len(true_answer) == len(double_runs) and all(double_runs[pair] == true_answer[pair] for pair in double_runs) +def test_repeats_1(): + p = Primer("ATCG") + assert ( + p.repeats(sequence="A", consecutive=False) == 1 and + p.repeats(sequence="AT", consecutive=False) == 1 and + p.repeats(sequence="AC", consecutive=False) == 0 and + p.repeats(sequence="A", consecutive=True) == 0 and + p.repeats(sequence="AT", consecutive=True) == 0 + ) + + +def test_repeats_2(): + p = Primer("AAAATCGTGT") + assert ( + p.repeats(sequence="AA", consecutive=True) == 2 and + p.repeats(sequence="GT", consecutive=True) == 2 + ) + + +def test_repeats_3(): + p = Primer("ATCGATCGATCG") + assert p.repeats(sequence="ATCG", consecutive=True) == 3 From a758119023260aa21590f95a5ac31a7d5662903f Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:54 -0500 Subject: [PATCH 5/8] `CHANGELOG.md` updated --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 916d635..e50649e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on 
[Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- `double_runs` property +- `repeats` method +### Changed ## [0.2] - 2025-01-09 ### Added - `__eq__` overload From 1b9a842be018613bc3f7941feeec4549da083c02 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Tue, 14 Jan 2025 22:00:58 -0500 Subject: [PATCH 6/8] `README.md` updated --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 62a4ee9..56fc97a 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,20 @@ >>> primer1.single_runs {'A': 2, 'T': 0, 'C': 0, 'G': 2} ``` +#### Double run length +```pycon +>>> primer1.double_runs +{'AT': 0, 'AG': 4, 'AC': 0, 'TA': 0, 'TG': 0, 'TC': 0, 'GA': 5, 'GT': 0, 'GC': 0, 'CA': 0, 'CT': 0, 'CG': 0} +``` +#### Repeats +```pycon +>>> primer1.repeats(sequence="GG", consecutive=False) +4 +``` +```pycon +>>> primer1.repeats(sequence="GG", consecutive=True) +0 +``` #### Melting temperature ```pycon >>> primer1.melting_temperature() From e4230fd7f23ab8a2e5dacdaeaa9ee1ac25cefa47 Mon Sep 17 00:00:00 2001 From: AHReccese Date: Fri, 17 Jan 2025 19:02:18 -0500 Subject: [PATCH 7/8] use `VALID_BASES` and refactor `double_runs` by using `repeats` method --- opr/primer.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/opr/primer.py b/opr/primer.py index 0ab5efa..de44382 100644 --- a/opr/primer.py +++ b/opr/primer.py @@ -238,15 +238,10 @@ def double_runs(self): :return: Dictionary of double runs (2-base pairs) and their counts in the primer """ if self._double_runs is None: - bases = ['A', 'T', 'G', 'C'] - pairs = [''.join(pair) for pair in itertools.product(bases, repeat=2) if pair[0] != pair[1]] + pairs = [''.join(pair) for pair in itertools.product(VALID_BASES, repeat=2) if pair[0] != pair[1]] counts = {pair: 0 for pair in pairs} - for i in range(len(self.sequence) - 1): - if self.sequence[i:i+2] in 
counts: - counts[self.sequence[i:i+2]] += 1 for pair in counts: - if counts[pair] == 1: - counts[pair] = 0 + counts[pair] = self.repeats(pair, consecutive=True) self._double_runs = counts return self._double_runs From 2163746f7ff96212282bab1edc5d45c80a32cddd Mon Sep 17 00:00:00 2001 From: AHReccese Date: Sun, 19 Jan 2025 20:29:04 -0500 Subject: [PATCH 8/8] add double_runs to cache test --- tests/test_cache.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_cache.py b/tests/test_cache.py index d1edff9..46f08f3 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,3 +1,5 @@ +import itertools +from opr.params import VALID_BASES from opr import Primer, MeltingTemperature TEST_CASE_NAME = "Cache tests" @@ -32,3 +34,13 @@ def test_single_runs(): runs = oprimer.single_runs assert oprimer.single_runs['A'] == runs['A'] and oprimer.single_runs['T'] == runs[ 'T'] and oprimer.single_runs['C'] == runs['C'] and oprimer.single_runs['G'] == runs['G'] + + +def test_double_runs(): + p1 = Primer("ATATCGAACACACACACA") + double_runs = p1.double_runs + pairs = [''.join(pair) for pair in itertools.product(VALID_BASES, repeat=2) if pair[0] != pair[1]] + double_runs_2nd = {} + for pair in pairs: + double_runs_2nd[pair] = p1.double_runs[pair] + assert len(double_runs_2nd) == len(double_runs) and all(double_runs[pair] == double_runs_2nd[pair] for pair in double_runs)