diff --git a/prymer/offtarget/offtarget_detector.py b/prymer/offtarget/offtarget_detector.py index 11ad2af..592eb30 100644 --- a/prymer/offtarget/offtarget_detector.py +++ b/prymer/offtarget/offtarget_detector.py @@ -104,6 +104,9 @@ ReferenceName: TypeAlias = str """Alias for a reference sequence name.""" +MINIMUM_THREE_PRIME_REGION_LENGTH: int = 8 +"""Minimum allowable seed length for the 3' region.""" + @dataclass(init=True, frozen=True) class OffTargetResult: @@ -157,7 +160,7 @@ class OffTargetDetector(AbstractContextManager): alignments does not exceed the specified maximum number of primer pair hits. """ - def __init__( + def __init__( # noqa: C901 self, ref: Path, max_primer_hits: int, @@ -189,33 +192,39 @@ def __init__( 3. Checking of primer pairs: `max_primer_hits`, `min_primer_pair_hits`, `max_primer_pair_hits`, and `max_amplicon_size`. + The `three_prime_region_length` parameter is used as the seed length for `bwa aln`. + Args: ref: the reference genome fasta file (must be indexed with BWA) max_primer_hits: the maximum number of hits an individual primer can have in the genome before it is considered an invalid primer, and all primer pairs containing the - primer failed. + primer failed. Must be greater than or equal to 0. max_primer_pair_hits: the maximum number of amplicons a primer pair can make and be - considered passing + considered passing. Must be greater than or equal to 0. min_primer_pair_hits: The minimum number of amplicons a primer pair can make and be considered passing. (In most cases, this is the number of amplicons a primer pair is expected to generate.) The default is 1, which is appropriate when the primer pair is being evaluated for off-target hits against the same reference genome from which the primers were generated. If the primer pair was generated from a different - reference sequence, it may be appropriate to set this value to 0. + reference sequence, it may be appropriate to set this value to 0. Must be greater + than or equal to 0. three_prime_region_length: the number of bases at the 3' end of the primer in which the - parameter max_mismatches_in_three_prime_region is evaluated + parameter `max_mismatches_in_three_prime_region` is evaluated. This value is used as + the seed length (`bwa aln -l`). Must be a minimum of 8. max_mismatches_in_three_prime_region: the maximum number of mismatches that are tolerated in the three prime region of each primer defined by - three_prime_region_length + `three_prime_region_length`. Must be between 0 and `three_prime_region_length`, + inclusive. max_mismatches: the maximum number of mismatches allowed in the full length primer - (including any in the three prime region) + (including any in the three prime region). Must be greater than or equal to 0. max_gap_opens: the maximum number of gaps (insertions or deletions) allowable in an - alignment of a oligo to the reference + alignment of a oligo to the reference. Must be greater than or equal to 0. max_gap_extends: the maximum number of gap extensions allowed; extending a gap beyond a single base costs 1 gap extension. Can be set to -1 to allow unlimited extensions up to max diffs (aka max mismatches), while disallowing - "long gaps". - max_amplicon_size: the maximum amplicon size to consider amplifiable + "long gaps". Must be greater than or equal to -1. + max_amplicon_size: the maximum amplicon size to consider amplifiable. Must be greater + than 0. cache_results: if True, cache results for faster re-querying threads: the number of threads to use when invoking bwa keep_spans: if True, [[OffTargetResult]] objects will be reported with amplicon spans @@ -223,7 +232,66 @@ def __init__( keep_primer_spans: if True, [[OffTargetResult]] objects will be reported with left and right primer spans executable: string or Path representation of the `bwa` executable path + + Raises: + ValueError: If `max_amplicon_size` is not greater than 0. + ValueError: If any of `max_primer_hits`, `max_primer_pair_hits`, or + `min_primer_pair_hits` are not greater than or equal to 0. + ValueError: If `three_prime_region_length` is not greater than or equal to 8. + ValueError: If `max_mismatches_in_three_prime_region` is outside the range 0 to + `three_prime_region_length`, inclusive. + ValueError: If `max_mismatches` is not greater than or equal to 0. + ValueError: If `max_gap_opens` is not greater than or equal to 0. + ValueError: If `max_gap_extends` is not -1 or greater than or equal to 0. """ + errors: list[str] = [] + if max_amplicon_size < 1: + errors.append(f"'max_amplicon_size' must be greater than 0. Saw {max_amplicon_size}") + if max_primer_hits < 0: + errors.append( + f"'max_primer_hits' must be greater than or equal to 0. Saw {max_primer_hits}" + ) + if max_primer_pair_hits < 0: + errors.append( + "'max_primer_pair_hits' must be greater than or equal to 0. " + f"Saw {max_primer_pair_hits}" + ) + if min_primer_pair_hits < 0: + errors.append( + "'min_primer_pair_hits' must be greater than or equal to 0. " + f"Saw {min_primer_pair_hits}" + ) + if three_prime_region_length < MINIMUM_THREE_PRIME_REGION_LENGTH: + errors.append( + "'three_prime_region_length' must be greater than or equal to " + f"{MINIMUM_THREE_PRIME_REGION_LENGTH}. Saw {three_prime_region_length}" + ) + if ( + max_mismatches_in_three_prime_region < 0 + or max_mismatches_in_three_prime_region > three_prime_region_length + ): + errors.append( + "'max_mismatches_in_three_prime_region' must be between 0 and " + f"'three_prime_region_length'={three_prime_region_length} inclusive. " + f"Saw {max_mismatches_in_three_prime_region}" + ) + if max_mismatches < 0: + errors.append( + f"'max_mismatches' must be greater than or equal to 0. Saw {max_mismatches}" + ) + if max_gap_opens < 0: + errors.append( + f"'max_gap_opens' must be greater than or equal to 0. Saw {max_gap_opens}" + ) + if max_gap_extends < -1: + errors.append( + "'max_gap_extends' must be -1 (for unlimited extensions up to 'max_mismatches'=" + f"{max_mismatches}) or greater than or equal to 0. Saw {max_gap_extends}" + ) + + if len(errors) > 0: + raise ValueError("\n".join(errors)) + self._primer_cache: dict[str, BwaResult] = {} self._primer_pair_cache: dict[PrimerPair, OffTargetResult] = {} self._bwa = BwaAlnInteractive( diff --git a/tests/offtarget/test_offtarget.py b/tests/offtarget/test_offtarget.py index a2f3a46..7cd3ae9 100644 --- a/tests/offtarget/test_offtarget.py +++ b/tests/offtarget/test_offtarget.py @@ -1,3 +1,4 @@ +import re from dataclasses import dataclass from pathlib import Path @@ -349,3 +350,52 @@ class CustomPrimer(Oligo): # 2. Return a list of the same type. # NB: we're ignoring the unused value error because we want to check the type hint filtered_primers: list[CustomPrimer] = detector.filter(primers) # noqa: F841 + + +# fmt: off +@pytest.mark.parametrize( + ( + "max_primer_hits,max_primer_pair_hits,min_primer_pair_hits,three_prime_region_length," + "max_mismatches_in_three_prime_region,max_mismatches,max_amplicon_size," + "max_gap_opens,max_gap_extends,expected_error" + ), + [ + (-1, 1, 1, 20, 0, 0, 1, 0, 0, "'max_primer_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501 + (1, -1, 1, 20, 0, 0, 1, 0, 0, "'max_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501 + (1, 1, -1, 20, 0, 0, 1, 0, 0, "'min_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501 + (1, 1, 1, 5, 0, 0, 1, 0, 0, "'three_prime_region_length' must be greater than or equal to 8. Saw 5"), # noqa: E501 + (1, 1, 1, 20, -1, 0, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw -1"), # noqa: E501 + (1, 1, 1, 20, 21, 0, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw 21"), # noqa: E501 + (1, 1, 1, 20, 0, -1, 1, 0, 0, "'max_mismatches' must be greater than or equal to 0. Saw -1"), # noqa: E501 + (1, 1, 1, 20, 0, 0, 0, 0, 0, "'max_amplicon_size' must be greater than 0. Saw 0"), + (1, 1, 1, 20, 0, 0, 1, -1, 0, "'max_gap_opens' must be greater than or equal to 0. Saw -1"), + (1, 1, 1, 20, 0, 5, 1, 0, -2, re.escape("'max_gap_extends' must be -1 (for unlimited extensions up to 'max_mismatches'=5) or greater than or equal to 0. Saw -2")), #noqa: E501 + ], +) +# fmt: on +def test_init( + ref_fasta: Path, + max_primer_hits: int, + max_primer_pair_hits: int, + min_primer_pair_hits: int, + three_prime_region_length: int, + max_mismatches_in_three_prime_region: int, + max_mismatches: int, + max_amplicon_size: int, + max_gap_opens: int, + max_gap_extends: int, + expected_error: str, +) -> None: + with pytest.raises(ValueError, match=expected_error): + OffTargetDetector( + ref=ref_fasta, + max_primer_hits=max_primer_hits, + max_primer_pair_hits=max_primer_pair_hits, + min_primer_pair_hits=min_primer_pair_hits, + three_prime_region_length=three_prime_region_length, + max_mismatches_in_three_prime_region=max_mismatches_in_three_prime_region, + max_mismatches=max_mismatches, + max_amplicon_size=max_amplicon_size, + max_gap_opens=max_gap_opens, + max_gap_extends=max_gap_extends, + )