Skip to content

Commit

Permalink
Allow passing either a scoring matrix name or object to Aligner con…
Browse files Browse the repository at this point in the history
…structor
  • Loading branch information
althonos committed May 6, 2024
1 parent 6e92f9f commit 33a779d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyfamsa/_famsa.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class Aligner:
n_refinements: int = 100,
keep_duplicates: bool = False,
refine: typing.Optional[bool] = None,
scoring_matrix: ScoringMatrix = MIQS,
scoring_matrix: typing.Union[ScoringMatrix, str, None] = None,
) -> None: ...
def align(self, sequences: typing.Iterable[Sequence]) -> Alignment: ...
def build_tree(self, sequences: typing.Iterable[Sequence]) -> GuideTree: ...
Expand Down
23 changes: 16 additions & 7 deletions pyfamsa/_famsa.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ cdef class Aligner:
int n_refinements=100,
bool keep_duplicates=False,
object refine=None,
ScoringMatrix scoring_matrix not None=MIQS,
object scoring_matrix=None,
):
"""__init__(self, *, threads=0, guide_tree="sl", tree_heuristic=None, medoid_threshold=0, n_refinements=100, keep_duplicates=False, refine=None)\n--
Expand Down Expand Up @@ -350,9 +350,10 @@ cdef class Aligner:
refine (`bool` or `None`): Set to `True` to force refinement,
`False` to disable refinement, or leave as `None` to disable
refinement automatically for sets of more than 1000 sequences.
scoring_matrix (`~scoring_matrices.ScoringMatrix`): The scoring
matrix to use for scoring alignments. By default, the *MIQS*
matrix by Yamada & Tomii (2014) is used.
scoring_matrix (`~scoring_matrices.ScoringMatrix` or `str`): The
scoring matrix to use for scoring alignments. By default, the
*MIQS* matrix by Yamada & Tomii (2014) is used like in the
original FAMSA implementation.
"""
self._params.keepDuplicates = keep_duplicates
Expand Down Expand Up @@ -403,9 +404,17 @@ cdef class Aligner:
else:
raise ValueError("`n_refinements` argument must be positive")

if scoring_matrix.alphabet != FAMSA_ALPHABET:
raise ValueError(f"invalid scoring matrix alphabet: expected {FAMSA_ALPHABET!r}, got {scoring_matrix.alphabet!r}")
self.scoring_matrix = scoring_matrix
if scoring_matrix is None:
self.scoring_matrix = MIQS
elif isinstance(scoring_matrix, str):
self.scoring_matrix = ScoringMatrix.from_name(scoring_matrix).shuffle(FAMSA_ALPHABET)
elif isinstance(scoring_matrix, ScoringMatrix):
if scoring_matrix.alphabet != FAMSA_ALPHABET:
raise ValueError(f"invalid scoring matrix alphabet: expected {FAMSA_ALPHABET!r}, got {scoring_matrix.alphabet!r}")
self.scoring_matrix = scoring_matrix
else:
ty = type(scoring_matrix).__name__
raise TypeError(f"expected str or ScoringMatrix, found {ty}")

# --- Methods ------------------------------------------------------------

Expand Down
18 changes: 18 additions & 0 deletions pyfamsa/tests/test_aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import textwrap
import tempfile

from scoring_matrices import ScoringMatrix

from .. import Aligner, Sequence
from . import fasta, data

Expand Down Expand Up @@ -44,6 +46,22 @@ def test_adeno_fiber_upgma(self):
self._test_famsa("adeno_fiber", "upgma", None)


class TestAligner(unittest.TestCase):
    # Exercises the `scoring_matrix` argument of the `Aligner` constructor.

    def test_init_scoring_matrix_str(self):
        # A matrix given by name must compare equal to the matrix
        # loaded under that same name.
        expected = ScoringMatrix.from_name("BLOSUM62")
        built = Aligner(scoring_matrix="BLOSUM62")
        self.assertEqual(built.scoring_matrix, expected)

    def test_init_scoring_matrix_object(self):
        # A matrix given as an object must be exposed unchanged on the
        # resulting aligner.
        expected = ScoringMatrix.from_name("BLOSUM62")
        built = Aligner(scoring_matrix=expected)
        self.assertEqual(built.scoring_matrix, expected)

    def test_init_scoring_matrix_error(self):
        # Anything that is neither a name nor a matrix object is rejected.
        with self.assertRaises(TypeError):
            Aligner(scoring_matrix=1)


class TestAlign(unittest.TestCase, _Test):

def test_no_sequence(self):
Expand Down

0 comments on commit 33a779d

Please sign in to comment.