diff --git a/pyfamsa/_famsa.pxd b/pyfamsa/_famsa.pxd index 40b1626..8f47476 100644 --- a/pyfamsa/_famsa.pxd +++ b/pyfamsa/_famsa.pxd @@ -42,6 +42,8 @@ cdef class Aligner: cdef CParams _params cdef readonly ScoringMatrix scoring_matrix + cdef int _copy_matrix(self, CFAMSA* famsa) except 1 nogil + cpdef Alignment align(self, object sequences) cpdef GuideTree build_tree(self, object sequences) diff --git a/pyfamsa/_famsa.pyx b/pyfamsa/_famsa.pyx index 6aa061b..9921f16 100644 --- a/pyfamsa/_famsa.pyx +++ b/pyfamsa/_famsa.pyx @@ -318,7 +318,7 @@ cdef class Aligner: with ``tree_heuristic``. n_refinements (`int`): The number of refinement iterations to run. - keep_duplicates (`bool`): Set to `True` to avoid discarding + keep_duplicates (`bool`): Set to `True` to avoid discarding duplicate sequences before building trees or alignments. refine (`bool` or `None`): Set to `True` to force refinement, `False` to disable refinement, or leave as `None` to disable @@ -392,6 +392,16 @@ cdef class Aligner: # --- Methods ------------------------------------------------------------ + cdef int _copy_matrix(self, CFAMSA* famsa) except 1 nogil: + cdef size_t i + cdef size_t j + cdef const float** matrix = self.scoring_matrix.matrix() + for i in range(NO_AMINOACIDS): + famsa.score_vector[i] = roundf(cost_cast_factor * matrix[i][i]) + for j in range(NO_AMINOACIDS): + famsa.score_matrix[i][j] = roundf(cost_cast_factor * matrix[i][j]) + return 0 + cpdef Alignment align(self, object sequences): """align(self, sequences)\n-- @@ -417,11 +427,7 @@ cdef class Aligner: # copy score matrix weights with nogil: - matrix = self.scoring_matrix.matrix() - for i in range(NO_AMINOACIDS): - famsa.score_vector[i] = roundf(cost_cast_factor * matrix[i][i]) - for j in range(NO_AMINOACIDS): - famsa.score_matrix[i][j] = roundf(cost_cast_factor * matrix[i][j]) + self._copy_matrix(famsa) # record the aligner on the resulting alignment alignment._famsa = shared_ptr[CFAMSA](famsa) @@ -466,11 +472,7 @@ cdef class Aligner: # copy score matrix weights with nogil: - matrix = self.scoring_matrix.matrix() - for i in range(NO_AMINOACIDS): - famsa.score_vector[i] = roundf(cost_cast_factor * matrix[i][i]) - for j in range(NO_AMINOACIDS): - famsa.score_matrix[i][j] = roundf(cost_cast_factor * matrix[i][j]) + self._copy_matrix(famsa) # copy the aligner input and record original order for i, sequence in enumerate(sequences): @@ -485,7 +487,7 @@ cdef class Aligner: og2map.push_back(i) ptrvec.push_back(&seqvec.data()[i]) tree._names.push_back(move(CSequence(seqvec[i].id, string(), i, NULL))) - + # remove duplicates and record sequence order if not self._params.keepDuplicates: famsa.removeDuplicates(ptrvec, og2map)