Skip to content

Commit

Permalink
translate: add check_start option and refactor, v.0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
trichter committed Jun 20, 2024
1 parent 9fc6cb3 commit 4c3d574
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 31 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
v0.2.1:
* translate: add check_start option
v0.2.0:
* io: add option to write files automatically into an archive
* io.gff: allow to store comment lines while reading
Expand Down
2 changes: 1 addition & 1 deletion sugar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
The test suite can be run with ``sugar test``.
"""

__version__ = '0.2.1-dev'
__version__ = '0.2.1'

from sugar.core.meta import Attr, Meta
from sugar.core.fts import Feature, FeatureList, Location
Expand Down
62 changes: 33 additions & 29 deletions sugar/core/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,65 +4,69 @@
import warnings


def translate(seq, complete=False, gap='-', gap_after=2,
astop='X', warn=False, check_stop=False, tt=1):
def translate(seq, *, complete=False, check_start=None, check_stop=False,
warn=False, astop='X', gap='-', gap_after=2, tt=1):
"""
Translate a string or `.BioSeq` object into an amino acid string
:param bool complete: If set to ``True`` ignores stop codons
:param bool complete: If set to ``True`` ignores stop codons,
otherwise the translation is stopped before the first stop codon
:param bool check_start: Check that the first codon is a start codon,
default is False for ``complete=False`` otherwise True
:param bool check_stop: Check that the sequence ends with the first stop
codon, default is False
:param bool warn: Warn if the first codon might not be a start codon
(if ``check_start=True``) and warn for ambiguous stop codons,
default is False
:param str astop: Symbol for ambiguous stop codons
:param str gap: gap character, default ``'-'``, set to ``None``
to raise an error for non nucleotide characters
:param int gap_after: A single gap in the amino acid string is
written after the first ``gap_after`` gaps in the
nucleotide sequence and afterwards after each third gap,
defaults to 2
:param str astop: Symbol for ambigious stop codons
:param bool warn: Warn if start codon might not be a start codon and
warn for amigious stop codons for
``complete=False``
:param bool check_stop: Check that last codon is a stop codon for
``complete=False``
default is 2
:param int tt: the number of the translation table, default is 1
"""
gc = gcode(tt)
aas = []
codon = ''
ngap = 0
check_start_codon = not complete
for nb in str(seq).replace('U', 'T'):
if nb == gap:
check_start = check_start if check_start is not None else not complete
codon = ''
for i, nt in enumerate(str(seq).replace('U', 'T')):
if nt == gap:
ngap += 1
else:
codon = codon + nb
codon = codon + nt
if gap and gap_after is not None and ngap == gap_after:
aas.append(gap)
ngap -= 3
if len(codon) == 3:
if check_start_codon:
if check_start:
check_start = False
if codon not in gc.starts and codon not in gc.astarts:
msg = (f'Codon {codon} is not a start codon {gc.starts} '
f'in genetic code #{gc.id}')
raise ValueError(msg)
if warn and codon not in gc.starts:
if codon not in gc.starts:
msg = f'Codon {codon} possibly is not a start codon.'
warnings.warn(msg)
check_start_codon = False
try:
aa = gc.tt[codon]
except (KeyError):
aa = 'X'
if not complete and codon in gc.stops:
break
if codon in gc.astops:
aa = astop
if warn and not complete:
msg = f'Codon {codon} might be a stop codon.'
warnings.warn(msg)
if warn:
warnings.warn(f'Codon {codon} might be a stop codon.')
if codon in gc.stops:
if check_stop and i < len(nt) - 1:
msg = 'First stop codon is not at the end of the sequence.'
raise ValueError(msg)
if not complete:
break
aas.append(aa)
codon = ''
else:
if not complete and check_stop:
msg = (f'Last codon is not a stop codon {gc.stops} '
f'in genetic code #{gc.id}')
raise ValueError(msg)

if check_stop and aa != 'X':
msg = f'Last codon is not a stop codon {gc.stops} in genetic code #{gc.id}'
raise ValueError(msg)
return ''.join(aas)
2 changes: 1 addition & 1 deletion sugar/tests/test_core_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_translate():
s = 'CCC-AT-GAT-NCC--CCCCT---ANT-A--GGGN'
aas = 'P-MX-PP-X-*G'

assert translate(s, complete=True, warn=True) == aas
assert translate(s, complete=True) == aas
with pytest.warns(UserWarning, match='might be a stop'):
assert translate(s[3:-3], warn=True) == aas[1:-2]
assert translate(s[3:-3], warn=False) == aas[1:-2]
Expand Down

0 comments on commit 4c3d574

Please sign in to comment.