diff --git a/CHANGELOG b/CHANGELOG
index 5348690..98e1ece 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,5 @@
+v0.2.1:
+  * translate: add check_start option
 v0.2.0:
   * io: add option to write files automatically into an archive
   * io.gff: allow to store comment lines while reading
diff --git a/sugar/__init__.py b/sugar/__init__.py
index 717b301..a57972a 100644
--- a/sugar/__init__.py
+++ b/sugar/__init__.py
@@ -52,7 +52,7 @@ The test suite can be run with ``sugar test``.
 
 """
 
-__version__ = '0.2.1-dev'
+__version__ = '0.2.1'
 
 from sugar.core.meta import Attr, Meta
 from sugar.core.fts import Feature, FeatureList, Location
diff --git a/sugar/core/translate.py b/sugar/core/translate.py
index c76fa6f..407d167 100644
--- a/sugar/core/translate.py
+++ b/sugar/core/translate.py
@@ -4,65 +4,75 @@ import warnings
 
 
 
-def translate(seq, complete=False, gap='-', gap_after=2,
-              astop='X', warn=False, check_stop=False, tt=1):
+def translate(seq, *, complete=False, check_start=None, check_stop=False,
+              warn=False, astop='X', gap='-', gap_after=2, tt=1):
     """
     Translate a string or `.BioSeq` object into an amino acid string
 
-    :param bool complete: If set to ``True`` ignores stop codons
+    :param bool complete: If set to ``True`` ignores stop codons,
+        otherwise the translation is stopped before the first stop codon
+    :param bool check_start: Check that the first codon is a start codon,
+        default is True for ``complete=False`` otherwise False
+    :param bool check_stop: Check that the sequence ends with the first stop
+        codon, default is False
+    :param bool warn: Warn if the first codon might not be a start codon
+        (if ``check_start=True``) and warn for ambiguous stop codons,
+        default is False
+    :param str astop: Symbol for ambiguous stop codons
     :param str gap: gap character, default ``'-'``, set to ``None``
         to raise an error for non nucleotide characters
     :param int gap_after: A single gap in the amino acis string is
         written after the first ``gap_after`` gaps in the
         nucleotide sequence and afterwards after each third gap,
-        defaults to 2
-    :param str astop: Symbol for ambigious stop codons
-    :param bool warn: Warn if start codon might not be a start codon and
-        warn for amigious stop codons for
-        ``complete=False``
-    :param bool check_stop: Check that last codon is a stop codon for
-        ``complete=False``
+        default is 2
+    :param int tt: the number of the translation table, default is 1
+    :raises ValueError: if ``check_start`` or ``check_stop`` is set and
+        the corresponding check fails
     """
     gc = gcode(tt)
     aas = []
-    codon = ''
     ngap = 0
-    check_start_codon = not complete
-    for nb in str(seq).replace('U', 'T'):
-        if nb == gap:
+    check_start = check_start if check_start is not None else not complete
+    stop_found = False
+    codon = ''
+    nts = str(seq).replace('U', 'T')
+    for i, nt in enumerate(nts):
+        if nt == gap:
             ngap += 1
         else:
-            codon = codon + nb
+            codon = codon + nt
         if gap and gap_after is not None and ngap == gap_after:
             aas.append(gap)
             ngap -= 3
         if len(codon) == 3:
-            if check_start_codon:
+            if check_start:
+                # only the first complete codon is checked
+                check_start = False
                 if codon not in gc.starts and codon not in gc.astarts:
                     msg = (f'Codon {codon} is not a start codon {gc.starts} '
                            f'in genetic code #{gc.id}')
                     raise ValueError(msg)
                 if warn and codon not in gc.starts:
                     msg = f'Codon {codon} possibly is not a start codon.'
                     warnings.warn(msg)
-                check_start_codon = False
             try:
                 aa = gc.tt[codon]
             except (KeyError):
                 aa = 'X'
-            if not complete and codon in gc.stops:
-                break
             if codon in gc.astops:
                 aa = astop
-                if warn and not complete:
-                    msg = f'Codon {codon} might be a stop codon.'
-                    warnings.warn(msg)
+                if warn:
+                    warnings.warn(f'Codon {codon} might be a stop codon.')
+            if codon in gc.stops:
+                stop_found = True
+                # i is the index of the last nucleotide of the stop codon
+                if check_stop and i < len(nts) - 1:
+                    msg = 'First stop codon is not at the end of the sequence.'
+                    raise ValueError(msg)
+                if not complete:
+                    break
             aas.append(aa)
             codon = ''
-    else:
-        if not complete and check_stop:
-            msg = (f'Last codon is not a stop codon {gc.stops} '
-                   f'in genetic code #{gc.id}')
-            raise ValueError(msg)
-
+    if check_stop and not stop_found:
+        msg = (f'Last codon is not a stop codon {gc.stops} '
+               f'in genetic code #{gc.id}')
+        raise ValueError(msg)
     return ''.join(aas)
diff --git a/sugar/tests/test_core_translate.py b/sugar/tests/test_core_translate.py
index 20549cb..d35f110 100644
--- a/sugar/tests/test_core_translate.py
+++ b/sugar/tests/test_core_translate.py
@@ -9,7 +9,7 @@ def test_translate():
 
     s = 'CCC-AT-GAT-NCC--CCCCT---ANT-A--GGGN'
     aas = 'P-MX-PP-X-*G'
-    assert translate(s, complete=True, warn=True) == aas
+    assert translate(s, complete=True) == aas
     with pytest.warns(UserWarning, match='might be a stop'):
         assert translate(s[3:-3], warn=True) == aas[1:-2]
     assert translate(s[3:-3], warn=False) == aas[1:-2]