Skip to content

Commit

Permalink
Merge pull request #2282 from merenlab/prodigal-segmentation-fault-fixy
Browse files Browse the repository at this point in the history
A solution for prodigal segmentation fault errors
  • Loading branch information
meren authored Jun 16, 2024
2 parents c957c6c + 8ee205c commit ae1ae6f
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 12 deletions.
13 changes: 13 additions & 0 deletions anvio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,19 @@ def TABULATE(table, header, numalign="right", max_width=0):
"frames in contigs by running a bacterial gene caller. Declaring this flag will by-pass that "
"process. If you prefer, you can later import your own gene calling results into the database."}
),
'prodigal-single-mode': (
['--prodigal-single-mode'],
{'default': False,
'action': 'store_true',
'help': "By default, anvi'o will use prodigal for gene calling (unless you skipped gene calling, or provided "
"anvi'o with external gene calls). One of the flags anvi'o includes in prodigal run is `-p meta`, which "
"optimizes prodigal's ability to identify genes in metagenomic assemblies. In some rare cases, for a "
"given set of contigs prodigal will yield a segmentation fault error because one or more genes in your "
"collection confuse the program when it is used with the `-p meta` flag. While anvi'o developers "
"are not quite sure under what circumstances this happens, we realized that removal of this flag often "
"solves this issue. If you are dealing with such cryptic errors, the inclusion of `--prodigal-single-mode` "
"will instruct anvi'o to run prodigal without the `-p meta` flag, and may resolve this issue for you."}
),
'remove-partial-hits': (
['--remove-partial-hits'],
{'default': False,
Expand Down
3 changes: 3 additions & 0 deletions anvio/dbops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4316,6 +4316,7 @@ def create(self, args):
ignore_internal_stop_codons = A('ignore_internal_stop_codons')
skip_predict_frame= A('skip_predict_frame')
prodigal_translation_table = A('prodigal_translation_table')
prodigal_single_mode = A('prodigal_single_mode')

if external_gene_calls_file_path:
filesnpaths.is_proper_external_gene_calls_file(external_gene_calls_file_path)
Expand Down Expand Up @@ -4516,6 +4517,8 @@ def create(self, args):
if external_gene_calls_file_path:
self.run.info('External gene calls file have AA sequences?', external_gene_calls_include_amino_acid_sequences, mc='green')
self.run.info('Proper frames will be predicted?', (not skip_predict_frame), mc='green')
else:
self.run.info('Is prodigal run in single mode?', ('YES' if prodigal_single_mode else 'NO'), mc='green')

self.run.info('Ignoring internal stop codons?', ignore_internal_stop_codons)
self.run.info('Splitting pays attention to gene calls?', (not skip_mindful_splitting))
Expand Down
18 changes: 11 additions & 7 deletions anvio/drivers/prodigal.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ def __init__(self, args=None, progress=progress, run=run):
self.progress = progress
self.run = run
self.args = args

A = lambda x: (args.__dict__[x] if x in args.__dict__ else None) if args else None
self.prodigal_translation_table = A('prodigal_translation_table')
self.num_threads = A('num_threads')
self.prodigal_single_mode = A('prodigal_single_mode')

self.run.info('Num threads for gene calling', self.num_threads)

Expand Down Expand Up @@ -89,6 +89,7 @@ def __parser_1(self, defline):

return hit


def check_version(self):
"""checks the installed version of prodigal, sets the parser"""

Expand All @@ -105,6 +106,7 @@ def check_version(self):
self.installed_version = version_found
self.parser = self.available_parsers[version_found]


def process(self, fasta_file_path, output_dir):
"""Take the fasta file, run prodigal on it, and make sense of the output
Expand All @@ -116,16 +118,17 @@ def process(self, fasta_file_path, output_dir):
self.genes_in_contigs = os.path.join(output_dir, 'contigs.genes')
self.amino_acid_sequences_in_contigs = os.path.join(output_dir, 'contigs.amino_acid_sequences')

self.run.warning("Anvi'o will use 'prodigal' by Hyatt et al (doi:10.1186/1471-2105-11-119) to identify open "
"reading frames in your data. When you publish your findings, please do not forget to "
"properly credit their work.", lc='green', header="CITATION")

# Put some nice logging info.
self.run.warning('', header='Finding ORFs in contigs', lc='green')
self.run.warning('', header='Finding ORFs in contigs using prodigal', lc='green')
self.run.info('Procedure', 'single' if self.prodigal_single_mode else 'meta')
self.run.info('Genes', self.genes_in_contigs)
self.run.info('Amino acid sequences', self.amino_acid_sequences_in_contigs)
self.run.info('Log file', log_file_path)

self.run.warning("Anvi'o will use 'prodigal' by Hyatt et al (doi:10.1186/1471-2105-11-119) to identify open "
"reading frames in your data. When you publish your findings, please do not forget to "
"properly credit their work.", lc='green', header="CITATION")

self.progress.new('Processing')
self.progress.update(f"Identifying ORFs using {terminal.pluralize('thread', self.num_threads)}.")

Expand Down Expand Up @@ -153,7 +156,8 @@ def process(self, fasta_file_path, output_dir):
logger=terminal.Logger(progress=self.progress, run=self.run),
installed_version=self.installed_version,
parser=self.parser,
translation_table=self.prodigal_translation_table)
translation_table=self.prodigal_translation_table,
prodigal_single_mode=self.prodigal_single_mode)

prodigal_runner = ThreadedProdigalRunner(args)

Expand Down
21 changes: 19 additions & 2 deletions anvio/tables/genecalls.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ def run_gene_caller(self, gene_caller='prodigal'):

if not self.contigs_fasta:
self.contigs_fasta = filesnpaths.get_temp_file_path()
utils.export_sequences_from_contigs_db(self.contigs_db_path,
utils.export_sequences_from_contigs_db(self.db_path,
output_file_path=self.contigs_fasta,
run=self.run)
remove_fasta_after_processing = True
Expand All @@ -495,7 +495,24 @@ def run_gene_caller(self, gene_caller='prodigal'):

gene_caller = genecalling.GeneCaller(self.contigs_fasta, gene_caller=gene_caller, args=self.args, debug=self.debug)

gene_calls_dict, amino_acid_sequences = gene_caller.process()
try:
gene_calls_dict, amino_acid_sequences = gene_caller.process()
except Exception as e:
if 'prodigal' in e.e:
self.run.warning("There was a problem with your gene calling, and the error seems to be related to 'prodigal'. Please "
"find additional details below. It is difficult to determine what caused this error, but if you would "
"like to be certain, you can literally copy the command shown below into a single line, and run it on "
"the same machine manually (or re-run the same command you ran to get this error with the `--debug` flag "
"to keep all the original log files from prodigal). If this error is due to a 'segmentation fault', "
"please consider including the `--prodigal-single-mode` flag in your `anvi-gen-contigs-database` command. More "
"information on `--prodigal-single-mode` is available in the help menu of `anvi-gen-contigs-database`.",
header="💀 PRODIGAL FAILED 💀")

# remove the unfinished contigs-db file
os.remove(self.db_path)

# show the user the actual error from down below
raise ConfigError(f"{e}")

if not self.debug and remove_fasta_after_processing:
os.remove(self.contigs_fasta)
Expand Down
15 changes: 12 additions & 3 deletions anvio/threadingops.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def __init__(self, args):
A = lambda x: args.__dict__[x] if x in args.__dict__ else None

required_args = ['input_file_path', 'collated_output_file_paths', 'number_of_splits', 'log_file_path',
'installed_version', 'parser']
'installed_version', 'parser', 'prodigal_single_mode']

# Check that the required arguments are present.
for arg in required_args:
Expand All @@ -370,6 +370,7 @@ def __init__(self, args):
log_file_path=A('log_file_path'),
logger=A('logger'))

self.prodigal_single_mode = A('prodigal_single_mode')
self.installed_version = A('installed_version')
self.parser = A('parser')

Expand All @@ -380,6 +381,7 @@ def __init__(self, args):

self.translation_table = translation_table


# Implement the abstract methods
#
#
Expand All @@ -401,6 +403,7 @@ def _split_input_file(self):

return State(input_file_splits=self.input_file_splits)


def _make_commands(self):
"""Make commands and store them in `self.commands`.
Expand Down Expand Up @@ -445,8 +448,14 @@ def _make_commands(self):
"you are reading this message, then please contact an anvi'o developer."
% str(self.translation_table))
else:
# Use 'meta' mode if no translation tables are given.
command.extend(['-p', 'meta'])
if self.prodigal_single_mode:
# the user explicitly requested to not use the `-p meta` flag to run
# prodigal (the default procedure is single, so prodigal will fall back
# to 'single' mode in this case)
pass
else:
# Use 'meta' mode if no translation tables are given.
command.extend(['-p', 'meta'])

self.commands.append(command)

Expand Down
1 change: 1 addition & 0 deletions bin/anvi-gen-contigs-database
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ if __name__ == '__main__':

groupD = parser.add_argument_group('GENES IN CONTIGS', 'Expert thingies.')
groupD.add_argument(*anvio.A('skip-gene-calling'), **anvio.K('skip-gene-calling'))
groupD.add_argument(*anvio.A('prodigal-single-mode'), **anvio.K('prodigal-single-mode'))
groupD.add_argument(*anvio.A('prodigal-translation-table'), **anvio.K('prodigal-translation-table'))
groupD.add_argument(*anvio.A('external-gene-calls'), **anvio.K('external-gene-calls'))
groupD.add_argument(*anvio.A('ignore-internal-stop-codons'), **anvio.K('ignore-internal-stop-codons'))
Expand Down

0 comments on commit ae1ae6f

Please sign in to comment.