diff --git a/errors.py b/errors.py index f9c81aed..4d369652 100644 --- a/errors.py +++ b/errors.py @@ -4,4 +4,8 @@ class QCError(RuntimeError): '''Indicates a failure at a QC step.''' def __init__(self, reason): - super(QCError, self).__init__(reason) \ No newline at end of file + super(QCError, self).__init__(reason) + +class InvalidBamHeaderError(ValueError): + '''Indicates a malformed or otherwise unusable bam file header''' + pass \ No newline at end of file diff --git a/read_utils.py b/read_utils.py index 7fc3cd86..5d5d2f19 100755 --- a/read_utils.py +++ b/read_utils.py @@ -1388,6 +1388,9 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None, ref_indexed = util.file.mkstempfname('.reference.fasta') shutil.copyfile(refFasta, ref_indexed) + if samtools.isEmpty(inBam): + log.warning("The input bam file appears to have zero reads: %s", inBam) + mm2.align_bam(inBam, ref_indexed, bam_aligned) if filterReadsAfterAlignment: @@ -1401,6 +1404,10 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None, shutil.move(bam_aligned, bam_filtered) if outStats is not None: + # index the final bam before calling idxstats + # but only if it is a bam or cram file (sam cannot be indexed) + if (bam_filtered.endswith(".bam") or bam_filtered.endswith(".cram")): + samtools.index(bam_filtered) samtools.idxstats(bam_filtered, outStats) if outBam is None: diff --git a/tools/bwa.py b/tools/bwa.py index 09a9d180..799ace8d 100644 --- a/tools/bwa.py +++ b/tools/bwa.py @@ -16,6 +16,7 @@ import tools.picard import util.file import util.misc +from errors import * TOOL_NAME = 'bwa' diff --git a/tools/minimap2.py b/tools/minimap2.py index 0162caa3..e20f0e63 100644 --- a/tools/minimap2.py +++ b/tools/minimap2.py @@ -14,6 +14,7 @@ import tools.picard import util.file import util.misc +from errors import * TOOL_NAME = 'minimap2' @@ -80,6 +81,7 @@ def align_bam(self, inBam, refDb, outBam, options=None, log.warning("No alignment output for RG %s in file %s against %s", rg, inBam, refDb) if len(align_bams)==0: + log.warning("All read groups in file %s appear to be empty.", inBam) with util.file.tempfname('.empty.sam') as empty_sam: samtools.dumpHeader(inBam, empty_sam) samtools.sort(empty_sam, outBam) @@ -92,6 +94,7 @@ def align_bam(self, inBam, refDb, outBam, options=None, picardOptions=picardOptions, JVMmemory=JVMmemory ) + for bam in align_bams: os.unlink(bam) @@ -181,8 +184,10 @@ def align_one_rg(self, inBam, refDb, outBam, rgid=None, preset=None, options=Non # perform actual alignment if samtools.isEmpty(one_rg_inBam): + log.warning("Input file %s appears to lack reads for RG '%s'", inBam, rgid) # minimap doesn't like empty inputs, so copy empty bam through - samtools.sort(one_rg_inBam, outBam) + # samtools.sort(one_rg_inBam, outBam) + self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads) else: self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads) diff --git a/tools/novoalign.py b/tools/novoalign.py index 60e91571..80d7ec9e 100644 --- a/tools/novoalign.py +++ b/tools/novoalign.py @@ -7,12 +7,6 @@ either in $PATH or $NOVOALIGN_PATH. ''' -import tools -import tools.picard -import tools.samtools -import util.file -import util.misc - import logging import os import os.path @@ -21,11 +15,16 @@ import stat import sys +import tools +import tools.picard +import tools.samtools +import util.file +import util.misc +from errors import * + _log = logging.getLogger(__name__) TOOL_NAME = "novoalign" -#TOOL_VERSION = "3.09.04" - class NovoalignTool(tools.Tool): @@ -217,7 +216,3 @@ def index_fasta(self, refFasta, k=None, s=None): os.chmod(outfname, mode) except (IOError, OSError): _log.warning('could not chmod "%s", this is likely OK', refFasta) - - -class InvalidBamHeaderError(ValueError): - pass