From dafd8e35963218bef4e916132a681c263a0be7c8 Mon Sep 17 00:00:00 2001 From: Chris Tomkins-Tinch Date: Wed, 6 Mar 2024 14:36:52 -0500 Subject: [PATCH] add unit test for TestBwamemIdxstats (#93) add unit test for TestBwamemIdxstats --- read_utils.py | 20 +++++++++++++------- test/unit/test_read_utils.py | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/read_utils.py b/read_utils.py index 788b3682..5c4db47d 100755 --- a/read_utils.py +++ b/read_utils.py @@ -1370,10 +1370,11 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None, assert outBam or outStats, "Either outBam or outStats must be specified" + bam_aligned = util.file.mkstempfname('.aligned.bam') if outBam is None: - bam_aligned = mkstempfname('.aligned.bam') + bam_filtered = mkstempfname('.filtered.bam') else: - bam_aligned = outBam + bam_filtered = outBam samtools = tools.samtools.SamtoolsTool() mm2 = tools.minimap2.Minimap2() @@ -1382,14 +1383,15 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None, shutil.copyfile(refFasta, ref_indexed) mm2.align_bam(inBam, ref_indexed, bam_aligned) - + if filterReadsAfterAlignment: samtools.filter_to_proper_primary_mapped_reads(bam_aligned, bam_filtered, require_pairs_to_be_proper=not doNotRequirePairsToBeProper, reject_singletons=not keepSingletons) + os.unlink(bam_aligned) else: - bam_filtered = bam_aligned + shutil.move(bam_aligned, bam_filtered) if outStats is not None: samtools.idxstats(bam_filtered, outStats) @@ -1397,6 +1399,8 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None, if outBam is None: os.unlink(bam_filtered) + + def parser_minimap2_idxstats(parser=argparse.ArgumentParser()): parser.add_argument('inBam', help='Input unaligned reads, BAM format.') parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard and Novoalign.') @@ -1435,10 +1439,11 @@ def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None, assert outBam or outStats, "Either outBam or outStats must be specified" + bam_aligned = util.file.mkstempfname('.aligned.bam') if outBam is None: - bam_aligned = mkstempfname('.aligned.bam') + bam_filtered = mkstempfname('.filtered.bam') else: - bam_aligned = outBam + bam_filtered = outBam samtools = tools.samtools.SamtoolsTool() bwa = tools.bwa.Bwa() @@ -1456,8 +1461,9 @@ def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None, bam_filtered, require_pairs_to_be_proper=not doNotRequirePairsToBeProper, reject_singletons=not keepSingletons) + os.unlink(bam_aligned) else: - bam_filtered = bam_aligned + shutil.move(bam_aligned, bam_filtered) if outStats is not None: samtools.idxstats(bam_filtered, outStats) diff --git a/test/unit/test_read_utils.py b/test/unit/test_read_utils.py index dedf49e1..7b402447 100644 --- a/test/unit/test_read_utils.py +++ b/test/unit/test_read_utils.py @@ -48,6 +48,25 @@ def test_bwamem_idxstats(self): self.assertEqual(actual_count, self.samtools.count(outBam, opts=['-F', '4'])) self.assertGreater(actual_count, 18000) + def test_bwamem_idxstats_with_filtering(self): + inBam = os.path.join(util.file.get_test_input_path(), 'G5012.3.testreads.bam') + outBam = util.file.mkstempfname('.bam', directory=self.tempDir) + outStats = util.file.mkstempfname('.stats.txt', directory=self.tempDir) + read_utils.bwamem_idxstats(inBam, self.ebolaRef, outBam, outStats, filterReadsAfterAlignment=True) + with open(outStats, 'rt') as inf: + actual_count = int(inf.readline().strip().split('\t')[2]) + self.assertEqual(actual_count, self.samtools.count(outBam, opts=['-F', '4'])) + self.assertLess(actual_count, 18000) + + outBamNoFiltering = util.file.mkstempfname('.bam', directory=self.tempDir) + outStatsNoFiltering = util.file.mkstempfname('.stats.txt', directory=self.tempDir) + read_utils.bwamem_idxstats(inBam, self.ebolaRef, outBamNoFiltering, outStatsNoFiltering, filterReadsAfterAlignment=False) + with open(outStatsNoFiltering, 'rt') as inf: + count_without_filtering = int(inf.readline().strip().split('\t')[2]) + + # the filtered count should be less than the count without filtering + self.assertLess(actual_count, count_without_filtering) + def test_bwamem_idxstats_no_bam_output(self): inBam = os.path.join(util.file.get_test_input_path(), 'G5012.3.testreads.bam') outStats = util.file.mkstempfname('.stats.txt', directory=self.tempDir)