Skip to content

Commit

Permalink
add unit test for TestBwamemIdxstats (#93)
Browse files Browse the repository at this point in the history
add unit test for TestBwamemIdxstats
  • Loading branch information
tomkinsc committed Mar 6, 2024
1 parent fb0ebd2 commit dafd8e3
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 7 deletions.
20 changes: 13 additions & 7 deletions read_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,10 +1370,11 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,

assert outBam or outStats, "Either outBam or outStats must be specified"

bam_aligned = util.file.mkstempfname('.aligned.bam')
if outBam is None:
bam_aligned = mkstempfname('.aligned.bam')
bam_filtered = mkstempfname('.filtered.bam')
else:
bam_aligned = outBam
bam_filtered = outBam

samtools = tools.samtools.SamtoolsTool()
mm2 = tools.minimap2.Minimap2()
Expand All @@ -1382,21 +1383,24 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,
shutil.copyfile(refFasta, ref_indexed)

mm2.align_bam(inBam, ref_indexed, bam_aligned)

if filterReadsAfterAlignment:
samtools.filter_to_proper_primary_mapped_reads(bam_aligned,
bam_filtered,
require_pairs_to_be_proper=not doNotRequirePairsToBeProper,
reject_singletons=not keepSingletons)
os.unlink(bam_aligned)
else:
bam_filtered = bam_aligned
shutil.move(bam_aligned, bam_filtered)

if outStats is not None:
samtools.idxstats(bam_filtered, outStats)

if outBam is None:
os.unlink(bam_filtered)



def parser_minimap2_idxstats(parser=argparse.ArgumentParser()):
parser.add_argument('inBam', help='Input unaligned reads, BAM format.')
parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Picard and Novoalign.')
Expand Down Expand Up @@ -1435,10 +1439,11 @@ def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None,

assert outBam or outStats, "Either outBam or outStats must be specified"

bam_aligned = util.file.mkstempfname('.aligned.bam')
if outBam is None:
bam_aligned = mkstempfname('.aligned.bam')
bam_filtered = mkstempfname('.filtered.bam')
else:
bam_aligned = outBam
bam_filtered = outBam

samtools = tools.samtools.SamtoolsTool()
bwa = tools.bwa.Bwa()
Expand All @@ -1456,8 +1461,9 @@ def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None,
bam_filtered,
require_pairs_to_be_proper=not doNotRequirePairsToBeProper,
reject_singletons=not keepSingletons)
os.unlink(bam_aligned)
else:
bam_filtered = bam_aligned
shutil.move(bam_aligned, bam_filtered)

if outStats is not None:
samtools.idxstats(bam_filtered, outStats)
Expand Down
19 changes: 19 additions & 0 deletions test/unit/test_read_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,25 @@ def test_bwamem_idxstats(self):
self.assertEqual(actual_count, self.samtools.count(outBam, opts=['-F', '4']))
self.assertGreater(actual_count, 18000)

def test_bwamem_idxstats_with_filtering(self):
inBam = os.path.join(util.file.get_test_input_path(), 'G5012.3.testreads.bam')
outBam = util.file.mkstempfname('.bam', directory=self.tempDir)
outStats = util.file.mkstempfname('.stats.txt', directory=self.tempDir)
read_utils.bwamem_idxstats(inBam, self.ebolaRef, outBam, outStats, filterReadsAfterAlignment=True)
with open(outStats, 'rt') as inf:
actual_count = int(inf.readline().strip().split('\t')[2])
self.assertEqual(actual_count, self.samtools.count(outBam, opts=['-F', '4']))
self.assertLess(actual_count, 18000)

outBamNoFiltering = util.file.mkstempfname('.bam', directory=self.tempDir)
outStatsNoFiltering = util.file.mkstempfname('.stats.txt', directory=self.tempDir)
read_utils.bwamem_idxstats(inBam, self.ebolaRef, outBamNoFiltering, outStatsNoFiltering, filterReadsAfterAlignment=False)
with open(outStatsNoFiltering, 'rt') as inf:
count_without_filtering = int(inf.readline().strip().split('\t')[2])

# the filtered count should be less than the count without filtering
self.assertLess(actual_count, count_without_filtering)

def test_bwamem_idxstats_no_bam_output(self):
inBam = os.path.join(util.file.get_test_input_path(), 'G5012.3.testreads.bam')
outStats = util.file.mkstempfname('.stats.txt', directory=self.tempDir)
Expand Down

0 comments on commit dafd8e3

Please sign in to comment.