diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index c2f9d4ea39..3026eaedaf 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -40,7 +40,7 @@ /** * Class for reading and querying BAM files. */ -class BAMFileReader extends SamReader.ReaderImplementation { +public class BAMFileReader extends SamReader.ReaderImplementation { // True if reading from a File rather than an InputStream private boolean mIsSeekable = false; @@ -869,25 +869,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) { } } - private CloseableIterator createIndexIterator(final QueryInterval[] intervals, - final boolean contained) { - - assertIntervalsOptimized(intervals); - - // Hit the index to determine the chunk boundaries for the required data. + /** + * Use the index to determine the chunk boundaries for the required intervals. + * @param intervals the intervals to restrict reads to + * @param fileIndex the BAM index to use + * @return the merge of the index spans overlapping each interval, or null if intervals is empty + */ + public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) { final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length]; - final BAMIndex fileIndex = getIndex(); for (int i = 0; i < intervals.length; ++i) { final QueryInterval interval = intervals[i]; final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); inputSpans[i] = span; } - final long[] filePointers; + final BAMFileSpan span; if (inputSpans.length > 0) { - filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray(); + span = BAMFileSpan.merge(inputSpans); } else { - filePointers = null; + span = null; } + return span; + } + + private CloseableIterator createIndexIterator(final QueryInterval[] intervals, + final boolean contained) { + + assertIntervalsOptimized(intervals); + + BAMFileSpan span = 
getFileSpan(intervals, getIndex()); + + // Create an iterator over the span's chunk boundaries (null file pointers when there is no span). + final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray()); + + // Add some preprocessing filters for edge-case reads that don't fit into this + // query type. + return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); + } + + /** + * Prepare to iterate through SAMRecords that match the given intervals. + * @param intervals the intervals to restrict reads to; checked by assertIntervalsOptimized + * @param contained if true, return records that are strictly + * contained in the intervals, otherwise return records that overlap + * @param filePointers file pointer pairs corresponding to chunk boundaries for the + * intervals, e.g. the coordinate array of the span returned by getFileSpan + */ + public CloseableIterator createIndexIterator(final QueryInterval[] intervals, + final boolean contained, + final long[] filePointers) { + + assertIntervalsOptimized(intervals); // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); diff --git a/src/main/java/htsjdk/samtools/BAMFileSpan.java b/src/main/java/htsjdk/samtools/BAMFileSpan.java index 193e44376f..485f69dcfa 100644 --- a/src/main/java/htsjdk/samtools/BAMFileSpan.java +++ b/src/main/java/htsjdk/samtools/BAMFileSpan.java @@ -115,15 +115,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { validateSorted(); final BAMFileSpan trimmedChunkList = new BAMFileSpan(); + final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart(); for(final Chunk chunkToTrim: chunks) { - if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) { - if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) { + if(chunkToTrim.getChunkEnd() > chunkStart) { + if(chunkToTrim.getChunkStart() >= chunkStart) { // This chunk from the list is completely beyond the start of the filtering chunk. 
trimmedChunkList.add(chunkToTrim.clone()); } else { // This chunk from the list partially overlaps the filtering chunk and must be trimmed. - trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd())); + trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd())); + } + } + } + return trimmedChunkList; + } + + /** + * Creates a new file span by removing all contents after the end of the given file span's last chunk. + * If a chunk in the chunk list starts before and ends after that end + * position, the portion of the chunk past it will be deleted. + * @param fileSpan The filespan after whose end to eliminate; must be a BAMFileSpan (else SAMException). If null or empty, a clone of this span is returned. + * @return A new BAMFileSpan which contains the portion of the chunk list before the + * end of the given file span. + */ + public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) { + if(fileSpan == null) + return clone(); + + if(!(fileSpan instanceof BAMFileSpan)) + throw new SAMException("Unable to compare "); + + final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan; + + if(bamFileSpan.isEmpty()) + return clone(); + + validateSorted(); + + final BAMFileSpan trimmedChunkList = new BAMFileSpan(); + final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); + for(final Chunk chunkToTrim: chunks) { + if(chunkToTrim.getChunkStart() < chunkEnd) { + if(chunkToTrim.getChunkEnd() <= chunkEnd) { + // This chunk from the list is completely before the end of the filtering chunk. + trimmedChunkList.add(chunkToTrim.clone()); + } + else { + // This chunk from the list partially overlaps the filtering chunk and must be trimmed. 
+ trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd)); } } } diff --git a/src/main/java/htsjdk/samtools/SamReader.java b/src/main/java/htsjdk/samtools/SamReader.java index 2f1b2f9dd2..0c551a0459 100644 --- a/src/main/java/htsjdk/samtools/SamReader.java +++ b/src/main/java/htsjdk/samtools/SamReader.java @@ -381,7 +381,11 @@ public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p, final Sa this.resource = resource; } - PrimitiveSamReader underlyingReader() { + /** + * Access the underlying {@link PrimitiveSamReader} used by this adapter. + * @return the {@link PrimitiveSamReader} used by this adapter. + */ + public PrimitiveSamReader underlyingReader() { return p; } diff --git a/src/test/java/htsjdk/samtools/BAMFileSpanTest.java b/src/test/java/htsjdk/samtools/BAMFileSpanTest.java new file mode 100644 index 0000000000..4fc39b2941 --- /dev/null +++ b/src/test/java/htsjdk/samtools/BAMFileSpanTest.java @@ -0,0 +1,70 @@ +package htsjdk.samtools; + +import java.util.Arrays; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class BAMFileSpanTest { + @Test(dataProvider = "testRemoveContentsBeforeProvider") + public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff, + BAMFileSpan expectedSpan) { + // only the start of the first chunk in cutoff is used + Assert.assertEquals( + ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(), + expectedSpan.getChunks()); + } + + @DataProvider(name = "testRemoveContentsBeforeProvider") + private Object[][] testRemoveContentsBeforeProvider() { + return new Object[][] { + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) }, + { 
span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() }, + }; + } + + @Test(dataProvider = "testRemoveContentsAfterProvider") + public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff, + BAMFileSpan expectedSpan) { + // only the end of the last chunk in cutoff is used + Assert.assertEquals( + ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(), + expectedSpan.getChunks()); + } + + @DataProvider(name = "testRemoveContentsAfterProvider") + private Object[][] testRemoveContentsAfterProvider() { + return new Object[][] { + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,9)), span(chunk(6,9)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), 
span(chunk(6, 10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, + }; + } + + private BAMFileSpan span(Chunk... chunks) { + return new BAMFileSpan(Arrays.asList(chunks)); + } + + private Chunk chunk(long start, long end) { + return new Chunk(start, end); + } +}