Skip to content

Commit

Permalink
Make BAMFileReader and some related classes public, and expose (#786)
Browse files Browse the repository at this point in the history
methods for iterating over a part of a BAM file (needed for Hadoop-BAM,
which processes BAM files in parallel).

Also, add BAMFileSpan#removeContentsAfter method to mirror
removeContentsBefore.
  • Loading branch information
tomwhite authored and droazen committed Feb 17, 2017
1 parent 8b9d5d5 commit 55bf01b
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 15 deletions.
53 changes: 42 additions & 11 deletions src/main/java/htsjdk/samtools/BAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
/**
* Class for reading and querying BAM files.
*/
class BAMFileReader extends SamReader.ReaderImplementation {
public class BAMFileReader extends SamReader.ReaderImplementation {
// True if reading from a File rather than an InputStream
private boolean mIsSeekable = false;

Expand Down Expand Up @@ -869,25 +869,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) {
}
}

private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
final boolean contained) {

assertIntervalsOptimized(intervals);

// Hit the index to determine the chunk boundaries for the required data.
/**
* Use the index to determine the chunk boundaries for the required intervals.
* @param intervals the intervals to restrict reads to
* @param fileIndex the BAM index to use
* @return file pointer pairs corresponding to chunk boundaries
*/
public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length];
final BAMIndex fileIndex = getIndex();
for (int i = 0; i < intervals.length; ++i) {
final QueryInterval interval = intervals[i];
final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
inputSpans[i] = span;
}
final long[] filePointers;
final BAMFileSpan span;
if (inputSpans.length > 0) {
filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray();
span = BAMFileSpan.merge(inputSpans);
} else {
filePointers = null;
span = null;
}
return span;
}

private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
final boolean contained) {

assertIntervalsOptimized(intervals);

BAMFileSpan span = getFileSpan(intervals, getIndex());

// Create an iterator over the above chunk boundaries.
final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray());

// Add some preprocessing filters for edge-case reads that don't fit into this
// query type.
return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
}

/**
* Prepare to iterate through SAMRecords that match the given intervals.
* @param intervals the intervals to restrict reads to
* @param contained if <code>true</code>, return records that are strictly
* contained in the intervals, otherwise return records that overlap
* @param filePointers file pointer pairs corresponding to chunk boundaries for the
* intervals
*/
public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
final boolean contained,
final long[] filePointers) {

assertIntervalsOptimized(intervals);

// Create an iterator over the above chunk boundaries.
final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
Expand Down
46 changes: 43 additions & 3 deletions src/main/java/htsjdk/samtools/BAMFileSpan.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
validateSorted();

final BAMFileSpan trimmedChunkList = new BAMFileSpan();
final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart();
for(final Chunk chunkToTrim: chunks) {
if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
if(chunkToTrim.getChunkEnd() > chunkStart) {
if(chunkToTrim.getChunkStart() >= chunkStart) {
// This chunk from the list is completely beyond the start of the filtering chunk.
trimmedChunkList.add(chunkToTrim.clone());
}
else {
// This chunk from the list partially overlaps the filtering chunk and must be trimmed.
trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd()));
}
}
}
return trimmedChunkList;
}

/**
* Creates a new file span by removing all chunks after the given file span ends.
* If a chunk in the chunk list starts before and ends after the given
* chunk, the second portion of the chunk will be deleted.
* @param fileSpan The filespan after which to eliminate.
* @return A new BAMFileSpan which contains the portion of the chunk list before the
* given chunk.
*/
public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) {
if(fileSpan == null)
return clone();

if(!(fileSpan instanceof BAMFileSpan))
throw new SAMException("Unable to compare ");

final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;

if(bamFileSpan.isEmpty())
return clone();

validateSorted();

final BAMFileSpan trimmedChunkList = new BAMFileSpan();
final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd();
for(final Chunk chunkToTrim: chunks) {
if(chunkToTrim.getChunkStart() < chunkEnd) {
if(chunkToTrim.getChunkEnd() <= chunkEnd) {
// This chunk from the list is completely before the end of the filtering chunk.
trimmedChunkList.add(chunkToTrim.clone());
}
else {
// This chunk from the list partially overlaps the filtering chunk and must be trimmed.
trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd));
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/htsjdk/samtools/SamReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,11 @@ public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p, final Sa
this.resource = resource;
}

PrimitiveSamReader underlyingReader() {
/**
* Access the underlying {@link PrimitiveSamReader} used by this adapter.
* @return the {@link PrimitiveSamReader} used by this adapter.
*/
public PrimitiveSamReader underlyingReader() {
return p;
}

Expand Down
70 changes: 70 additions & 0 deletions src/test/java/htsjdk/samtools/BAMFileSpanTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package htsjdk.samtools;

import java.util.Arrays;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

public class BAMFileSpanTest {
@Test(dataProvider = "testRemoveContentsBeforeProvider")
public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff,
BAMFileSpan expectedSpan) {
// only start value in cutoff is used
Assert.assertEquals(
((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(),
expectedSpan.getChunks());
}

@DataProvider(name = "testRemoveContentsBeforeProvider")
private Object[][] testRemoveContentsBeforeProvider() {
return new Object[][] {
{ span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() },
{ span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() },
{ span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() },
};
}

@Test(dataProvider = "testRemoveContentsAfterProvider")
public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff,
BAMFileSpan expectedSpan) {
// only end value in cutoff is used
Assert.assertEquals(
((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(),
expectedSpan.getChunks());
}

@DataProvider(name = "testRemoveContentsAfterProvider")
private Object[][] testRemoveContentsAfterProvider() {
return new Object[][] {
{ span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,9)), span(chunk(6,9)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), span(chunk(6, 10)) },
{ span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
};
}

private BAMFileSpan span(Chunk... chunks) {
return new BAMFileSpan(Arrays.asList(chunks));
}

private Chunk chunk(long start, long end) {
return new Chunk(start, end);
}
}

0 comments on commit 55bf01b

Please sign in to comment.