From 72818a014c24b10280de66e38ce2efb3f4c09a81 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 18 Jun 2018 20:27:35 +0100 Subject: [PATCH] Allow ReferenceSequenceFileFactory to load from streams (#1123) * Add a method to open a ReferenceSequence by passing a FASTA and its index as SeekableStreams * This is useful for clients that are using filesystems that don't have an nio.Path provider available but can produce a stream * part of #1112 --- .../reference/AbstractFastaSequenceFile.java | 43 ++++++++---- .../AbstractIndexedFastaSequenceFile.java | 14 +++- .../samtools/reference/FastaSequenceFile.java | 36 ++++++++-- .../reference/FastaSequenceIndex.java | 27 ++++--- .../reference/IndexedFastaSequenceFile.java | 15 ++++ .../ReferenceSequenceFileFactory.java | 50 +++++++++++++ .../ReadableSeekableStreamByteChannel.java | 70 +++++++++++++++++++ .../AbstractIndexedFastaSequenceFileTest.java | 23 ++++-- .../reference/FastaSequenceFileTest.java | 26 +++++++ .../reference/FastaSequenceIndexTest.java | 9 ++- 10 files changed, 274 insertions(+), 39 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/seekablestream/ReadableSeekableStreamByteChannel.java diff --git a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java index 9c5360c1d6..701d421515 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java @@ -32,6 +32,7 @@ import htsjdk.samtools.util.IOUtil; import java.io.File; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; @@ -41,6 +42,7 @@ */ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile { private final Path path; + private final String source; protected SAMSequenceDictionary sequenceDictionary; /** @@ -57,20 +59,13 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile { */ 
AbstractFastaSequenceFile(final Path path) { this.path = path; + this.source = path == null ? "unknown" : path.toAbsolutePath().toString(); final Path dictionary = findSequenceDictionary(path); if (dictionary != null) { IOUtil.assertFileIsReadable(dictionary); - - try { - final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); - final BufferedLineReader reader = new BufferedLineReader(Files.newInputStream(dictionary)); - final SAMFileHeader header = codec.decode(reader, - dictionary.toString()); - if (header.getSequenceDictionary() != null && !header.getSequenceDictionary().isEmpty()) { - this.sequenceDictionary = header.getSequenceDictionary(); - } - reader.close(); + try (InputStream dictionaryIn = Files.newInputStream(dictionary)) { + this.sequenceDictionary = ReferenceSequenceFileFactory.loadDictionary(dictionaryIn); } catch (Exception e) { throw new SAMException("Could not open sequence dictionary file: " + dictionary, e); @@ -78,6 +73,18 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile { } } + /** + * Constructs an {@link AbstractFastaSequenceFile} with an optional sequence dictionary. + * @param path Fasta file to read. Also acts as a prefix for supporting files. + * @param source Named source used for error messages. + * @param sequenceDictionary The sequence dictionary, or null if there isn't one. + */ + AbstractFastaSequenceFile(final Path path, final String source, final SAMSequenceDictionary sequenceDictionary) { + this.path = path; + this.source = source; + this.sequenceDictionary = sequenceDictionary; + } + protected static File findSequenceDictionary(final File file) { if (file == null) { return null; @@ -111,6 +118,11 @@ protected Path getPath() { return path; } + /** Returns the named source of the reference file. */ + protected String getSource() { + return source; + } + /** * Returns the list of sequence records associated with the reference sequence if found * otherwise null. 
@@ -122,12 +134,15 @@ public SAMSequenceDictionary getSequenceDictionary() { /** Returns the full path to the reference file. */ protected String getAbsolutePath() { + if (path == null) { + return null; + } return path.toAbsolutePath().toString(); } - /** Returns the full path to the reference file. */ + /** Returns the full path to the reference file, or the source if no path was specified. */ public String toString() { - return getAbsolutePath(); + return source; } /** default implementation -- override if index is supported */ @@ -137,13 +152,13 @@ public String toString() { /** default implementation -- override if index is supported */ @Override public ReferenceSequence getSequence( String contig ) { - throw new UnsupportedOperationException("Index does not appear to exist for " + getAbsolutePath() + ". samtools faidx can be used to create an index"); + throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + ". samtools faidx can be used to create an index"); } /** default implementation -- override if index is supported */ @Override public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { - throw new UnsupportedOperationException("Index does not appear to exist for " + getAbsolutePath() + ". samtools faidx can be used to create an index"); + throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + ". 
samtools faidx can be used to create an index"); } } diff --git a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java index fa77314af6..331c93bbc7 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java @@ -69,6 +69,18 @@ protected AbstractIndexedFastaSequenceFile(final Path path, final FastaSequenceI } } + /** + * Initialise the given indexed fasta sequence file stream. + * @param source The named source of the reference file (used in error messages). + * @param index The fasta index. + * @param dictionary The sequence dictionary, or null if there isn't one. + */ + protected AbstractIndexedFastaSequenceFile(String source, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { + super(null, source, dictionary); + this.index = index; + reset(); + } + protected static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException { Path ret = findFastaIndex(fastaFile); if (ret == null) throw new FileNotFoundException(ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found."); @@ -192,7 +204,7 @@ public ReferenceSequence getSubsequenceAt( String contig, long start, long stop startOffset += readFromPosition(channelBuffer, indexEntry.getLocation()+startOffset); } catch(IOException ex) { - throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getAbsolutePath(), ex); + throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getSource(), ex); } // Reset the buffer for outbound transfers. 
diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java index 744d797732..2016b7010f 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java @@ -26,12 +26,15 @@ import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.FastLineReader; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.StringUtil; import java.io.File; +import java.io.IOException; import java.nio.file.Path; /** @@ -42,6 +45,7 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile { private final boolean truncateNamesAtWhitespace; + private final SeekableStream seekableStream; private FastLineReader in; private int sequenceIndex = -1; private final byte[] basesBuffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE]; @@ -56,9 +60,21 @@ public FastaSequenceFile(final File file, final boolean truncateNamesAtWhitespac public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) { super(path); this.truncateNamesAtWhitespace = truncateNamesAtWhitespace; + this.seekableStream = null; this.in = new FastLineReader(IOUtil.openFileForReading(path)); } + /** + * Constructs a FastaSequenceFile that reads from the specified stream (which must not be compressed, i.e. + * the caller is responsible for decompressing the stream). + */ + public FastaSequenceFile(String source, final SeekableStream seekableStream, SAMSequenceDictionary dictionary, final boolean truncateNamesAtWhitespace) { + super(null, source, dictionary); + this.truncateNamesAtWhitespace = truncateNamesAtWhitespace; + this.seekableStream = seekableStream; + this.in = new FastLineReader(seekableStream); + } + /** * It's good to call this to free up memory. 
*/ @@ -88,9 +104,17 @@ public ReferenceSequence nextSequence() { @Override public void reset() { this.sequenceIndex = -1; - this.in.close(); - this.in = new FastLineReader(IOUtil.openFileForReading(getPath())); - + if (getPath() != null) { + this.in.close(); + this.in = new FastLineReader(IOUtil.openFileForReading(getPath())); + } else { + try { + this.seekableStream.seek(0); + } catch (IOException e) { + throw new SAMException("Problem seeking to start of stream during reset", e); + } + this.in = new FastLineReader(this.seekableStream); + } } private String readSequenceName() { @@ -100,7 +124,7 @@ private String readSequenceName() { } final byte b = in.getByte(); if (b != '>') { - throw new SAMException("Format exception reading FASTA " + getAbsolutePath() + ". Expected > but saw chr(" + + throw new SAMException("Format exception reading FASTA " + getSource() + ". Expected > but saw chr(" + b + ") at start of sequence with index " + this.sequenceIndex); } final byte[] nameBuffer = new byte[4096]; @@ -111,11 +135,11 @@ private String readSequenceName() { } nameLength += in.readToEndOfOutputBufferOrEoln(nameBuffer, nameLength); if (nameLength == nameBuffer.length && !in.atEoln()) { - throw new SAMException("Sequence name too long in FASTA " + getAbsolutePath()); + throw new SAMException("Sequence name too long in FASTA " + getSource()); } } while (!in.atEoln()); if (nameLength == 0) { - throw new SAMException("Missing sequence name in FASTA " + getAbsolutePath()); + throw new SAMException("Missing sequence name in FASTA " + getSource()); } String name = StringUtil.bytesToString(nameBuffer, 0, nameLength).trim(); if (truncateNamesAtWhitespace) { diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java index e9907b2de8..06aa2007dc 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java @@ -31,6 
+31,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.nio.file.Files; @@ -66,7 +67,19 @@ public FastaSequenceIndex( File indexFile ) { */ public FastaSequenceIndex( Path indexFile ) { IOUtil.assertFileIsReadable(indexFile); - parseIndexFile(indexFile); + try (InputStream in = Files.newInputStream(indexFile)) { + parseIndexFile(in); + } catch (IOException e) { + throw new SAMException("Fasta index file could not be opened: " + indexFile, e); + } + } + + /** + * Build a sequence index from the specified input stream. + * @param in InputStream to read from. + */ + public FastaSequenceIndex(InputStream in) { + parseIndexFile(in); } /** @@ -124,12 +137,10 @@ public boolean equals(Object other) { /** * Parse the contents of an index file, caching the results internally. - * @param indexFile File to parse. - * @throws IOException Thrown if file could not be opened. + * @param in InputStream to parse. */ - private void parseIndexFile(Path indexFile) { - try { - Scanner scanner = new Scanner(indexFile); + private void parseIndexFile(InputStream in) { + try (Scanner scanner = new Scanner(in)) { int sequenceIndex = 0; while( scanner.hasNext() ) { // Tokenize and validate the index line. 
@@ -154,10 +165,6 @@ private void parseIndexFile(Path indexFile) { // Build sequence structure add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) ); } - scanner.close(); - } catch (IOException e) { - throw new SAMException("Fasta index file could not be opened: " + indexFile, e); - } } diff --git a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java index 0c8dfacb8b..52307ea56d 100644 --- a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java @@ -25,6 +25,9 @@ package htsjdk.samtools.reference; import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.IOUtil; @@ -95,6 +98,18 @@ public IndexedFastaSequenceFile(final Path path) throws FileNotFoundException { this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path)))); } + /** + * Initialise the given indexed fasta sequence file stream. + * @param source The named source of the reference file (used in error messages). + * @param in The input stream to read the fasta file from. + * @param index The fasta index. + * @param dictionary The sequence dictionary, or null if there isn't one. + */ + public IndexedFastaSequenceFile(String source, final SeekableStream in, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { + super(source, index, dictionary); + this.channel = new ReadableSeekableStreamByteChannel(in); + } + /** * @deprecated use {@link ReferenceSequenceFileFactory#canCreateIndexedFastaReader(Path)} instead. 
*/ diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java index 437f53ce4d..3e216b715a 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java @@ -27,6 +27,11 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.GZIIndex; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMTextHeaderCodec; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.IOUtil; import java.io.BufferedInputStream; @@ -166,6 +171,35 @@ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { return false; } + /** + * Return an instance of ReferenceSequenceFile using the given fasta sequence file stream, optional index stream, + * and no sequence dictionary + * + * @param source The named source of the reference file (used in error messages). + * @param in The input stream to read the fasta file from. + * @param index The index, or null to return a non-indexed reader. + */ + public static ReferenceSequenceFile getReferenceSequenceFile(final String source, final SeekableStream in, final FastaSequenceIndex index) { + return getReferenceSequenceFile(source, in, index, null, true); + } + + /** + * Return an instance of ReferenceSequenceFile using the given fasta sequence file stream and optional index stream + * and sequence dictionary. + * + * @param source The named source of the reference file (used in error messages). + * @param in The input stream to read the fasta file from. + * @param index The index, or null to return a non-indexed reader. + * @param dictionary The sequence dictionary, or null if there isn't one. 
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name + */ + public static ReferenceSequenceFile getReferenceSequenceFile(final String source, final SeekableStream in, final FastaSequenceIndex index, final SAMSequenceDictionary dictionary, final boolean truncateNamesAtWhitespace) { + if (truncateNamesAtWhitespace && index != null) { + return new IndexedFastaSequenceFile(source, in, index, dictionary); + } + return new FastaSequenceFile(source, in, dictionary, truncateNamesAtWhitespace); + } + /** * Returns the default dictionary name for a FASTA file. * @@ -186,6 +220,22 @@ public static Path getDefaultDictionaryForReferenceSequence(final Path path) { return path.resolveSibling(name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); } + /** + * Loads the sequence dictionary from a sequence dictionary file input stream. + * + * @param in the sequence dictionary file input stream. + * @return the sequence dictionary, or null if the header has no dictionary or it was empty. + */ + public static SAMSequenceDictionary loadDictionary(final InputStream in) { + final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + final BufferedLineReader reader = new BufferedLineReader(in); + final SAMFileHeader header = codec.decode(reader, null); + if (header.getSequenceDictionary().isEmpty()) { + return null; + } + return header.getSequenceDictionary(); + } + /** * Returns the FASTA extension for the path. 
* diff --git a/src/main/java/htsjdk/samtools/seekablestream/ReadableSeekableStreamByteChannel.java b/src/main/java/htsjdk/samtools/seekablestream/ReadableSeekableStreamByteChannel.java new file mode 100644 index 0000000000..054de42a51 --- /dev/null +++ b/src/main/java/htsjdk/samtools/seekablestream/ReadableSeekableStreamByteChannel.java @@ -0,0 +1,70 @@ +package htsjdk.samtools.seekablestream; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.NonWritableChannelException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; + +/** + * A class to wrap a {@link SeekableStream} in a read-only {@link SeekableByteChannel}. + */ +public class ReadableSeekableStreamByteChannel implements SeekableByteChannel { + + private final SeekableStream seekableStream; + private final ReadableByteChannel rbc; + private long pos; + + public ReadableSeekableStreamByteChannel(SeekableStream seekableStream) { + this.seekableStream = seekableStream; + this.rbc = Channels.newChannel(seekableStream); + } + + @Override + public int read(ByteBuffer dst) throws IOException { + int n = rbc.read(dst); + if (n > 0) { + pos += n; + } + return n; + } + + @Override + public int write(ByteBuffer src) throws IOException { + throw new NonWritableChannelException(); + } + + @Override + public long position() { + return pos; + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + // ReadableByteChannel is not buffered, so it reads through + seekableStream.seek(newPosition); + pos = newPosition; + return this; + } + + @Override + public long size() { + return seekableStream.length(); + } + + @Override + public SeekableByteChannel truncate(long size) { + throw new NonWritableChannelException(); + } + + @Override + public boolean isOpen() { + return rbc.isOpen(); + } + + @Override + public void close() throws IOException { + rbc.close(); + } +} diff --git 
a/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java b/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java index be01a315c6..555245ae65 100644 --- a/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java +++ b/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java @@ -26,6 +26,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMException; +import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.StringUtil; import org.testng.Assert; @@ -33,16 +34,18 @@ import org.testng.annotations.Test; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; /** * Test the indexed fasta sequence file reader. */ public class AbstractIndexedFastaSequenceFileTest extends HtsjdkTest { - private static File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/reference"); - private static File SEQUENCE_FILE = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta"); - private static File SEQUENCE_FILE_BGZ = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta.gz"); - private static File SEQUENCE_FILE_NODICT = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.nodict.fasta"); + private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/reference"); + private static final File SEQUENCE_FILE = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta"); + private static final File SEQUENCE_FILE_INDEX = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta.fai"); + private static final File SEQUENCE_FILE_BGZ = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta.gz"); + private static final File SEQUENCE_FILE_NODICT = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.nodict.fasta"); private final String firstBasesOfChrM = "GATCACAGGTCTATCACCCT"; private final String extendedBasesOfChrM 
= "GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT" + @@ -75,11 +78,19 @@ public Object[][] provideOriginalAndNewReaders() throws FileNotFoundException { new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile( SEQUENCE_FILE_BGZ), new BlockCompressedIndexedFastaSequenceFile( - SEQUENCE_FILE_BGZ.toPath())}, + SEQUENCE_FILE_BGZ.toPath()) }, new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile( SEQUENCE_FILE_BGZ, true), new BlockCompressedIndexedFastaSequenceFile( - SEQUENCE_FILE_BGZ.toPath())} + SEQUENCE_FILE_BGZ.toPath()) }, + new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.getAbsolutePath(), + new SeekableFileStream(SEQUENCE_FILE), new FastaSequenceIndex(new FileInputStream(SEQUENCE_FILE_INDEX))), + new IndexedFastaSequenceFile(SEQUENCE_FILE.getAbsolutePath(), new SeekableFileStream(SEQUENCE_FILE), + new FastaSequenceIndex(new FileInputStream(SEQUENCE_FILE_INDEX)), null) }, + new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.getAbsolutePath(), + new SeekableFileStream(SEQUENCE_FILE), new FastaSequenceIndex(new FileInputStream(SEQUENCE_FILE_INDEX)), null, true), + new IndexedFastaSequenceFile(SEQUENCE_FILE.getAbsolutePath(), new SeekableFileStream(SEQUENCE_FILE), + new FastaSequenceIndex(new FileInputStream(SEQUENCE_FILE_INDEX)), null) }, }; } diff --git a/src/test/java/htsjdk/samtools/reference/FastaSequenceFileTest.java b/src/test/java/htsjdk/samtools/reference/FastaSequenceFileTest.java index a7462dbec4..8013ba9e69 100644 --- a/src/test/java/htsjdk/samtools/reference/FastaSequenceFileTest.java +++ b/src/test/java/htsjdk/samtools/reference/FastaSequenceFileTest.java @@ -24,6 +24,8 @@ package htsjdk.samtools.reference; import htsjdk.HtsjdkTest; +import htsjdk.samtools.seekablestream.SeekableFileStream; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.StringUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -83,4 
+85,28 @@ public void testTrailingWhitespaceWithPreexistingSequenceDictionary() throws Exc Assert.assertEquals(StringUtil.bytesToString(referenceSequence.getBases()), "TCGATCGA"); } + + @Test + public void testStream() throws Exception { + final File fasta = File.createTempFile("test", ".fasta"); + fasta.deleteOnExit(); + final PrintWriter writer = new PrintWriter(fasta); + final String chr1 = "chr1"; + writer.println(">" + chr1); + final String sequence = "ACGTACGT"; + writer.println(sequence); + writer.println(sequence + " \t"); + writer.close(); + try (SeekableStream seekableStream = new SeekableFileStream(fasta)) { + final FastaSequenceFile fastaReader = new FastaSequenceFile(fasta.getAbsolutePath(), seekableStream, null, true); + final ReferenceSequence referenceSequence1 = fastaReader.nextSequence(); + Assert.assertEquals(referenceSequence1.getName(), chr1); + Assert.assertEquals(StringUtil.bytesToString(referenceSequence1.getBases()), sequence + sequence); + // try to reset and re-read the first sequence + fastaReader.reset(); + final ReferenceSequence referenceSequence2 = fastaReader.nextSequence(); + Assert.assertEquals(referenceSequence2.getName(), chr1); + Assert.assertEquals(StringUtil.bytesToString(referenceSequence2.getBases()), sequence + sequence); + } + } } diff --git a/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java b/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java index c6fa1384ac..e2aa0acf29 100644 --- a/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java +++ b/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java @@ -32,6 +32,7 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.nio.file.Files; @@ -51,7 +52,9 @@ public Object[][] provideHomoSapiens() throws FileNotFoundException { final File sequenceIndexFile = new 
File(TEST_DATA_DIR,"Homo_sapiens_assembly18.fasta.fai"); return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) }, - { new FastaSequenceIndex(sequenceIndexFile.toPath()) } }; + { new FastaSequenceIndex(sequenceIndexFile.toPath()) }, + { new FastaSequenceIndex(new FileInputStream(sequenceIndexFile)) } + }; } @DataProvider(name="specialcharacters") @@ -59,7 +62,9 @@ public Object[][] provideSpecialCharacters() throws FileNotFoundException { final File sequenceIndexFile = new File(TEST_DATA_DIR,"testing.fai"); return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) }, - { new FastaSequenceIndex(sequenceIndexFile.toPath()) } }; + { new FastaSequenceIndex(sequenceIndexFile.toPath()) }, + { new FastaSequenceIndex(new FileInputStream(sequenceIndexFile)) } + }; } @Test(dataProvider="homosapiens")