Skip to content

Commit

Permalink
Fixes indexing for codecs that implement getPathToDataFile (#1429)
Browse files Browse the repository at this point in the history
* Updated IndexFactory so it is possible to index files with codecs that override getPathToDataFile.
* Moved VCFRedirectCodec to its own top-level test class to share between tests
* Fixes #1428


Co-authored-by: Jonn Smith <[email protected]>
Co-authored-by: Louis Bergelson <[email protected]>
  • Loading branch information
3 people committed Oct 30, 2019
1 parent f70befd commit c21055b
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 31 deletions.
19 changes: 15 additions & 4 deletions src/main/java/htsjdk/tribble/index/IndexFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -439,13 +439,24 @@ public FeatureIterator(final File inputFile, final FeatureCodec<FEATURE_TYPE, SO
throw new IllegalArgumentException("FeatureIterator input file cannot be null");
}
this.codec = codec;
this.inputFile = inputFile;

// We must call getPathToDataFile here to work with codecs that store their configuration and data separately
final String filePath = codec.getPathToDataFile(inputFile.getAbsolutePath());

try {
this.inputFile = IOUtil.getPath(filePath).toFile();
} catch (final IOException e) {
throw new TribbleException("Failed while constructing a FeatureIterator due to a problem converting String to Path", e);
}

try {
if (IOUtil.hasBlockCompressedExtension(inputFile)) {
final BlockCompressedInputStream bcs = initIndexableBlockCompressedStream(inputFile);
// Since we modified inputFile above, we MUST use this.inputFile for all checks and file creations
// for the rest of this method!
if (IOUtil.hasBlockCompressedExtension(this.inputFile)) {
final BlockCompressedInputStream bcs = initIndexableBlockCompressedStream(this.inputFile);
source = (SOURCE) codec.makeIndexableSourceFromStream(bcs);
} else {
final PositionalBufferedStream ps = initIndexablePositionalStream(inputFile);
final PositionalBufferedStream ps = initIndexablePositionalStream(this.inputFile);
source = (SOURCE) codec.makeIndexableSourceFromStream(ps);
}
this.codec.readHeader(source);
Expand Down
29 changes: 4 additions & 25 deletions src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import com.google.common.jimfs.Jimfs;
import htsjdk.HtsjdkTest;
import htsjdk.samtools.FileTruncatedException;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IOUtilTest;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.TestUtil;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.bed.BEDFeature;
Expand Down Expand Up @@ -52,7 +50,6 @@ public class AbstractFeatureReaderTest extends HtsjdkTest {

//wrapper which skips the first byte of a file and leaves the rest unchanged
private static final Function<SeekableByteChannel, SeekableByteChannel> WRAPPER = SkippingByteChannel::new;
public static final String REDIRECTING_CODEC_TEST_FILES = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/";

/**
* Asserts readability and correctness of VCF over HTTP. The VCF is indexed and requires and index.
Expand Down Expand Up @@ -233,8 +230,8 @@ public SeekableByteChannel truncate(long size) throws IOException {
@DataProvider
public Object[][] getVcfRedirects(){
return new Object[][]{
{REDIRECTING_CODEC_TEST_FILES + "vcf.redirect"},
{REDIRECTING_CODEC_TEST_FILES + "vcf.gz.redirect"}
{VCFRedirectCodec.REDIRECTING_CODEC_TEST_FILE_ROOT + "vcf.redirect"},
{VCFRedirectCodec.REDIRECTING_CODEC_TEST_FILE_ROOT + "vcf.gz.redirect"}
};
}

Expand All @@ -244,8 +241,8 @@ public Object[][] getVcfRedirects(){
*/
@Test(dataProvider = "getVcfRedirects")
public void testCodecWithGetPathToDataFile(String vcfRedirect) throws IOException {
final VcfRedirectCodec vcfRedirectCodec = new VcfRedirectCodec();
final String vcf = REDIRECTING_CODEC_TEST_FILES + "dataFiles/test.vcf";
final VCFRedirectCodec vcfRedirectCodec = new VCFRedirectCodec();
final String vcf = VCFRedirectCodec.REDIRECTING_CODEC_TEST_FILE_ROOT + "dataFiles/test.vcf";
Assert.assertTrue(vcfRedirectCodec.canDecode(vcfRedirect), "should have been able to decode " + vcfRedirect);
try(FeatureReader<VariantContext> redirectReader = AbstractFeatureReader.getFeatureReader(vcfRedirect, vcfRedirectCodec, false);
FeatureReader<VariantContext> directReader = AbstractFeatureReader.getFeatureReader(vcf, new VCFCodec(), false)){
Expand All @@ -260,22 +257,4 @@ public void testCodecWithGetPathToDataFile(String vcfRedirect) throws IOExceptio
}
}

/**
 * Codec which redirects to another location after reading the input file.
 * The first line of the input file is used verbatim as the path of the data file.
 */
private static class VcfRedirectCodec extends VCFCodec{
// Decodability is decided on the redirect target, not on the redirect file itself.
@Override
public boolean canDecode(String potentialInput) {
return super.canDecode(this.getPathToDataFile(potentialInput));
}

// Returns the first line of the file at {@code path}; an IOException while reading
// is rethrown wrapped in a RuntimeIOException.
@Override
public String getPathToDataFile(String path) {
try {
return Files.readAllLines(IOUtil.getPath(path)).get(0);
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}
}
}
33 changes: 33 additions & 0 deletions src/test/java/htsjdk/tribble/VCFRedirectCodec.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package htsjdk.tribble;

import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.variant.vcf.VCFCodec;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

/**
 * Test codec which redirects to another location after reading the input file.
 * It's an example of a codec which uses {@link FeatureCodec#getPathToDataFile(String)}.
 *
 * <p>The input ("redirect") file holds, on its first line, the path of the real data file.
 * That path is resolved against the directory containing the redirect file.
 */
public class VCFRedirectCodec extends VCFCodec {
    /** Root directory of the redirect test resources shared between tests. */
    public static final String REDIRECTING_CODEC_TEST_FILE_ROOT = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/";

    /**
     * Decides decodability based on the redirect target rather than the redirect file itself.
     *
     * @param potentialInput path of a redirect file
     * @return whatever the superclass reports for the resolved data file path
     */
    @Override
    public boolean canDecode(final String potentialInput) {
        return super.canDecode(this.getPathToDataFile(potentialInput));
    }

    /**
     * Reads the first line of the redirect file and resolves it against the redirect file's directory.
     *
     * @param path path of the redirect file
     * @return the path of the data file the redirect file points to
     * @throws RuntimeIOException if the redirect file cannot be read or contains no lines
     */
    @Override
    public String getPathToDataFile(final String path) {
        try {
            final Path inputPath = IOUtil.getPath(path);
            final List<String> lines = Files.readAllLines(inputPath);
            if (lines.isEmpty()) {
                // Fail with a descriptive error instead of a bare IndexOutOfBoundsException.
                throw new IOException("Redirect file contains no data file path: " + path);
            }
            final Path dataFilePath = IOUtil.getPath(lines.get(0));

            // getParent() is null for a single-component path such as "vcf.redirect"; in that case the
            // redirect file's directory is the one the path is already relative to, so use the entry as is.
            final Path parent = inputPath.getParent();
            final Path resolved = (parent == null) ? dataFilePath : parent.resolve(dataFilePath);
            return resolved.toString();
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }
}
39 changes: 39 additions & 0 deletions src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.TestUtils;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.VCFRedirectCodec;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndex;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.bcf2.BCF2Codec;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
Expand Down Expand Up @@ -219,4 +222,40 @@ public void testCreateLinearIndexFromBCF(final File inputBCF) throws IOException
}
}
}

/**
 * Supplies pairs of (redirect file path, index type to build) for the redirect indexing test.
 */
@DataProvider
public Object[][] getRedirectFiles(){
    final String root = VCFRedirectCodec.REDIRECTING_CODEC_TEST_FILE_ROOT;
    final String gzRedirect = root + "vcf.gz.redirect";
    final String plainRedirect = root + "vcf.redirect";

    final Object[] tabixCase = {gzRedirect, IndexFactory.IndexType.TABIX};
    final Object[] intervalTreeCase = {plainRedirect, IndexFactory.IndexType.INTERVAL_TREE};
    final Object[] linearCase = {plainRedirect, IndexFactory.IndexType.LINEAR};
    return new Object[][] {tabixCase, intervalTreeCase, linearCase};
}

/**
 * Copies a redirect file and its data file into a scratch directory, verifies the reader sees no
 * index, builds and writes an index through the redirecting codec, then verifies that the reader
 * finds the index and that a query returns the expected number of records.
 */
@Test(dataProvider = "getRedirectFiles")
public void testIndexRedirectedFiles(String input, IndexFactory.IndexType type) throws IOException {
    final VCFRedirectCodec redirectCodec = new VCFRedirectCodec();
    final File scratchDir = IOUtil.createTempDir("redirec-test", "dir");
    try {
        // Place a copy of the redirect file in the scratch directory.
        final File redirectCopy = new File(scratchDir, new File(input).getName());
        Files.copy(new File(input), redirectCopy);

        // Work out where the copied redirect file points and create that directory.
        final File dataCopy = new File(redirectCodec.getPathToDataFile(redirectCopy.toString()));
        final File dataDir = dataCopy.getAbsoluteFile().getParentFile();
        Assert.assertTrue(dataDir.mkdir());

        // Copy the real data file to the location the copied redirect file resolves to.
        final File dataOriginal = new File(redirectCodec.getPathToDataFile(input));
        Files.copy(dataOriginal, dataCopy);

        final String redirectCopyPath = redirectCopy.getAbsolutePath();

        // Before indexing, no index should be discoverable.
        try (final AbstractFeatureReader<VariantContext, LineIterator> unindexedReader =
                     AbstractFeatureReader.getFeatureReader(redirectCopyPath, redirectCodec, false)) {
            Assert.assertFalse(unindexedReader.hasIndex());
        }

        // Index via the redirect file, but write the index next to the data file.
        final Index createdIndex = IndexFactory.createIndex(redirectCopy, redirectCodec, type);
        createdIndex.writeBasedOnFeatureFile(dataCopy);

        // After indexing, the reader must find the index and be able to query with it.
        try (final AbstractFeatureReader<VariantContext, LineIterator> indexedReader =
                     AbstractFeatureReader.getFeatureReader(redirectCopyPath, redirectCodec)) {
            Assert.assertTrue(indexedReader.hasIndex());
            final long hits = indexedReader.query("20",1110696,1230237).stream().count();
            Assert.assertEquals(hits, 2);
        }
    } finally {
        IOUtil.recursiveDelete(scratchDir.toPath());
    }
}
}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz
dataFiles/test.vcf.gz
Original file line number Diff line number Diff line change
@@ -1 +1 @@
src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf
dataFiles/test.vcf

0 comments on commit c21055b

Please sign in to comment.