diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index 4238677eb4..f8179e6892 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -177,10 +177,14 @@ void nextContainer() throws IOException, IllegalArgumentException, final Slice slice = container.slices[i]; if (slice.sequenceId < 0) continue; - if (validationStringency != ValidationStringency.SILENT && !slice.validateRefMD5(refs)) { - log.error(String - .format("Reference sequence MD5 mismatch for slice: seq id %d, start %d, span %d, expected MD5 %s", slice.sequenceId, - slice.alignmentStart, slice.alignmentSpan, String.format("%032x", new BigInteger(1, slice.refMD5)))); + if (!slice.validateRefMD5(refs)) { + final String msg = String.format( + "Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s", + slice.sequenceId, + slice.alignmentStart, + slice.alignmentSpan, + String.format("%032x", new BigInteger(1, slice.refMD5))); + throw new CRAMException(msg); } } diff --git a/src/main/java/htsjdk/samtools/cram/build/Utils.java b/src/main/java/htsjdk/samtools/cram/build/Utils.java index 60efc3b9d5..7fdeebac88 100644 --- a/src/main/java/htsjdk/samtools/cram/build/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/build/Utils.java @@ -17,7 +17,9 @@ */ package htsjdk.samtools.cram.build; -class Utils { +final public class Utils { + + private Utils() {}; /** * CRAM operates with upper case bases, so both read and ref bases should be diff --git a/src/main/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java b/src/main/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java index 35a3e79180..c77aaae0f4 100644 --- a/src/main/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java +++ b/src/main/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java @@ -15,8 +15,8 @@ public interface CRAMReferenceSource { * against the reference by using common name variations, * such as adding or removing a leading "chr" prefix * from the requested name. if false, use exact match - * @return the bases representing the requested sequence. or null if the sequence - * cannot be found + * @return the upper cased, normalized (see {@link htsjdk.samtools.cram.build.Utils#normalizeBase}) + * bases representing the requested sequence, or null if the sequence cannot be found */ byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, final boolean tryNameVariants); } diff --git a/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java index da3d43f899..e73fb4155d 100644 --- a/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java +++ b/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java @@ -20,6 +20,7 @@ import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.cram.build.Utils; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; @@ -123,13 +124,23 @@ private byte[] findInCache(final String name) { return null; } + // Upper case and normalize (-> ACGTN) in-place, and add to the cache + private byte[] addToCache(final String sequenceName, final byte[] bases) { + for (int i = 0; i < bases.length; i++) { + bases[i] = Utils.normalizeBase(bases[i]); + } + cacheW.put(sequenceName, new WeakReference(bases)); + return bases; + } + public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, final boolean tryNameVariants) { { // check cache by sequence name: final String name = record.getSequenceName(); final byte[] bases = findInCache(name); - if (bases != null) + if (bases != null) { return bases; + } } final String md5 = record.getAttribute(SAMSequenceRecord.MD5_TAG); @@ -152,10 +163,7 @@ public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, { // try to fetch sequence by name: bases = findBasesByName(record.getSequenceName(), tryNameVariants); if (bases != null) { - SequenceUtil.upperCase(bases); - cacheW.put(record.getSequenceName(), new WeakReference( - bases)); - return bases; + return addToCache(record.getSequenceName(), bases); } } @@ -169,9 +177,7 @@ public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, } } if (bases != null) { - SequenceUtil.upperCase(bases); - cacheW.put(md5, new WeakReference(bases)); - return bases; + return addToCache(md5, bases); } } } diff --git a/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java b/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java index 19284b2024..11d2f3ce9e 100644 --- a/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java +++ b/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java @@ -30,6 +30,11 @@ public void testCRAIIndexerFromContainer() throws IOException { refSource, ValidationStringency.STRICT); SAMFileHeader samHeader = reader.getFileHeader(); + Iterator it = reader.getIterator(); + while(it.hasNext()) { + SAMRecord samRec = it.next(); + } + reader.close(); FileInputStream fis = new FileInputStream(CRAMFile); diff --git a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java index a5c16697aa..c52dccba14 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java +++ b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java @@ -1,10 +1,19 @@ package htsjdk.samtools.cram.structure; +import htsjdk.samtools.CRAMFileReader; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.util.SequenceUtil; import org.testng.Assert; import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + /** * Created by vadim on 07/12/2015. */ @@ -33,4 +42,29 @@ public void test_validateRef() { Assert.assertEquals(slice.refMD5, md5); Assert.assertTrue(slice.validateRefMD5(ref)); } + + @Test(expectedExceptions= CRAMException.class) + public void testFailsMD5Check() throws IOException { + // auxf.alteredForMD5test.fa has been altered slightly from the original reference + // to cause the CRAM md5 check to fail + final File CRAMFile = new File("src/test/resources/htsjdk/samtools/cram/auxf#values.3.0.cram"); + final File refFile = new File("src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa"); + ReferenceSource refSource = new ReferenceSource(refFile); + CRAMFileReader reader = null; + try { + reader = new CRAMFileReader( + CRAMFile, + null, + refSource, + ValidationStringency.STRICT); + Iterator it = reader.getIterator(); + while (it.hasNext()) { + it.next(); + } + } finally { + if (reader != null) { + reader.close(); + } + } + } } diff --git a/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa b/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa new file mode 100644 index 0000000000..1089240220 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa @@ -0,0 +1,2 @@ +>Sheila +CTAGCTCAGAAAAAAAAAA diff --git a/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa.fai b/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa.fai new file mode 100644 index 0000000000..5709288804 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/auxf.alteredForMD5test.fa.fai @@ -0,0 +1 @@ +Sheila 19 8 19 20