diff --git a/src/java/htsjdk/variant/variantcontext/Allele.java b/src/java/htsjdk/variant/variantcontext/Allele.java index c855b87cda..eb8c62aa8e 100644 --- a/src/java/htsjdk/variant/variantcontext/Allele.java +++ b/src/java/htsjdk/variant/variantcontext/Allele.java @@ -26,6 +26,7 @@ package htsjdk.variant.variantcontext; import htsjdk.samtools.util.StringUtil; +import htsjdk.variant.vcf.VCFConstants; import java.io.Serializable; import java.util.Arrays; @@ -128,8 +129,11 @@ public class Allele implements Comparable, Serializable { /** A generic static NO_CALL allele for use */ public final static String NO_CALL_STRING = "."; + /** A generic static SPAN_DEL allele for use */ + public final static String SPAN_DEL_STRING = "*"; + // no public way to create an allele - protected Allele(byte[] bases, boolean isRef) { + protected Allele(final byte[] bases, final boolean isRef) { // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { throw new IllegalArgumentException("Null alleles are not supported"); @@ -154,11 +158,11 @@ protected Allele(byte[] bases, boolean isRef) { this.isRef = isRef; this.bases = bases; - if ( ! acceptableAlleleBases(bases) ) + if ( ! acceptableAlleleBases(bases, isRef) ) throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'"); } - protected Allele(String bases, boolean isRef) { + protected Allele(final String bases, final boolean isRef) { this(bases.getBytes(), isRef); } @@ -189,6 +193,7 @@ protected Allele(final Allele allele, final boolean ignoreRefState) { private final static Allele ALT_T = new Allele("T", false); private final static Allele REF_N = new Allele("N", true); private final static Allele ALT_N = new Allele("N", false); + public final static Allele SPAN_DEL = new Allele(SPAN_DEL_STRING, false); public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); // --------------------------------------------------------------------------------------------------------- @@ -199,13 +204,13 @@ protected Allele(final Allele allele, final boolean ignoreRefState) { /** * Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases - * == '-', a Null allele is created. If bases == '.', a no call Allele is created. + * == '-', a Null allele is created. If bases == '.', a no call Allele is created. If bases == '*', a spanning deletions Allele is created. * - * @param bases the DNA sequence of this variation, '-', or '.' + * @param bases the DNA sequence of this variation, '-', '.', or '*' * @param isRef should we make this a reference allele? * @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated */ - public static Allele create(byte[] bases, boolean isRef) { + public static Allele create(final byte[] bases, final boolean isRef) { if ( bases == null ) throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); @@ -215,6 +220,9 @@ public static Allele create(byte[] bases, boolean isRef) { case '.': if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); return NO_CALL; + case '*': + if ( isRef ) throw new IllegalArgumentException("Cannot tag a spanning deletions allele as the reference allele"); + return SPAN_DEL; case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'G': case 'g' : return isRef ? REF_G : ALT_G; @@ -227,15 +235,15 @@ public static Allele create(byte[] bases, boolean isRef) { } } - public static Allele create(byte base, boolean isRef) { + public static Allele create(final byte base, final boolean isRef) { return create( new byte[]{ base }, isRef); } - public static Allele create(byte base) { + public static Allele create(final byte base) { return create( base, false ); } - public static Allele extend(Allele left, byte[] right) { + public static Allele extend(final Allele left, final byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); byte[] bases = new byte[left.length() + right.length]; @@ -249,23 +257,31 @@ public static Allele extend(Allele left, byte[] right) { * @param bases bases representing an allele * @return true if the bases represent the null allele */ - public static boolean wouldBeNullAllele(byte[] bases) { - return (bases.length == 1 && bases[0] == '-') || bases.length == 0; + public static boolean wouldBeNullAllele(final byte[] bases) { + return (bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NULL_ALLELE) || bases.length == 0; + } + + /** + * @param bases bases representing an allele + * @return true if the bases represent the SPAN_DEL allele + */ + public static boolean wouldBeStarAllele(final byte[] bases) { + return bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.SPANNING_DELETION_ALLELE; } /** * @param bases bases representing an allele * @return true if the bases represent the NO_CALL allele */ - public static boolean wouldBeNoCallAllele(byte[] bases) { - return bases.length == 1 && bases[0] == '.'; + public static boolean wouldBeNoCallAllele(final byte[] bases) { + return bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NO_CALL_ALLELE; } /** * @param bases bases representing an allele * @return true if the bases represent a symbolic allele */ - public static boolean wouldBeSymbolicAllele(byte[] bases) { + public static boolean wouldBeSymbolicAllele(final byte[] bases) { if ( bases.length <= 1 ) return false; else { @@ -277,41 +293,54 @@ public static boolean wouldBeSymbolicAllele(byte[] bases) { } /** - * @param bases bases representing an allele + * @param bases bases representing a reference allele * @return true if the bases represent the well formatted allele */ - public static boolean acceptableAlleleBases(String bases) { + public static boolean acceptableAlleleBases(final String bases) { return acceptableAlleleBases(bases.getBytes(), true); } - public static boolean acceptableAlleleBases(String bases, boolean allowNsAsAcceptable) { - return acceptableAlleleBases(bases.getBytes(), allowNsAsAcceptable); + /** + * @param bases bases representing an allele + * @param isReferenceAllele is a reference allele + * @return true if the bases represent the well formatted allele + */ + public static boolean acceptableAlleleBases(final String bases, boolean isReferenceAllele) { + return acceptableAlleleBases(bases.getBytes(), isReferenceAllele); } /** - * @param bases bases representing an allele + * @param bases bases representing a reference allele * @return true if the bases represent the well formatted allele */ - public static boolean acceptableAlleleBases(byte[] bases) { - return acceptableAlleleBases(bases, true); // default: N bases are acceptable + public static boolean acceptableAlleleBases(final byte[] bases) { + return acceptableAlleleBases(bases, true); } - - public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { + + /** + * + * @param bases bases representing an allele + * @param isReferenceAllele true if a reference allele + * @return true if the bases represent the well formatted allele + */ + public static boolean acceptableAlleleBases(final byte[] bases, final boolean isReferenceAllele) { if ( wouldBeNullAllele(bases) ) return false; if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; + if ( wouldBeStarAllele(bases) ) { + if ( isReferenceAllele ) + return false; + else + return true; + } + for (byte base : bases ) { switch (base) { - case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't': + case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't': case 'N' : case 'n' : break; - case 'N' : case 'n' : - if (allowNsAsAcceptable) - break; - else - return false; default: return false; } @@ -326,7 +355,7 @@ public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAccep * @param bases bases representing an allele * @param isRef is this the reference allele? */ - public static Allele create(String bases, boolean isRef) { + public static Allele create(final String bases, final boolean isRef) { return create(bases.getBytes(), isRef); } @@ -336,7 +365,7 @@ public static Allele create(String bases, boolean isRef) { * * @param bases bases representing an allele */ - public static Allele create(String bases) { + public static Allele create(final String bases) { return create(bases, false); } @@ -345,7 +374,7 @@ public static Allele create(String bases) { * * @param bases bases representing an allele */ - public static Allele create(byte[] bases) { + public static Allele create(final byte[] bases) { return create(bases, false); } @@ -447,7 +476,7 @@ public int hashCode() { * @param ignoreRefState if true, ignore ref state in comparison * @return true if this and other are equal */ - public boolean equals(Allele other, boolean ignoreRefState) { + public boolean equals(final Allele other, final boolean ignoreRefState) { return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); } @@ -456,21 +485,21 @@ public boolean equals(Allele other, boolean ignoreRefState) { * * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ - public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } + public boolean basesMatch(final byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } /** * @param test bases to test against * * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ - public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } + public boolean basesMatch(final String test) { return basesMatch(test.toUpperCase().getBytes()); } /** * @param test allele to test against * * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ - public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } + public boolean basesMatch(final Allele test) { return basesMatch(test.getBases()); } /** * @return the length of this allele. Null and NO_CALL alleles have 0 length. @@ -485,7 +514,7 @@ public int length() { // // --------------------------------------------------------------------------------------------------------- - public static Allele getMatchingAllele(Collection allAlleles, byte[] alleleBases) { + public static Allele getMatchingAllele(final Collection allAlleles, final byte[] alleleBases) { for ( Allele a : allAlleles ) { if ( a.basesMatch(alleleBases) ) { return a; @@ -498,7 +527,7 @@ public static Allele getMatchingAllele(Collection allAlleles, byte[] all return null; // couldn't find anything } - public int compareTo(Allele other) { + public int compareTo(final Allele other) { if ( isReference() && other.isNonReference() ) return -1; else if ( isNonReference() && other.isReference() ) @@ -507,14 +536,14 @@ else if ( isNonReference() && other.isReference() ) return getBaseString().compareTo(other.getBaseString()); // todo -- potential performance issue } - public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) { + public static boolean oneIsPrefixOfOther(final Allele a1, final Allele a2) { if ( a2.length() >= a1.length() ) return firstIsPrefixOfSecond(a1, a2); else return firstIsPrefixOfSecond(a2, a1); } - private static boolean firstIsPrefixOfSecond(Allele a1, Allele a2) { + private static boolean firstIsPrefixOfSecond(final Allele a1, final Allele a2) { String a1String = a1.getBaseString(); return a2.getBaseString().substring(0, a1String.length()).equals(a1String); } diff --git a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java index e3cfe37abb..c2fe9ef38c 100644 --- a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -305,7 +305,7 @@ private VariantContext parseVCFLine(final String[] parts, final boolean includeG } builder.start(pos); - if ( parts[2].length() == 0 ) + if ( parts[2].isEmpty() ) generateException("The VCF specification requires a valid ID field"); else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) ) builder.noID(); @@ -398,7 +398,7 @@ protected String getCachedString(String str) { private Map parseInfo(String infoField) { Map attributes = new HashMap(); - if ( infoField.length() == 0 ) + if ( infoField.isEmpty() ) generateException("The VCF specification requires a valid (non-zero length) info field"); if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) { @@ -548,7 +548,7 @@ protected static List parseAlleles(String ref, String alts, int lineNo) * @param lineNo the line number for this record */ private static void checkAllele(String allele, boolean isRef, int lineNo) { - if ( allele == null || allele.length() == 0 ) + if ( allele == null || allele.isEmpty() ) generateException(generateExceptionTextForBadAlleleBases(""), lineNo); if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) { @@ -580,7 +580,7 @@ private static void checkAllele(String allele, boolean isRef, int lineNo) { * @return non-null exception text string */ private static String generateExceptionTextForBadAlleleBases(final String allele) { - if ( allele.length() == 0 ) + if ( allele.isEmpty() ) return "empty alleles are not permitted in VCF records"; if ( allele.contains("[") || allele.contains("]") || allele.contains(":") || allele.contains(".") ) return "VCF support for complex rearrangements with breakends has not yet been implemented"; diff --git a/src/java/htsjdk/variant/vcf/VCFConstants.java b/src/java/htsjdk/variant/vcf/VCFConstants.java index f358ad2882..b05856d5e2 100644 --- a/src/java/htsjdk/variant/vcf/VCFConstants.java +++ b/src/java/htsjdk/variant/vcf/VCFConstants.java @@ -64,6 +64,7 @@ public final class VCFConstants { public static final String VALIDATED_KEY = "VALIDATED"; public static final String THOUSAND_GENOMES_KEY = "1000G"; + // separators public static final String FORMAT_FIELD_SEPARATOR = ":"; public static final String GENOTYPE_FIELD_SEPARATOR = ":"; @@ -93,6 +94,12 @@ public final class VCFConstants { public static final char DELETION_ALLELE_v3 = 'D'; public static final char INSERTION_ALLELE_v3 = 'I'; + // special alleles + public static final char SPANNING_DELETION_ALLELE = '*'; + public static final char NO_CALL_ALLELE = '.'; + public static final char NULL_ALLELE = '-'; + + // missing/default values public static final String UNFILTERED = "."; public static final String PASSES_FILTERS_v3 = "0"; diff --git a/src/tests/java/htsjdk/variant/variantcontext/AlleleUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/AlleleUnitTest.java index 93c9e4328b..c2aa79ff38 100644 --- a/src/tests/java/htsjdk/variant/variantcontext/AlleleUnitTest.java +++ b/src/tests/java/htsjdk/variant/variantcontext/AlleleUnitTest.java @@ -48,7 +48,7 @@ * Basic unit test for RecalData */ public class AlleleUnitTest extends VariantBaseTest { - Allele ARef, A, T, ATIns, ATCIns, NoCall; + Allele ARef, A, T, ATIns, ATCIns, NoCall, SpandDel; @BeforeSuite public void before() { @@ -59,7 +59,9 @@ public void before() { ATIns = Allele.create("AT"); ATCIns = Allele.create("ATC"); - NoCall = Allele.create("."); + NoCall = Allele.create(Allele.NO_CALL_STRING); + + SpandDel = Allele.create(Allele.SPAN_DEL_STRING); } @Test @@ -84,12 +86,19 @@ public void testCreatingSNPAlleles() { public void testCreatingNoCallAlleles() { Assert.assertTrue(NoCall.isNonReference()); Assert.assertFalse(NoCall.isReference()); - Assert.assertFalse(NoCall.basesMatch(".")); + Assert.assertFalse(NoCall.basesMatch(Allele.NO_CALL_STRING)); Assert.assertEquals(NoCall.length(), 0); Assert.assertTrue(NoCall.isNoCall()); Assert.assertFalse(NoCall.isCalled()); } + @Test + public void testCreatingSpanningDeletionAlleles() { + Assert.assertTrue(SpandDel.isNonReference()); + Assert.assertFalse(SpandDel.isReference()); + Assert.assertTrue(SpandDel.basesMatch(Allele.SPAN_DEL_STRING)); + Assert.assertEquals(SpandDel.length(), 1); + } @Test public void testCreatingIndelAlleles() { @@ -232,6 +241,16 @@ public void testBadConstructorArgs6() { Allele.create("", true); // symbolic cannot be ref allele } + @Test (expectedExceptions = IllegalArgumentException.class) + public void testBadNoCallAllelel() { + Allele.create(Allele.NO_CALL_STRING, true); // no call cannot be ref allele + } + + @Test (expectedExceptions = IllegalArgumentException.class) + public void testBadSpanningDeletionAllelel() { + Allele.create(Allele.SPAN_DEL_STRING, true); // spanning deletion cannot be ref allele + } + @Test public void testExtend() { Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());