Skip to content

Commit

Permalink
Merge pull request #260 from ronlevine/rhl_allele_rep_span_dels
Browse files Browse the repository at this point in the history
Rhl allele rep span dels
  • Loading branch information
eitanbanks committed May 28, 2015
2 parents b6bc0ad + 372a699 commit 287b33f
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 47 deletions.
109 changes: 69 additions & 40 deletions src/java/htsjdk/variant/variantcontext/Allele.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
package htsjdk.variant.variantcontext;

import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.vcf.VCFConstants;

import java.io.Serializable;
import java.util.Arrays;
Expand Down Expand Up @@ -128,8 +129,11 @@ public class Allele implements Comparable<Allele>, Serializable {
/** String form of the no-call allele; the base string from which the shared NO_CALL allele is built. */
public final static String NO_CALL_STRING = ".";

/** String form of the spanning-deletion allele; the base string from which the shared SPAN_DEL allele is built. */
public final static String SPAN_DEL_STRING = "*";

// no public way to create an allele
protected Allele(byte[] bases, boolean isRef) {
protected Allele(final byte[] bases, final boolean isRef) {
// null alleles are no longer allowed
if ( wouldBeNullAllele(bases) ) {
throw new IllegalArgumentException("Null alleles are not supported");
Expand All @@ -154,11 +158,11 @@ protected Allele(byte[] bases, boolean isRef) {
this.isRef = isRef;
this.bases = bases;

if ( ! acceptableAlleleBases(bases) )
if ( ! acceptableAlleleBases(bases, isRef) )
throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'");
}

protected Allele(String bases, boolean isRef) {
protected Allele(final String bases, final boolean isRef) {
this(bases.getBytes(), isRef);
}

Expand Down Expand Up @@ -189,6 +193,7 @@ protected Allele(final Allele allele, final boolean ignoreRefState) {
private final static Allele ALT_T = new Allele("T", false);
private final static Allele REF_N = new Allele("N", true);
private final static Allele ALT_N = new Allele("N", false);
public final static Allele SPAN_DEL = new Allele(SPAN_DEL_STRING, false);
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);

// ---------------------------------------------------------------------------------------------------------
Expand All @@ -199,13 +204,13 @@ protected Allele(final Allele allele, final boolean ignoreRefState) {

/**
* Create a new Allele that includes bases and is tagged as the reference allele if isRef == true. If bases
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
* == '-', a Null allele is created. If bases == '.', a no call Allele is created. If bases == '*', a spanning deletions Allele is created.
*
* @param bases the DNA sequence of this variation, '-', or '.'
* @param bases the DNA sequence of this variation, '-', '.', or '*'
* @param isRef should we make this a reference allele?
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformed
*/
public static Allele create(byte[] bases, boolean isRef) {
public static Allele create(final byte[] bases, final boolean isRef) {
if ( bases == null )
throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");

Expand All @@ -215,6 +220,9 @@ public static Allele create(byte[] bases, boolean isRef) {
case '.':
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
return NO_CALL;
case '*':
if ( isRef ) throw new IllegalArgumentException("Cannot tag a spanning deletions allele as the reference allele");
return SPAN_DEL;
case 'A': case 'a' : return isRef ? REF_A : ALT_A;
case 'C': case 'c' : return isRef ? REF_C : ALT_C;
case 'G': case 'g' : return isRef ? REF_G : ALT_G;
Expand All @@ -227,15 +235,15 @@ public static Allele create(byte[] bases, boolean isRef) {
}
}

public static Allele create(byte base, boolean isRef) {
public static Allele create(final byte base, final boolean isRef) {
return create( new byte[]{ base }, isRef);
}

public static Allele create(byte base) {
public static Allele create(final byte base) {
return create( base, false );
}

public static Allele extend(Allele left, byte[] right) {
public static Allele extend(final Allele left, final byte[] right) {
if (left.isSymbolic())
throw new IllegalArgumentException("Cannot extend a symbolic allele");
byte[] bases = new byte[left.length() + right.length];
Expand All @@ -249,23 +257,31 @@ public static Allele extend(Allele left, byte[] right) {
* @param bases bases representing an allele
* @return true if the bases represent the null allele
*/
public static boolean wouldBeNullAllele(byte[] bases) {
return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
public static boolean wouldBeNullAllele(final byte[] bases) {
return (bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NULL_ALLELE) || bases.length == 0;
}

/**
 * Tests whether the given bases spell the spanning-deletion allele, i.e. consist of exactly
 * one byte equal to the spanning-deletion character.
 *
 * @param bases bases representing an allele
 * @return true if the bases represent the SPAN_DEL allele
 */
public static boolean wouldBeStarAllele(final byte[] bases) {
    // Anything other than a single base can never be the star allele.
    if ( bases.length != 1 )
        return false;

    final byte onlyBase = bases[0];
    return onlyBase == htsjdk.variant.vcf.VCFConstants.SPANNING_DELETION_ALLELE;
}

/**
* @param bases bases representing an allele
* @return true if the bases represent the NO_CALL allele
*/
public static boolean wouldBeNoCallAllele(byte[] bases) {
return bases.length == 1 && bases[0] == '.';
public static boolean wouldBeNoCallAllele(final byte[] bases) {
return bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NO_CALL_ALLELE;
}

/**
* @param bases bases representing an allele
* @return true if the bases represent a symbolic allele
*/
public static boolean wouldBeSymbolicAllele(byte[] bases) {
public static boolean wouldBeSymbolicAllele(final byte[] bases) {
if ( bases.length <= 1 )
return false;
else {
Expand All @@ -277,41 +293,54 @@ public static boolean wouldBeSymbolicAllele(byte[] bases) {
}

/**
* @param bases bases representing an allele
* @param bases bases representing a reference allele
* @return true if the bases represent a well-formatted allele
*/
public static boolean acceptableAlleleBases(String bases) {
public static boolean acceptableAlleleBases(final String bases) {
return acceptableAlleleBases(bases.getBytes(), true);
}

public static boolean acceptableAlleleBases(String bases, boolean allowNsAsAcceptable) {
return acceptableAlleleBases(bases.getBytes(), allowNsAsAcceptable);
/**
 * Checks whether a base string is a well-formatted allele by converting it to bytes and
 * delegating to the byte[] overload.
 *
 * @param bases bases representing an allele; must not be null
 * @param isReferenceAllele true if the bases are to be validated as a reference allele
 * @return true if the bases represent a well-formatted allele
 */
public static boolean acceptableAlleleBases(final String bases, final boolean isReferenceAllele) {
    return acceptableAlleleBases(bases.getBytes(), isReferenceAllele);
}

/**
* @param bases bases representing an allele
* @param bases bases representing a reference allele
* @return true if the bases represent a well-formatted allele
*/
public static boolean acceptableAlleleBases(byte[] bases) {
return acceptableAlleleBases(bases, true); // default: N bases are acceptable
public static boolean acceptableAlleleBases(final byte[] bases) {
return acceptableAlleleBases(bases, true);
}

public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {

/**
*
* @param bases bases representing an allele
* @param isReferenceAllele true if a reference allele
* @return true if the bases represent the well formatted allele
*/
public static boolean acceptableAlleleBases(final byte[] bases, final boolean isReferenceAllele) {
if ( wouldBeNullAllele(bases) )
return false;

if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
return true;

if ( wouldBeStarAllele(bases) ) {
if ( isReferenceAllele )
return false;
else
return true;
}

for (byte base : bases ) {
switch (base) {
case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't':
case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't': case 'N' : case 'n' :
break;
case 'N' : case 'n' :
if (allowNsAsAcceptable)
break;
else
return false;
default:
return false;
}
Expand All @@ -326,7 +355,7 @@ public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAccep
* @param bases bases representing an allele
* @param isRef is this the reference allele?
*/
public static Allele create(String bases, boolean isRef) {
public static Allele create(final String bases, final boolean isRef) {
return create(bases.getBytes(), isRef);
}

Expand All @@ -336,7 +365,7 @@ public static Allele create(String bases, boolean isRef) {
*
* @param bases bases representing an allele
*/
public static Allele create(String bases) {
public static Allele create(final String bases) {
return create(bases, false);
}

Expand All @@ -345,7 +374,7 @@ public static Allele create(String bases) {
*
* @param bases bases representing an allele
*/
public static Allele create(byte[] bases) {
public static Allele create(final byte[] bases) {
return create(bases, false);
}

Expand Down Expand Up @@ -447,7 +476,7 @@ public int hashCode() {
* @param ignoreRefState if true, ignore ref state in comparison
* @return true if this and other are equal
*/
public boolean equals(Allele other, boolean ignoreRefState) {
public boolean equals(final Allele other, final boolean ignoreRefState) {
return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
}

Expand All @@ -456,21 +485,21 @@ public boolean equals(Allele other, boolean ignoreRefState) {
*
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
public boolean basesMatch(final byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }

/**
* @param test bases to test against
*
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
public boolean basesMatch(final String test) { return basesMatch(test.toUpperCase().getBytes()); }

/**
* @param test allele to test against
*
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
public boolean basesMatch(final Allele test) { return basesMatch(test.getBases()); }

/**
* @return the length of this allele. Null and NO_CALL alleles have 0 length.
Expand All @@ -485,7 +514,7 @@ public int length() {
//
// ---------------------------------------------------------------------------------------------------------

public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
public static Allele getMatchingAllele(final Collection<Allele> allAlleles, final byte[] alleleBases) {
for ( Allele a : allAlleles ) {
if ( a.basesMatch(alleleBases) ) {
return a;
Expand All @@ -498,7 +527,7 @@ public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] all
return null; // couldn't find anything
}

public int compareTo(Allele other) {
public int compareTo(final Allele other) {
if ( isReference() && other.isNonReference() )
return -1;
else if ( isNonReference() && other.isReference() )
Expand All @@ -507,14 +536,14 @@ else if ( isNonReference() && other.isReference() )
return getBaseString().compareTo(other.getBaseString()); // todo -- potential performance issue
}

public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
public static boolean oneIsPrefixOfOther(final Allele a1, final Allele a2) {
if ( a2.length() >= a1.length() )
return firstIsPrefixOfSecond(a1, a2);
else
return firstIsPrefixOfSecond(a2, a1);
}

private static boolean firstIsPrefixOfSecond(Allele a1, Allele a2) {
private static boolean firstIsPrefixOfSecond(final Allele a1, final Allele a2) {
String a1String = a1.getBaseString();
return a2.getBaseString().substring(0, a1String.length()).equals(a1String);
}
Expand Down
8 changes: 4 additions & 4 deletions src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ private VariantContext parseVCFLine(final String[] parts, final boolean includeG
}
builder.start(pos);

if ( parts[2].length() == 0 )
if ( parts[2].isEmpty() )
generateException("The VCF specification requires a valid ID field");
else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
builder.noID();
Expand Down Expand Up @@ -398,7 +398,7 @@ protected String getCachedString(String str) {
private Map<String, Object> parseInfo(String infoField) {
Map<String, Object> attributes = new HashMap<String, Object>();

if ( infoField.length() == 0 )
if ( infoField.isEmpty() )
generateException("The VCF specification requires a valid (non-zero length) info field");

if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
Expand Down Expand Up @@ -548,7 +548,7 @@ protected static List<Allele> parseAlleles(String ref, String alts, int lineNo)
* @param lineNo the line number for this record
*/
private static void checkAllele(String allele, boolean isRef, int lineNo) {
if ( allele == null || allele.length() == 0 )
if ( allele == null || allele.isEmpty() )
generateException(generateExceptionTextForBadAlleleBases(""), lineNo);

if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
Expand Down Expand Up @@ -580,7 +580,7 @@ private static void checkAllele(String allele, boolean isRef, int lineNo) {
* @return non-null exception text string
*/
private static String generateExceptionTextForBadAlleleBases(final String allele) {
if ( allele.length() == 0 )
if ( allele.isEmpty() )
return "empty alleles are not permitted in VCF records";
if ( allele.contains("[") || allele.contains("]") || allele.contains(":") || allele.contains(".") )
return "VCF support for complex rearrangements with breakends has not yet been implemented";
Expand Down
7 changes: 7 additions & 0 deletions src/java/htsjdk/variant/vcf/VCFConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public final class VCFConstants {
public static final String VALIDATED_KEY = "VALIDATED";
public static final String THOUSAND_GENOMES_KEY = "1000G";


// separators
public static final String FORMAT_FIELD_SEPARATOR = ":";
public static final String GENOTYPE_FIELD_SEPARATOR = ":";
Expand Down Expand Up @@ -93,6 +94,12 @@ public final class VCFConstants {
public static final char DELETION_ALLELE_v3 = 'D';
public static final char INSERTION_ALLELE_v3 = 'I';

// special alleles
// '*': the character Allele.wouldBeStarAllele compares against to detect the spanning-deletion allele
public static final char SPANNING_DELETION_ALLELE = '*';
// '.': the character Allele.wouldBeNoCallAllele compares against to detect the no-call allele
public static final char NO_CALL_ALLELE = '.';
// '-': the character Allele.wouldBeNullAllele compares against to detect the null allele
public static final char NULL_ALLELE = '-';


// missing/default values
public static final String UNFILTERED = ".";
public static final String PASSES_FILTERS_v3 = "0";
Expand Down
Loading

0 comments on commit 287b33f

Please sign in to comment.