From 833a0e2cf55df42ce0b6d682137b84b5dc963c97 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 2 Aug 2019 14:41:23 -0400 Subject: [PATCH] fix bug in VariantContextBuilder (#1403) * Fix regression in VariantContextBuilder introduced in #1344 * Fixes #1404 * filter validation now fails with IllegalStateException instead of of NPE * Treat null filter array the same as null filter sets Co-authored-by: Yossi Farjoun Co-authored-by: Louis Bergelson --- .../variantcontext/VariantContext.java | 3 + .../variantcontext/VariantContextBuilder.java | 15 ++- .../java/htsjdk/variant/vcf/VCFEncoder.java | 106 +++++++++++------- .../VariantContextBuilderTest.java | 27 +++++ 4 files changed, 105 insertions(+), 46 deletions(-) diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java index 72c84c36af..ac2b44f1fe 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java @@ -393,6 +393,9 @@ private static void validateFilters(final VariantContext variantContext) { } for (String filter : filters) { + if ( filter == null) { + throw new IllegalStateException("'null' is not a valid filter string."); + } if (!VALID_FILTER.matcher(filter).matches()) { throw new IllegalStateException("Filter '" + filter + "' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER); diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java index 182b5fb9e4..fae8d81514 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java @@ -34,7 +34,6 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -354,7 +353,7 @@ private void makeFiltersModifiable() { if (!filtersCanBeModified) { this.filtersCanBeModified = true; final Set tempFilters = filters; - this.filters = new HashSet<>(); + this.filters = new LinkedHashSet<>(); if (tempFilters != null) { this.filters.addAll(tempFilters); } @@ -376,7 +375,7 @@ public VariantContextBuilder filters(final Set filters) { unfiltered(); } else { this.filtersCanBeModified = true; - filtersAsIs(new HashSet<>(filters)); + filtersAsIs(new LinkedHashSet<>(filters)); } return this; } @@ -398,11 +397,16 @@ private void filtersAsIs(final Set filters) { /** * {@link #filters} * - * @param filters Set of strings to set as the filters for this builder + * @param filters Strings to set as the filters for this builder * @return this builder */ public VariantContextBuilder filters(final String ... filters) { - filtersAsIs(new LinkedHashSet<>(Arrays.asList(filters))); + if(filters == null){ + this.unfiltered(); + } else { + this.filtersCanBeModified = true; + filtersAsIs(new LinkedHashSet<>(Arrays.asList(filters))); + } return this; } @@ -435,6 +439,7 @@ public VariantContextBuilder passFilters() { */ public VariantContextBuilder unfiltered() { this.filters = null; + this.filtersCanBeModified = false; return this; } diff --git a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java index 00d80c4791..ac857fe428 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java +++ b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java @@ -44,7 +44,9 @@ public class VCFEncoder { * allowing missing fields in the header. */ public VCFEncoder(final VCFHeader header, final boolean allowMissingFieldsInHeader, final boolean outputTrailingFormatFields) { - if (header == null) throw new NullPointerException("The VCF header must not be null."); + if (header == null) { + throw new NullPointerException("The VCF header must not be null."); + } this.header = header; this.allowMissingFieldsInHeader = allowMissingFieldsInHeader; this.outputTrailingFormatFields = outputTrailingFormatFields; @@ -128,8 +130,11 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro vcfOutput.append(VCFConstants.FIELD_SEPARATOR); // QUAL - if ( ! context.hasLog10PError()) vcfOutput.append(VCFConstants.MISSING_VALUE_v4); - else vcfOutput.append(formatQualValue(context.getPhredScaledQual())); + if ( !context.hasLog10PError()) { + vcfOutput.append(VCFConstants.MISSING_VALUE_v4); + } else { + vcfOutput.append(formatQualValue(context.getPhredScaledQual())); + } vcfOutput.append(VCFConstants.FIELD_SEPARATOR) // FILTER .append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR); @@ -137,11 +142,14 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro // INFO final Map infoFields = new TreeMap<>(); for (final Map.Entry field : context.getAttributes().entrySet()) { - if (!this.header.hasInfoLine(field.getKey())) + if (!this.header.hasInfoLine(field.getKey())) { fieldIsMissingFromHeaderError(context, field.getKey(), "INFO"); + } final String outputValue = formatVCFField(field.getValue()); - if (outputValue != null) infoFields.put(field.getKey(), outputValue); + if (outputValue != null) { + infoFields.put(field.getKey(), outputValue); + } } writeInfoString(infoFields, vcfOutput); @@ -152,11 +160,12 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro vcfOutput.append(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString()); } else { final List genotypeAttributeKeys = context.calcVCFGenotypeKeys(this.header); - if ( ! genotypeAttributeKeys.isEmpty()) { - for (final String format : genotypeAttributeKeys) - if ( ! this.header.hasFormatLine(format)) + if ( !genotypeAttributeKeys.isEmpty()) { + for (final String format : genotypeAttributeKeys) { + if (!this.header.hasFormatLine(format)) { fieldIsMissingFromHeaderError(context, format, "FORMAT"); - + } + } final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); vcfOutput.append(VCFConstants.FIELD_SEPARATOR); @@ -166,8 +175,6 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro appendGenotypeData(context, alleleStrings, genotypeAttributeKeys, vcfOutput); } } - - } VCFHeader getVCFHeader() { @@ -181,52 +188,60 @@ boolean getAllowMissingFieldsInHeader() { private String getFilterString(final VariantContext vc) { if (vc.isFiltered()) { for (final String filter : vc.getFilters()) { - if (!this.header.hasFilterLine(filter)) fieldIsMissingFromHeaderError(vc, filter, "FILTER"); + if (!this.header.hasFilterLine(filter)) { + fieldIsMissingFromHeaderError(vc, filter, "FILTER"); + } } return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())); - } else if (vc.filtersWereApplied()) return VCFConstants.PASSES_FILTERS_v4; - else return VCFConstants.UNFILTERED; + } else { + return vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED; + } } private static String formatQualValue(final double qual) { String s = String.format(QUAL_FORMAT_STRING, qual); - if (s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM)) + if (s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM)) { s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length()); + } return s; } private void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) { - if (!allowMissingFieldsInHeader) + if (!allowMissingFieldsInHeader) { throw new IllegalStateException("Key " + id + " found in VariantContext field " + field - + " at " + vc.getContig() + ":" + vc.getStart() - + " but this key isn't defined in the VCFHeader. We require all VCFs to have" - + " complete VCF headers by default."); + + " at " + vc.getContig() + ":" + vc.getStart() + + " but this key isn't defined in the VCFHeader. We require all VCFs to have" + + " complete VCF headers by default."); + } } @SuppressWarnings("rawtypes") String formatVCFField(final Object val) { final String result; - if ( val == null ) + if (val == null) { result = VCFConstants.MISSING_VALUE_v4; - else if ( val instanceof Double ) + } else if (val instanceof Double) { result = formatVCFDouble((Double) val); - else if ( val instanceof Boolean ) - result = (Boolean)val ? "" : null; // empty string for true, null for false - else if ( val instanceof List ) { - result = formatVCFField(((List)val).toArray()); - } else if ( val.getClass().isArray() ) { + } else if (val instanceof Boolean) { + result = (Boolean) val ? "" : null; // empty string for true, null for false + } else if (val instanceof List) { + result = formatVCFField(((List) val).toArray()); + } else if (val.getClass().isArray()) { final int length = Array.getLength(val); - if ( length == 0 ) + if (length == 0) { return formatVCFField(null); - final StringBuilder sb = new StringBuilder(formatVCFField(Array.get(val, 0))); - for ( int i = 1; i < length; i++) { + } + final StringBuilder sb = new StringBuilder( + formatVCFField(Array.get(val, 0))); + for (int i = 1; i < length; i++) { sb.append(','); sb.append(formatVCFField(Array.get(val, i))); } result = sb.toString(); - } else + } else { result = val.toString(); + } return result; } @@ -245,9 +260,9 @@ public static String formatVCFDouble(final double d) { final String format; if (d < 1) { if (d < 0.01) { - if (Math.abs(d) >= 1e-20) + if (Math.abs(d) >= 1e-20) { format = "%.3e"; - else { + } else { // return a zero format return "0.00"; } @@ -299,7 +314,9 @@ public void addGenotypeData(final VariantContext vc, final Map a vcfoutput.append(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); - if (g == null) g = GenotypeBuilder.createMissing(sample, ploidy); + if (g == null) { + g = GenotypeBuilder.createMissing(sample, ploidy); + } final List attrs = new ArrayList<>(genotypeFormatKeys.size()); for (final String field : genotypeFormatKeys) { @@ -323,11 +340,11 @@ public void addGenotypeData(final VariantContext vc, final Map a final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field); if (accessor != null) { final int[] intValues = accessor.getValues(g); - if (intValues == null) + if (intValues == null) { outputValue = VCFConstants.MISSING_VALUE_v4; - else if (intValues.length == 1) // fast path + } else if (intValues.length == 1) { // fast path outputValue = Integer.toString(intValues[0]); - else { + } else { final StringBuilder sb = new StringBuilder(); sb.append(intValues[0]); for (int i = 1; i < intValues.length; i++) { @@ -342,16 +359,20 @@ else if (intValues.length == 1) // fast path } } - if (outputValue != null) + if (outputValue != null) { attrs.add(outputValue); + } } } // strip off trailing missing values if (!outputTrailingFormatFields) { for (int i = attrs.size() - 1; i >= 0; i--) { - if (isMissingValue(attrs.get(i))) attrs.remove(i); - else break; + if (isMissingValue(attrs.get(i))) { + attrs.remove(i); + } else { + break; + } } } @@ -375,8 +396,11 @@ private void writeInfoString(final Map infoFields, final Appenda boolean isFirst = true; for (final Map.Entry entry : infoFields.entrySet()) { - if (isFirst) isFirst = false; - else vcfoutput.append(VCFConstants.INFO_FIELD_SEPARATOR); + if (isFirst) { + isFirst = false; + } else { + vcfoutput.append(VCFConstants.INFO_FIELD_SEPARATOR); + } vcfoutput.append(entry.getKey()); diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java index df309d5166..c8871bd2be 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java @@ -314,4 +314,31 @@ public void testGenotypesUnaffectedByClonedVariants(final VCBuilderScheme builde Assert.assertNotEquals(vc2.getGenotypes(), vc1.getGenotypes(), "The two genotype lists should be different. only saw " + vc1.getGenotypes().toString()); } + + @Test + public void testCanResetFilters() { + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST"); + builder.unfiltered(); + builder.filter("mayIPlease?"); + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testCantCreateNullFilter(){ + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST"); + builder.filters((String)null); + builder.make(); + } + + @Test + public void testNullFilterArray(){ + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST"); + builder.filters((String[])null); + } + + @Test + public void testNullFilterSet(){ + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST"); + builder.filters((Set)null); + builder.make(); + } }