From 5a2d7a75b824aab483d201055ea70296a6b9ac10 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Fri, 9 Dec 2016 16:21:11 -0500 Subject: [PATCH] adding fine control over jexl expression behavior with missing values (#772) * adding JexlMissingValueTreatment to control how values in a jexl expression that are missing in the evaluated context are treated * adding overloads to VariantContextUtils.match to allow control of missing value behavior * minor refactoring in JEXLMap * making enum method package private * responding to comments * updated tests * moving JexlMissingValueTreatment to top level * fixing typo --- .../variant/variantcontext/JEXLMap.java | 115 ++++++++++++------ .../JexlMissingValueTreatment.java | 39 ++++++ .../variantcontext/VariantContextUtils.java | 35 +++++- .../VariantJEXLContextUnitTest.java | 52 ++++++-- 4 files changed, 193 insertions(+), 48 deletions(-) create mode 100644 src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java diff --git a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java index b8e13c75b5..33ec595142 100644 --- a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java +++ b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java @@ -5,11 +5,7 @@ import org.apache.commons.jexl2.JexlException; import org.apache.commons.jexl2.MapContext; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values. @@ -17,49 +13,87 @@ */ class JEXLMap implements Map { + /** + * If a JEXL expression contains values that are not available in the given context, the default behavior is to + * treat that expression as a miss match. + */ + public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH; + // our variant context and/or Genotype private final VariantContext vc; private final Genotype g; - // our context - private JexlContext jContext = null; + private final JexlMissingValueTreatment howToTreatMissingValues; /** * our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL} * for previously un-cached {@link JexlVCMatchExp}. */ - private Map jexl; + private final Map jexl; - public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g) { - initialize(jexlCollection); + // our context + private JexlContext jContext = null; + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * @param g genotype to evaluate expressions against, may be null + * @param howToTreatMissingValues how missing values in vc and g should be treated + */ + public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) { + this.jexl = initializeMap(jexlCollection); this.vc = vc; this.g = g; + this.howToTreatMissingValues = howToTreatMissingValues; } + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * @param g genotype to evaluate expressions against, may be null + * + * missing values are treated as false + */ + public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g) { + this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT); + } + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific VariantContext + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * + * missing values are treated as non matches (false) + */ public JEXLMap(final Collection jexlCollection, final VariantContext vc) { - this(jexlCollection, vc, null); + this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT); } /** * Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of an Jexl expression. * - * @throws IllegalArgumentException when {@code o} is {@code null} or + * @throws IllegalArgumentException when {@code key} is {@code null} or * when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions. */ - public Boolean get(Object o) { - if (o == null) { + public Boolean get(Object key) { + if (key == null) { throw new IllegalArgumentException("Query key is null"); } // if we've already determined the value, return it - if (jexl.containsKey(o) && jexl.get(o) != null) { - return jexl.get(o); + final Boolean value = jexl.get(key); + if (jexl.containsKey(key) && value != null) { + return value; } // otherwise cast the expression and try again - final JexlVCMatchExp e = (JexlVCMatchExp) o; - evaluateExpression(e); - return jexl.get(e); + final JexlVCMatchExp exp = (JexlVCMatchExp) key; + final boolean matches = evaluateExpression(exp); + jexl.put(exp, matches); + return matches; } /** @@ -87,9 +121,7 @@ public Set keySet() { */ public Collection values() { for (final JexlVCMatchExp exp : jexl.keySet()) { - if (jexl.get(exp) == null) { - evaluateExpression(exp); - } + jexl.computeIfAbsent(exp, k -> evaluateExpression(exp)); } return jexl.values(); } @@ -112,38 +144,42 @@ public void putAll(Map map) { } /** - * Initializes all keys with null values indicating that they have not yet been evaluated. + * Initializes a map and give all keys with null values indicating that they have not yet been evaluated. * The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}. + * + * @return an initialized map of jexlExpression -> null */ - private void initialize(Collection jexlCollection) { - jexl = new HashMap<>(); + private static Map initializeMap(final Collection jexlCollection) { + final Map jexlMap = new HashMap<>(jexlCollection.size()); for (final JexlVCMatchExp exp: jexlCollection) { - jexl.put(exp, null); + jexlMap.put(exp, null); } + + return jexlMap; } /** * Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}). * * @param exp the {@link JexlVCMatchExp} to evaluate - * + * @return true if the expression matched the context * @throws IllegalArgumentException when {@code exp} is {@code null}, or * when the Jexl expression in {@code exp} fails to evaluate the JexlContext * constructed with the input VC or genotype. */ - private void evaluateExpression(final JexlVCMatchExp exp) { + private boolean evaluateExpression(final JexlVCMatchExp exp) { // if the context is null, we need to create it to evaluate the JEXL expression if (this.jContext == null) { - createContext(); + jContext = createContext(); } try { + //TODO figure out of this can ever evaluate to null or if that isn't actually possible final Boolean value = (Boolean) exp.exp.evaluate(jContext); - // treat errors as no match - jexl.put(exp, value == null ? false : value); + return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value; } catch (final JexlException.Variable e) { - // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE - jexl.put(exp,false); + //this occurs when the jexl expression contained a literal that didn't match anything in the given context + return howToTreatMissingValues.getMissingValueOrExplode(); } catch (final JexlException e) { // todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values() throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e); @@ -151,16 +187,17 @@ private void evaluateExpression(final JexlVCMatchExp exp) { } /** - * Create the internal JexlContext, only when required. + * Create a new JexlContext * This code is where new JEXL context variables should get added. + * @return a new jexl context initialized appropriately */ - private void createContext() { + private JexlContext createContext() { if (vc == null) { - jContext = new MapContext(Collections.emptyMap()); + return new MapContext(Collections.emptyMap()); } else if (g == null) { - jContext = new VariantJEXLContext(vc); + return new VariantJEXLContext(vc); } else { - jContext = new GenotypeJEXLContext(vc, g); + return new GenotypeJEXLContext(vc, g); } } @@ -181,7 +218,7 @@ public Boolean remove(Object o) { public Set> entrySet() { - throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); + throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap"); } // nope diff --git a/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java new file mode 100644 index 0000000000..204cc3f2c2 --- /dev/null +++ b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java @@ -0,0 +1,39 @@ +package htsjdk.variant.variantcontext; + +import java.util.function.Supplier; + +/** + * How to treat values that appear in a jexl expression but are missing in the context it's applied to + */ +public enum JexlMissingValueTreatment { + /** + * Treat expressions with a missing value as a mismatch and evaluate to false + */ + TREAT_AS_MISMATCH(() -> false), + + /** + * Treat expressions with a missing value as a match and evaluate to true + */ + TREAT_AS_MATCH(() -> true), + + /** + * Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException} + */ + THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");}); + + private final Supplier resultSupplier; + + JexlMissingValueTreatment(final Supplier resultSupplier){ + this.resultSupplier = resultSupplier; + } + + /** + * get the missing value that corresponds to this option or throw an exception + * @return the value that should be used in case of a missing value + * @throws IllegalArgumentException if this should be treated as an error + */ + boolean getMissingValueOrExplode(){ + return resultSupplier.get(); + } + +} diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java index 96eaa64e34..face55bb9c 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java @@ -307,6 +307,7 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) { * This the best way to apply JEXL expressions to {@link VariantContext} records. * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions. * + * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching * @param vc variant context * @param exps expressions * @return true if there is a match @@ -324,7 +325,20 @@ public static Map match(VariantContext vc, Collection match(VariantContext vc, Genotype g, Collection exps) { - return new JEXLMap(exps,vc,g); + return match(vc, g, exps, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT); + } + + /** + * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g}, + * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't). + * This the best way to apply JEXL expressions to {@link VariantContext} records. + * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions. + * + * @param vc variant context + * @param g genotype + * @param exps expressions + * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context + * @return true if there is a match + */ + public static Map match(VariantContext vc, Genotype g, Collection exps, JexlMissingValueTreatment howToTreatMissingValues) { + return new JEXLMap(exps, vc, g, howToTreatMissingValues); } /** diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java index bebd39384c..78bf565a71 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java @@ -31,14 +31,10 @@ import htsjdk.variant.vcf.VCFConstants; import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -55,6 +51,10 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest { private static final VariantContextUtils.JexlVCMatchExp exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0")); + private static final JexlVCMatchExp missingValueExpression = new VariantContextUtils.JexlVCMatchExp( + "Zis10", VariantContextUtils.engine.get().createExpression("Z==10")); + + // SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC. private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10); private static final Allele Aref = Allele.create("A", true); @@ -87,7 +87,45 @@ public void testGetValue() { // eval our known expression Assert.assertTrue(!jexlMap.get(exp)); } - + + @Test(dataProvider = "getMissingValueTestData") + public void testMissingBehaviorThroughMatch(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class expectedException){ + if(expectedException == null) { + Assert.assertEquals(VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment), expected); + } else { + Assert.assertThrows(expectedException, () -> VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment)); + } + } + + @Test(dataProvider = "getMissingValueTestData") + public void testMissingBehavior(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class expectedException){ + final JEXLMap jexlMap = new JEXLMap(Collections.singletonList(missingValueExpression), vc, null, missingValueTreatment); + if(expectedException == null) { + Assert.assertEquals((boolean) jexlMap.get(missingValueExpression), expected); + } else { + Assert.assertThrows(expectedException, () -> jexlMap.get(missingValueExpression)); + } + } + + @DataProvider + public Object[][] getMissingValueTestData(){ + final List alleles = Arrays.asList(Aref, Talt); + VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles); + VariantContext noZ = vcb.make(); + VariantContext hasZ = vcb.attribute("Z", 0).make(); + + return new Object[][]{ + {noZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, + {hasZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, //the value isn't missing but the expression is false + {noZ, JexlMissingValueTreatment.TREAT_AS_MATCH, true, null}, + {hasZ, JexlMissingValueTreatment.TREAT_AS_MATCH, false, null}, //the value isn't missing but the expression is false + {noZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null}, + {hasZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null}, + {noZ, JexlMissingValueTreatment.THROW, false, IllegalArgumentException.class}, + {hasZ, JexlMissingValueTreatment.THROW, false, null} + }; + } + // Testing the new 'FT' and 'isPassFT' expressions in the JEXL map @Test public void testJEXLGenotypeFilters() {