Skip to content

Commit

Permalink
adding fine control over jexl expression behavior with missing values (
Browse files Browse the repository at this point in the history
…#772)

* adding JexlMissingValueTreatment to control how values in a jexl expression that are missing in the evaluated context are treated
* adding overloads to VariantContextUtils.match to allow control of missing value behavior
* minor refactoring in JEXLMap

* making enum method package private

* responding to comments

* updated tests

* moving JexlMissingValueTreatment to top level

* fixing typo
  • Loading branch information
lbergelson authored Dec 9, 2016
1 parent e69aff0 commit 5a2d7a7
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 48 deletions.
115 changes: 76 additions & 39 deletions src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,95 @@
import org.apache.commons.jexl2.JexlException;
import org.apache.commons.jexl2.MapContext;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.*;

/**
* This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values.
* It lazily initializes each value as requested to save as much processing time as possible.
*/

class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
/**
* If a JEXL expression contains values that are not available in the given context, the default behavior is to
* treat that expression as a miss match.
*/
public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH;

// our variant context and/or Genotype
private final VariantContext vc;
private final Genotype g;

// our context
private JexlContext jContext = null;
private final JexlMissingValueTreatment howToTreatMissingValues;

/**
* our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL}
* for previously un-cached {@link JexlVCMatchExp}.
*/
private Map<JexlVCMatchExp,Boolean> jexl;
private final Map<JexlVCMatchExp,Boolean> jexl;

public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
initialize(jexlCollection);
// our context
private JexlContext jContext = null;

/**
* Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
* @param jexlCollection collection of expressions to be evaluated
* @param vc VariantContext to evaluate expressions against
* @param g genotype to evaluate expressions against, may be null
* @param howToTreatMissingValues how missing values in vc and g should be treated
*/
public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) {
this.jexl = initializeMap(jexlCollection);
this.vc = vc;
this.g = g;
this.howToTreatMissingValues = howToTreatMissingValues;
}


/**
* Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
* @param jexlCollection collection of expressions to be evaluated
* @param vc VariantContext to evaluate expressions against
* @param g genotype to evaluate expressions against, may be null
*
* missing values are treated as false
*/
public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT);
}

/**
* Construct a new JEXLMap which can evaluate expressions against a specific VariantContext
* @param jexlCollection collection of expressions to be evaluated
* @param vc VariantContext to evaluate expressions against
*
* missing values are treated as non matches (false)
*/
public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc) {
this(jexlCollection, vc, null);
this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT);
}

/**
* Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of an Jexl expression.
*
* @throws IllegalArgumentException when {@code o} is {@code null} or
* @throws IllegalArgumentException when {@code key} is {@code null} or
* when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions.
*/
public Boolean get(Object o) {
if (o == null) {
public Boolean get(Object key) {
if (key == null) {
throw new IllegalArgumentException("Query key is null");
}

// if we've already determined the value, return it
if (jexl.containsKey(o) && jexl.get(o) != null) {
return jexl.get(o);
final Boolean value = jexl.get(key);
if (jexl.containsKey(key) && value != null) {
return value;
}

// otherwise cast the expression and try again
final JexlVCMatchExp e = (JexlVCMatchExp) o;
evaluateExpression(e);
return jexl.get(e);
final JexlVCMatchExp exp = (JexlVCMatchExp) key;
final boolean matches = evaluateExpression(exp);
jexl.put(exp, matches);
return matches;
}

/**
Expand Down Expand Up @@ -87,9 +121,7 @@ public Set<JexlVCMatchExp> keySet() {
*/
public Collection<Boolean> values() {
for (final JexlVCMatchExp exp : jexl.keySet()) {
if (jexl.get(exp) == null) {
evaluateExpression(exp);
}
jexl.computeIfAbsent(exp, k -> evaluateExpression(exp));
}
return jexl.values();
}
Expand All @@ -112,55 +144,60 @@ public void putAll(Map<? extends JexlVCMatchExp, ? extends Boolean> map) {
}

/**
* Initializes all keys with null values indicating that they have not yet been evaluated.
* Initializes a map and give all keys with null values indicating that they have not yet been evaluated.
* The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}.
*
* @return an initialized map of jexlExpression -> null
*/
private void initialize(Collection<JexlVCMatchExp> jexlCollection) {
jexl = new HashMap<>();
private static Map<JexlVCMatchExp,Boolean> initializeMap(final Collection<JexlVCMatchExp> jexlCollection) {
final Map<JexlVCMatchExp,Boolean> jexlMap = new HashMap<>(jexlCollection.size());
for (final JexlVCMatchExp exp: jexlCollection) {
jexl.put(exp, null);
jexlMap.put(exp, null);
}

return jexlMap;
}

/**
* Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}).
*
* @param exp the {@link JexlVCMatchExp} to evaluate
*
* @return true if the expression matched the context
* @throws IllegalArgumentException when {@code exp} is {@code null}, or
* when the Jexl expression in {@code exp} fails to evaluate the JexlContext
* constructed with the input VC or genotype.
*/
private void evaluateExpression(final JexlVCMatchExp exp) {
private boolean evaluateExpression(final JexlVCMatchExp exp) {
// if the context is null, we need to create it to evaluate the JEXL expression
if (this.jContext == null) {
createContext();
jContext = createContext();
}

try {
//TODO figure out of this can ever evaluate to null or if that isn't actually possible
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
// treat errors as no match
jexl.put(exp, value == null ? false : value);
return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value;
} catch (final JexlException.Variable e) {
// if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
jexl.put(exp,false);
//this occurs when the jexl expression contained a literal that didn't match anything in the given context
return howToTreatMissingValues.getMissingValueOrExplode();
} catch (final JexlException e) {
// todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values()
throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e);
}
}

/**
* Create the internal JexlContext, only when required.
* Create a new JexlContext
* This code is where new JEXL context variables should get added.
* @return a new jexl context initialized appropriately
*/
private void createContext() {
private JexlContext createContext() {
if (vc == null) {
jContext = new MapContext(Collections.emptyMap());
return new MapContext(Collections.emptyMap());
} else if (g == null) {
jContext = new VariantJEXLContext(vc);
return new VariantJEXLContext(vc);
} else {
jContext = new GenotypeJEXLContext(vc, g);
return new GenotypeJEXLContext(vc, g);
}
}

Expand All @@ -181,7 +218,7 @@ public Boolean remove(Object o) {


public Set<Entry<JexlVCMatchExp, Boolean>> entrySet() {
throw new UnsupportedOperationException("clear() not supported on a JEXLMap");
throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap");
}

// nope
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package htsjdk.variant.variantcontext;

import java.util.function.Supplier;

/**
* How to treat values that appear in a jexl expression but are missing in the context it's applied to
*/
public enum JexlMissingValueTreatment {
/**
* Treat expressions with a missing value as a mismatch and evaluate to false
*/
TREAT_AS_MISMATCH(() -> false),

/**
* Treat expressions with a missing value as a match and evaluate to true
*/
TREAT_AS_MATCH(() -> true),

/**
* Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException}
*/
THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");});

private final Supplier<Boolean> resultSupplier;

JexlMissingValueTreatment(final Supplier<Boolean> resultSupplier){
this.resultSupplier = resultSupplier;
}

/**
* get the missing value that corresponds to this option or throw an exception
* @return the value that should be used in case of a missing value
* @throws IllegalArgumentException if this should be treated as an error
*/
boolean getMissingValueOrExplode(){
return resultSupplier.get();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
* Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param exps expressions
* @return true if there is a match
Expand All @@ -324,7 +325,20 @@ public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<J
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
return match(vc,g, Collections.singletonList(exp)).get(exp);
return match(vc, g, Collections.singletonList(exp), JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT).get(exp);
}

/**
* Returns true if {@code exp} match {@code vc}, {@code g}.
* See {@link #match(VariantContext, Genotype, Collection)} for full docs.
* @param vc variant context
* @param g genotype
* @param exp expression
* @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp, JexlMissingValueTreatment howToTreatMissingValues) {
return match(vc, g, Collections.singletonList(exp), howToTreatMissingValues).get(exp);
}

/**
Expand All @@ -333,13 +347,30 @@ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
* Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param g genotype
* @param exps expressions
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(exps,vc,g);
return match(vc, g, exps, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT);
}

/**
* Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g},
* and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't).
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
* @param vc variant context
* @param g genotype
* @param exps expressions
* @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps, JexlMissingValueTreatment howToTreatMissingValues) {
return new JEXLMap(exps, vc, g, howToTreatMissingValues);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,10 @@

import htsjdk.variant.vcf.VCFConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.*;


/**
Expand All @@ -55,6 +51,10 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
private static final VariantContextUtils.JexlVCMatchExp exp
= new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0"));

private static final JexlVCMatchExp missingValueExpression = new VariantContextUtils.JexlVCMatchExp(
"Zis10", VariantContextUtils.engine.get().createExpression("Z==10"));


// SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC.
private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10);
private static final Allele Aref = Allele.create("A", true);
Expand Down Expand Up @@ -87,7 +87,45 @@ public void testGetValue() {
// eval our known expression
Assert.assertTrue(!jexlMap.get(exp));
}


@Test(dataProvider = "getMissingValueTestData")
public void testMissingBehaviorThroughMatch(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
if(expectedException == null) {
Assert.assertEquals(VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment), expected);
} else {
Assert.assertThrows(expectedException, () -> VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment));
}
}

@Test(dataProvider = "getMissingValueTestData")
public void testMissingBehavior(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
final JEXLMap jexlMap = new JEXLMap(Collections.singletonList(missingValueExpression), vc, null, missingValueTreatment);
if(expectedException == null) {
Assert.assertEquals((boolean) jexlMap.get(missingValueExpression), expected);
} else {
Assert.assertThrows(expectedException, () -> jexlMap.get(missingValueExpression));
}
}

@DataProvider
public Object[][] getMissingValueTestData(){
final List<Allele> alleles = Arrays.asList(Aref, Talt);
VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles);
VariantContext noZ = vcb.make();
VariantContext hasZ = vcb.attribute("Z", 0).make();

return new Object[][]{
{noZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null},
{hasZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, //the value isn't missing but the expression is false
{noZ, JexlMissingValueTreatment.TREAT_AS_MATCH, true, null},
{hasZ, JexlMissingValueTreatment.TREAT_AS_MATCH, false, null}, //the value isn't missing but the expression is false
{noZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
{hasZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
{noZ, JexlMissingValueTreatment.THROW, false, IllegalArgumentException.class},
{hasZ, JexlMissingValueTreatment.THROW, false, null}
};
}

// Testing the new 'FT' and 'isPassFT' expressions in the JEXL map
@Test
public void testJEXLGenotypeFilters() {
Expand Down

0 comments on commit 5a2d7a7

Please sign in to comment.