Skip to content

Commit

Permalink
Added IEQUAL operator to support case insensitive searches (#11501)
Browse files Browse the repository at this point in the history
Co-authored-by: david-leifker <[email protected]>
  • Loading branch information
Nbagga14 and david-leifker authored Oct 4, 2024
1 parent 7c6d31c commit cc63f53
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 10 deletions.
6 changes: 6 additions & 0 deletions datahub-graphql-core/src/main/resources/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,11 @@ enum FilterOperator {
"""
EQUAL

"""
Represent the relation: field = value (case-insensitive), e.g. platform = HDFS
"""
IEQUAL

"""
* Represent the relation: String field is one of the array values, e.g. name in ["Profile", "Event"]
"""
Expand Down Expand Up @@ -575,6 +580,7 @@ enum FilterOperator {
Represent the relation: URN field matches any nested child or parent in addition to the given URN
"""
RELATED_INCL

}

"""
Expand Down
1 change: 1 addition & 0 deletions docs/api/restli/restli-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,7 @@ where valid conditions include
- CONTAIN
- END_WITH
- EQUAL
- IEQUAL (case-insensitive variant of EQUAL)
- GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO
- LESS_THAN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,10 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand(
return orQueryBuilder;
}

/**
 * Reports whether a filter condition requests case-insensitive matching.
 *
 * @param condition the condition taken from the criterion being translated
 * @return {@code true} only when {@code condition} is {@code Condition.IEQUAL}
 */
private static boolean isCaseInsensitiveSearchEnabled(Condition condition) {
  return Condition.IEQUAL == condition;
}

@Nonnull
private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
@Nonnull Criterion criterion,
Expand All @@ -564,6 +568,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
final AspectRetriever aspectRetriever = opContext.getAspectRetriever();
final String fieldName = toParentField(criterion.getField(), aspectRetriever);

boolean enableCaseInsensitiveSearch;

if (condition == Condition.IS_NULL) {
return QueryBuilders.boolQuery()
.mustNot(QueryBuilders.existsQuery(fieldName))
Expand All @@ -573,9 +579,15 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
.must(QueryBuilders.existsQuery(fieldName))
.queryName(queryName != null ? queryName : fieldName);
} else if (criterion.hasValues()) {
if (condition == Condition.EQUAL) {
if (condition == Condition.EQUAL || condition == Condition.IEQUAL) {
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch)
.queryName(queryName != null ? queryName : fieldName);
} else if (RANGE_QUERY_CONDITIONS.contains(condition)) {
return buildRangeQueryFromCriterion(
Expand All @@ -596,7 +608,7 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
return buildEndsWithConditionFromCriterion(
fieldName, criterion, queryName, isTimeseries, aspectRetriever);
} else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) {

enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return QueryFilterRewriterContext.builder()
.queryFilterRewriteChain(queryFilterRewriteChain)
.condition(condition)
Expand All @@ -605,7 +617,12 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
.rewrite(
opContext,
buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever))
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch))
.queryName(queryName != null ? queryName : fieldName);
}
}
Expand Down Expand Up @@ -670,9 +687,15 @@ private static QueryBuilder buildEqualsConditionFromCriterion(
@Nonnull final Criterion criterion,
final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) {
@Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
return buildEqualsConditionFromCriterionWithValues(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever);
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch);
}

/**
Expand All @@ -684,7 +707,8 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues(
@Nonnull final Criterion criterion,
final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) {
@Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
if (fieldTypes.size() > 1) {
log.warn(
Expand All @@ -704,6 +728,21 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues(
criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList());
return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName);
}

if (enableCaseInsensitiveSearch) {
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
criterion
.getValues()
.forEach(
value ->
boolQuery.should(
QueryBuilders.termQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
value.trim())
.caseInsensitive(true)));
return boolQuery;
}

return QueryBuilders.termsQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
criterion.getValues())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() {
+ " \"_name\" : \"myTestField\"\n"
+ " }\n"
+ "}";

Assert.assertEquals(result.toString(), expected);

final Criterion multiValueCriterion =
Expand Down Expand Up @@ -150,6 +151,85 @@ public void testGetQueryBuilderFromCriterionEqualsValues() {
Assert.assertEquals(result.toString(), expected);
}

/**
 * Checks the query produced for {@code Condition.IEQUAL} (case-insensitive equality).
 *
 * <p>Both the single-value and the multi-value criterion are expected to render as a {@code bool}
 * query holding one {@code term} clause per value under {@code should}, each clause flagged
 * {@code case_insensitive} and targeting {@code myTestField.keyword}; the enclosing query is
 * named {@code myTestField}.
 */
@Test
public void testGetQueryBuilderFromCriterionIEqualValues() {
  // --- one value: a single case-insensitive term clause is expected ---
  final String expectedSingleValueQuery =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";

  final Criterion singleValueCriterion =
      buildCriterion("myTestField", Condition.IEQUAL, "value1");
  final QueryBuilder singleValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          singleValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);

  Assert.assertEquals(singleValueQuery.toString(), expectedSingleValueQuery);

  // --- two values: one case-insensitive term clause per value, in input order ---
  final String expectedMultiValueQuery =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " },\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value2\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";

  final Criterion multiValueCriterion =
      buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2");
  final QueryBuilder multiValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          multiValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);

  Assert.assertEquals(multiValueQuery.toString(), expectedMultiValueQuery);
}

@Test
public void testGetQueryBuilderFromCriterionContain() {
final Criterion singleValueCriterion =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ enum Condition {
*/
EQUAL

/**
* Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs
*/
IEQUAL

/**
* Represent the relation: field is null, e.g. platform is null
*/
Expand Down
1 change: 1 addition & 0 deletions metadata-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,7 @@ where valid conditions include
- CONTAIN
- END_WITH
- EQUAL
- IEQUAL (case-insensitive variant of EQUAL)
- GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO
- LESS_THAN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,14 @@
"type" : "enum",
"name" : "Condition",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,14 @@
"type" : "enum",
"name" : "Condition",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6057,13 +6057,14 @@
"name" : "Condition",
"namespace" : "com.linkedin.metadata.query.filter",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
Expand Down

0 comments on commit cc63f53

Please sign in to comment.