diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java index e4c9dd55a3b4a..fac08c7e20646 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java @@ -3,8 +3,11 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; import com.linkedin.metadata.models.annotation.EntityAnnotation; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** A specification of a DataHub Entity */ @@ -36,6 +39,18 @@ default List getSearchableFieldSpecs() { .collect(Collectors.toList()); } + default Map> getSearchableFieldSpecMap() { + return getSearchableFieldSpecs().stream() + .collect( + Collectors.toMap( + searchableFieldSpec -> searchableFieldSpec.getSearchableAnnotation().getFieldName(), + searchableFieldSpec -> new HashSet<>(Collections.singleton(searchableFieldSpec)), + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); + } + default List getSearchScoreFieldSpecs() { return getAspectSpecs().stream() .map(AspectSpec::getSearchScoreFieldSpecs) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java index 580134f566871..54f2206798da0 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java @@ -248,9 +248,9 @@ public AspectSpec buildAspectSpec( // Extract SearchScore Field Specs final SearchScoreFieldSpecExtractor searchScoreFieldSpecExtractor = new SearchScoreFieldSpecExtractor(); - final DataSchemaRichContextTraverser searcScoreFieldSpecTraverser = + final DataSchemaRichContextTraverser searchScoreFieldSpecTraverser = new DataSchemaRichContextTraverser(searchScoreFieldSpecExtractor); - searcScoreFieldSpecTraverser.traverse(processedSearchScoreResult.getResultSchema()); + searchScoreFieldSpecTraverser.traverse(processedSearchScoreResult.getResultSchema()); final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedRelationshipResult = SchemaAnnotationProcessor.process( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index 41043995a3b77..9aed29ab8595e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -91,7 +91,7 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) .filter(Files::isRegularFile) .filter(f -> f.endsWith("entity-registry.yml") || f.endsWith("entity-registry.yaml")) .collect(Collectors.toList()); - if (yamlFiles.size() == 0) { + if (yamlFiles.isEmpty()) { throw new EntityRegistryException( String.format( "Did not find an entity registry (entity_registry.yaml/yml) under %s", diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 650a1cd41066e..0dcd0420d4df8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -58,7 +58,7 @@ private void validateEntitySpec(EntitySpec entitySpec, final ValidationResult va validationResult.setValid(false); validationResult .getValidationFailures() - .add(String.format("Key aspect is missing in entity {}", entitySpec.getName())); + .add(String.format("Key aspect is missing in entity %s", entitySpec.getName())); } } @@ -86,7 +86,7 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) } // Merge Event Specs - if (patchEntityRegistry.getEventSpecs().size() > 0) { + if (!patchEntityRegistry.getEventSpecs().isEmpty()) { eventNameToSpec.putAll(patchEntityRegistry.getEventSpecs()); } // TODO: Validate that the entity registries don't have conflicts among each other @@ -116,19 +116,18 @@ private void checkMergeable( if (existingEntitySpec != null) { existingEntitySpec .getAspectSpecMap() - .entrySet() .forEach( - aspectSpecEntry -> { - if (newEntitySpec.hasAspect(aspectSpecEntry.getKey())) { + (key, value) -> { + if (newEntitySpec.hasAspect(key)) { CompatibilityResult result = CompatibilityChecker.checkCompatibility( - aspectSpecEntry.getValue().getPegasusSchema(), - newEntitySpec.getAspectSpec(aspectSpecEntry.getKey()).getPegasusSchema(), + value.getPegasusSchema(), + newEntitySpec.getAspectSpec(key).getPegasusSchema(), new CompatibilityOptions()); if (result.isError()) { log.error( "{} schema is not compatible with previous schema due to {}", - aspectSpecEntry.getKey(), + key, result.getMessages()); // we want to continue processing all aspects to collect all failures validationResult.setValid(false); @@ -137,11 +136,11 @@ private void checkMergeable( .add( String.format( "%s schema is not compatible with previous schema due to %s", - aspectSpecEntry.getKey(), result.getMessages())); + key, result.getMessages())); } else { log.info( "{} schema is compatible with previous schema due to {}", - aspectSpecEntry.getKey(), + key, result.getMessages()); } } @@ -222,7 +221,7 @@ public PluginFactory getPluginFactory() { @Setter @Getter - private class ValidationResult { + private static class ValidationResult { boolean valid = true; List validationFailures = new ArrayList<>(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java index b82b905c50004..b4fc4193e7263 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java @@ -71,19 +71,17 @@ public class PatchEntityRegistry implements EntityRegistry { @Override public String toString() { StringBuilder sb = new StringBuilder("PatchEntityRegistry[" + "identifier=" + identifier + ';'); - entityNameToSpec.entrySet().stream() - .forEach( - entry -> - sb.append("[entityName=") - .append(entry.getKey()) - .append(";aspects=[") - .append( - entry.getValue().getAspectSpecs().stream() - .map(spec -> spec.getName()) - .collect(Collectors.joining(","))) - .append("]]")); - eventNameToSpec.entrySet().stream() - .forEach(entry -> sb.append("[eventName=").append(entry.getKey()).append("]")); + entityNameToSpec.forEach( + (key1, value1) -> + sb.append("[entityName=") + .append(key1) + .append(";aspects=[") + .append( + value1.getAspectSpecs().stream() + .map(AspectSpec::getName) + .collect(Collectors.joining(","))) + .append("]]")); + eventNameToSpec.forEach((key, value) -> sb.append("[eventName=").append(key).append("]")); return sb.toString(); } @@ -119,7 +117,7 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) .filter(Files::isRegularFile) .filter(f -> f.endsWith("entity-registry.yml") || f.endsWith("entity-registry.yaml")) .collect(Collectors.toList()); - if (yamlFiles.size() == 0) { + if (yamlFiles.isEmpty()) { throw new EntityRegistryException( String.format( "Did not find an entity registry (entity-registry.yaml/yml) under %s", @@ -175,7 +173,7 @@ private PatchEntityRegistry( entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); } catch (IOException e) { - e.printStackTrace(); + log.error("Unable to read Patch configuration.", e); throw new IllegalArgumentException( String.format( "Error while reading config file in path %s: %s", configFileStream, e.getMessage())); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index 8fefa2fe00ae8..22aeddb6ac65f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -120,7 +120,7 @@ public AspectTemplateEngine getAspectTemplateEngine() { } @Override - public EventSpec getEventSpec(final String ignored) { + public EventSpec getEventSpec(@Nonnull final String ignored) { return null; } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java index d9cf8fd2603a8..8b043569dd16a 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java @@ -189,7 +189,7 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { testEntityInfo.getPegasusSchema().getFullName()); // Assert on Searchable Fields - assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 11); + assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 12); assertEquals( "customProperties", testEntityInfo @@ -340,6 +340,20 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { .get(new PathSpec("doubleField").toString()) .getSearchableAnnotation() .getFieldType()); + assertEquals( + "removed", + testEntityInfo + .getSearchableFieldSpecMap() + .get(new PathSpec("removed").toString()) + .getSearchableAnnotation() + .getFieldName()); + assertEquals( + SearchableAnnotation.FieldType.BOOLEAN, + testEntityInfo + .getSearchableFieldSpecMap() + .get(new PathSpec("removed").toString()) + .getSearchableAnnotation() + .getFieldType()); // Assert on Relationship Fields assertEquals(4, testEntityInfo.getRelationshipFieldSpecs().size()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 3c71a2dfd9180..d610ea4b4e028 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; @@ -33,6 +34,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -554,7 +556,8 @@ private QueryBuilder buildQueryStringV2( queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal)); - queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter)); + queryBuilder.filter( + SearchRequestHandler.getFilterQuery(filter, entitySpec.getSearchableFieldSpecMap())); return queryBuilder; } @@ -580,7 +583,18 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal)); - queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter)); + Map> searchableFields = + entitySpecs.stream() + .flatMap(entitySpec -> entitySpec.getSearchableFieldSpecMap().entrySet().stream()) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); + queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter, searchableFields)); return queryBuilder; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 0eb44edfb11de..1ec90ed6f61e2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -78,7 +78,8 @@ public long docCount(@Nonnull String entityName) { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); CountRequest countRequest = new CountRequest(indexConvention.getIndexName(entitySpec)) - .query(SearchRequestHandler.getFilterQuery(null)); + .query( + SearchRequestHandler.getFilterQuery(null, entitySpec.getSearchableFieldSpecMap())); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { return client.count(countRequest, RequestOptions.DEFAULT).getCount(); } catch (IOException e) { @@ -315,9 +316,17 @@ public Map aggregateByValue( @Nonnull String field, @Nullable Filter requestParams, int limit) { + List entitySpecs; + if (entityNames == null || entityNames.isEmpty()) { + entitySpecs = new ArrayList<>(entityRegistry.getEntitySpecs().values()); + } else { + entitySpecs = + entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + } final SearchRequest searchRequest = - SearchRequestHandler.getAggregationRequest( - field, transformFilterForEntities(requestParams, indexConvention), limit); + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + .getAggregationRequest( + field, transformFilterForEntities(requestParams, indexConvention), limit); if (entityNames == null) { String indexName = indexConvention.getAllEntityIndicesPattern(); searchRequest.indices(indexName); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index cdcdae2f3d311..333d9602734d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.utils.ESUtils; import java.net.URISyntaxException; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -40,19 +41,33 @@ public class AutocompleteRequestHandler { private final List _defaultAutocompleteFields; + private final Map> searchableFields; private static final Map AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); public AutocompleteRequestHandler(@Nonnull EntitySpec entitySpec) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); _defaultAutocompleteFields = Stream.concat( - entitySpec.getSearchableFieldSpecs().stream() + fieldSpecs.stream() .map(SearchableFieldSpec::getSearchableAnnotation) .filter(SearchableAnnotation::isEnableAutocomplete) .map(SearchableAnnotation::getFieldName), Stream.of("urn")) .collect(Collectors.toList()); + searchableFields = + fieldSpecs.stream() + .collect( + Collectors.toMap( + searchableFieldSpec -> + searchableFieldSpec.getSearchableAnnotation().getFieldName(), + searchableFieldSpec -> + new HashSet<>(Collections.singleton(searchableFieldSpec)), + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); } public static AutocompleteRequestHandler getBuilder(@Nonnull EntitySpec entitySpec) { @@ -66,7 +81,7 @@ public SearchRequest getSearchRequest( SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(limit); searchSourceBuilder.query(getQuery(input, field)); - searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter, false)); + searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter, false, searchableFields)); searchSourceBuilder.highlighter(getHighlights(field)); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index c5a5ade216bf7..e6ee909c80dae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -97,6 +97,7 @@ public class SearchRequestHandler { private final SearchConfiguration _configs; private final SearchQueryBuilder _searchQueryBuilder; private final AggregationQueryBuilder _aggregationQueryBuilder; + private final Map> searchableFields; private SearchRequestHandler( @Nonnull EntitySpec entitySpec, @@ -121,6 +122,17 @@ private SearchRequestHandler( _searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); _aggregationQueryBuilder = new AggregationQueryBuilder(configs, annotations); _configs = configs; + searchableFields = + _entitySpecs.stream() + .flatMap(entitySpec -> entitySpec.getSearchableFieldSpecMap().entrySet().stream()) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); } public static SearchRequestHandler getBuilder( @@ -169,8 +181,13 @@ private BinaryOperator mapMerger() { }; } - public static BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { - BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter, false); + public BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { + return getFilterQuery(filter, searchableFields); + } + + public static BoolQueryBuilder getFilterQuery( + @Nullable Filter filter, Map> searchableFields) { + BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter, false, searchableFields); return filterSoftDeletedByDefault(filter, filterQuery); } @@ -354,7 +371,7 @@ public SearchRequest getFilterRequest( * @return {@link SearchRequest} that contains the aggregation query */ @Nonnull - public static SearchRequest getAggregationRequest( + public SearchRequest getAggregationRequest( @Nonnull String field, @Nullable Filter filter, int limit) { SearchRequest searchRequest = new SearchRequest(); BoolQueryBuilder filterQuery = getFilterQuery(filter); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index aa854149de43a..77a67f100895c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -7,7 +7,6 @@ import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -18,11 +17,13 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -32,6 +33,7 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.FieldSortBuilder; @@ -76,6 +78,13 @@ public class ESUtils { SearchableAnnotation.FieldType.BROWSE_PATH_V2, SearchableAnnotation.FieldType.URN, SearchableAnnotation.FieldType.URN_PARTIAL); + + public static final Set RANGE_QUERY_CONDITIONS = + Set.of( + Condition.GREATER_THAN, + Condition.GREATER_THAN_OR_EQUAL_TO, + Condition.LESS_THAN, + Condition.LESS_THAN_OR_EQUAL_TO); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -100,9 +109,6 @@ public class ESUtils { } }; - // TODO - This has been expanded for has* in another branch - public static final Set BOOLEAN_FIELDS = ImmutableSet.of("removed"); - /* * Refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html for list of reserved * characters in an Elasticsearch regular expression. @@ -123,7 +129,10 @@ private ESUtils() {} * @return built filter query */ @Nonnull - public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean isTimeseries) { + public static BoolQueryBuilder buildFilterQuery( + @Nullable Filter filter, + boolean isTimeseries, + final Map> searchableFields) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; @@ -134,7 +143,8 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean .getOr() .forEach( or -> - finalQueryBuilder.should(ESUtils.buildConjunctiveFilterQuery(or, isTimeseries))); + finalQueryBuilder.should( + ESUtils.buildConjunctiveFilterQuery(or, isTimeseries, searchableFields))); } else if (filter.getCriteria() != null) { // Otherwise, build boolean query from the deprecated "criteria" field. log.warn("Received query Filter with a deprecated field 'criteria'. Use 'or' instead."); @@ -146,7 +156,8 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean if (!criterion.getValue().trim().isEmpty() || criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { - andQueryBuilder.must(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.must( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFields)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -156,7 +167,9 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean @Nonnull public static BoolQueryBuilder buildConjunctiveFilterQuery( - @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries) { + @Nonnull ConjunctiveCriterion conjunctiveCriterion, + boolean isTimeseries, + Map> searchableFields) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -167,9 +180,11 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( || criterion.hasValues()) { if (!criterion.isNegated()) { // `filter` instead of `must` (enables caching and bypasses scoring) - andQueryBuilder.filter(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.filter( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFields)); } else { - andQueryBuilder.mustNot(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.mustNot( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFields)); } } }); @@ -205,7 +220,9 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( */ @Nonnull public static QueryBuilder getQueryBuilderFromCriterion( - @Nonnull final Criterion criterion, boolean isTimeseries) { + @Nonnull final Criterion criterion, + boolean isTimeseries, + final Map> searchableFields) { final String fieldName = toFacetField(criterion.getField()); if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD)) { criterion.setField(fieldName); @@ -224,10 +241,10 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries); + maybeFieldToExpand.get(), criterion, isTimeseries, searchableFields); } - return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); + return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries, searchableFields); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -378,7 +395,7 @@ public static String toFacetField(@Nonnull final String filterField) { @Nonnull public static String toKeywordField( - @Nonnull final String filterField, @Nonnull final boolean skipKeywordSuffix) { + @Nonnull final String filterField, final boolean skipKeywordSuffix) { return skipKeywordSuffix || KEYWORD_FIELDS.contains(filterField) || PATH_HIERARCHY_FIELDS.contains(filterField) @@ -428,7 +445,8 @@ public static void setSearchAfter( private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final List fields, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + final Map> searchableFields) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -442,14 +460,17 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( } criterionToQuery.setField(toKeywordField(field, isTimeseries)); orQueryBuilder.should( - getQueryBuilderFromCriterionForSingleField(criterionToQuery, isTimeseries)); + getQueryBuilderFromCriterionForSingleField( + criterionToQuery, isTimeseries, searchableFields)); } return orQueryBuilder; } @Nonnull private static QueryBuilder getQueryBuilderFromCriterionForSingleField( - @Nonnull Criterion criterion, @Nonnull boolean isTimeseries) { + @Nonnull Criterion criterion, + boolean isTimeseries, + final Map> searchableFields) { final Condition condition = criterion.getCondition(); final String fieldName = toFacetField(criterion.getField()); @@ -463,24 +484,11 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( .queryName(fieldName); } else if (criterion.hasValues() || criterion.hasValue()) { if (condition == Condition.EQUAL) { - return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries); - // TODO: Support multi-match on the following operators (using new 'values' field) - } else if (condition == Condition.GREATER_THAN) { - return QueryBuilders.rangeQuery(criterion.getField()) - .gt(criterion.getValue().trim()) - .queryName(fieldName); - } else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) { - return QueryBuilders.rangeQuery(criterion.getField()) - .gte(criterion.getValue().trim()) - .queryName(fieldName); - } else if (condition == Condition.LESS_THAN) { - return QueryBuilders.rangeQuery(criterion.getField()) - .lt(criterion.getValue().trim()) - .queryName(fieldName); - } else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) { - return QueryBuilders.rangeQuery(criterion.getField()) - .lte(criterion.getValue().trim()) - .queryName(fieldName); + return buildEqualsConditionFromCriterion( + fieldName, criterion, isTimeseries, searchableFields); + } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { + return buildRangeQueryFromCriterion( + criterion, fieldName, searchableFields, condition, isTimeseries); } else if (condition == Condition.CONTAIN) { return QueryBuilders.wildcardQuery( toKeywordField(criterion.getField(), isTimeseries), @@ -504,13 +512,15 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( private static QueryBuilder buildEqualsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + final Map> searchableFields) { /* * If the newer 'values' field of Criterion.pdl is set, then we * handle using the following code to allow multi-match. */ if (!criterion.getValues().isEmpty()) { - return buildEqualsConditionFromCriterionWithValues(fieldName, criterion, isTimeseries); + return buildEqualsConditionFromCriterionWithValues( + fieldName, criterion, isTimeseries, searchableFields); } /* * Otherwise, we are likely using the deprecated 'value' field. @@ -526,21 +536,95 @@ private static QueryBuilder buildEqualsConditionFromCriterion( private static QueryBuilder buildEqualsConditionFromCriterionWithValues( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { - if (BOOLEAN_FIELDS.contains(fieldName) && criterion.getValues().size() == 1) { - // Handle special-cased Boolean fields. - // here we special case boolean fields we recognize the names of and hard-cast - // the first provided value to a boolean to do the comparison. - // Ideally, we should detect the type of the field from the entity-registry in order - // to determine how to cast. + final boolean isTimeseries, + final Map> searchableFields) { + Set fieldTypes = getFieldTypes(searchableFields, fieldName); + if (fieldTypes.size() > 1) { + log.warn( + "Multiple field types for field name {}, determining best fit for set: {}", + fieldName, + fieldTypes); + } + if (fieldTypes.contains(BOOLEAN_FIELD_TYPE) && criterion.getValues().size() == 1) { return QueryBuilders.termQuery(fieldName, Boolean.parseBoolean(criterion.getValues().get(0))) .queryName(fieldName); + } else if (fieldTypes.contains(LONG_FIELD_TYPE) || fieldTypes.contains(DATE_FIELD_TYPE)) { + List longValues = + criterion.getValues().stream().map(Long::parseLong).collect(Collectors.toList()); + return QueryBuilders.termsQuery(fieldName, longValues).queryName(fieldName); + } else if (fieldTypes.contains(DOUBLE_FIELD_TYPE)) { + List doubleValues = + criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList()); + return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } return QueryBuilders.termsQuery( toKeywordField(criterion.getField(), isTimeseries), criterion.getValues()) .queryName(fieldName); } + private static Set getFieldTypes( + Map> searchableFields, String fieldName) { + Set fieldSpecs = + searchableFields.getOrDefault(fieldName, Collections.emptySet()); + Set fieldTypes = + fieldSpecs.stream() + .map(SearchableFieldSpec::getSearchableAnnotation) + .map(SearchableAnnotation::getFieldType) + .map(ESUtils::getElasticTypeForFieldType) + .collect(Collectors.toSet()); + if (fieldTypes.size() > 1) { + log.warn( + "Multiple field types for field name {}, determining best fit for set: {}", + fieldName, + fieldTypes); + } + return fieldTypes; + } + + private static RangeQueryBuilder buildRangeQueryFromCriterion( + Criterion criterion, + String fieldName, + Map> searchableFields, + Condition condition, + boolean isTimeseries) { + Set fieldTypes = getFieldTypes(searchableFields, fieldName); + + // Determine criterion value, range query only accepts single value so take first value in + // values if multiple + String criterionValueString; + if (!criterion.getValues().isEmpty()) { + criterionValueString = criterion.getValues().get(0).trim(); + } else { + criterionValueString = criterion.getValue().trim(); + } + Object criterionValue; + String documentFieldName; + if (fieldTypes.contains(BOOLEAN_FIELD_TYPE)) { + criterionValue = Boolean.parseBoolean(criterionValueString); + documentFieldName = criterion.getField(); + } else if (fieldTypes.contains(LONG_FIELD_TYPE) || fieldTypes.contains(DATE_FIELD_TYPE)) { + criterionValue = Long.parseLong(criterionValueString); + documentFieldName = criterion.getField(); + } else if (fieldTypes.contains(DOUBLE_FIELD_TYPE)) { + criterionValue = Double.parseDouble(criterionValueString); + documentFieldName = criterion.getField(); + } else { + criterionValue = criterionValueString; + documentFieldName = toKeywordField(criterion.getField(), isTimeseries); + } + + // Set up QueryBuilder based on condition + if (condition == Condition.GREATER_THAN) { + return QueryBuilders.rangeQuery(documentFieldName).gt(criterionValue).queryName(fieldName); + } else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) { + return QueryBuilders.rangeQuery(documentFieldName).gte(criterionValue).queryName(fieldName); + } else if (condition == Condition.LESS_THAN) { + return QueryBuilders.rangeQuery(documentFieldName).lt(criterionValue).queryName(fieldName); + } else /*if (condition == Condition.LESS_THAN_OR_EQUAL_TO)*/ { + return QueryBuilders.rangeQuery(documentFieldName).lte(criterionValue).queryName(fieldName); + } + } + /** * Builds an instance of {@link QueryBuilder} representing an EQUALS condition which was created * using the deprecated 'value' field of Criterion.pdl model. diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index a2b36b7d8ddb8..6cf8e92d61929 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; @@ -290,7 +291,12 @@ public long countByFilter( @Nullable final Filter filter) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().must(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery() + .must( + ESUtils.buildFilterQuery( + filter, + true, + _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap())); CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -313,8 +319,10 @@ public List getAspectValues( @Nullable final Integer limit, @Nullable final Filter filter, @Nullable final SortCriterion sort) { + Map> searchableFields = + _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap(); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().must(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery().must(ESUtils.buildFilterQuery(filter, true, searchableFields)); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -324,7 +332,8 @@ public List getAspectValues( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); - filterQueryBuilder.must(ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true)); + filterQueryBuilder.must( + ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFields)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -332,7 +341,8 @@ public List getAspectValues( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); - filterQueryBuilder.must(ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true)); + filterQueryBuilder.must( + ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFields)); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -400,7 +410,9 @@ public GenericTable getAggregatedStats( public DeleteAspectValuesResult deleteAspectValues( @Nonnull String entityName, @Nonnull String aspectName, @Nonnull Filter filter) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap()); final Optional result = _bulkProcessor @@ -426,7 +438,9 @@ public String deleteAspectValuesAsync( @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap()); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -450,7 +464,9 @@ public String reindexAsync( @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap()); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -498,8 +514,11 @@ public TimeseriesScrollResult scrollAspects( int count, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis) { + + Map> searchableFields = + _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap(); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().filter(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery().filter(ESUtils.buildFilterQuery(filter, true, searchableFields)); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -507,7 +526,8 @@ public TimeseriesScrollResult scrollAspects( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); - filterQueryBuilder.filter(ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true)); + filterQueryBuilder.filter( + ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFields)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -515,7 +535,8 @@ public TimeseriesScrollResult scrollAspects( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); - filterQueryBuilder.filter(ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true)); + filterQueryBuilder.filter( + ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFields)); } SearchResponse response = @@ -537,7 +558,7 @@ public TimeseriesScrollResult scrollAspects( } private SearchResponse executeScrollSearchQuery( - @Nonnull final String entityNname, + @Nonnull final String entityName, @Nonnull final String aspectName, @Nonnull final QueryBuilder query, @Nonnull List sortCriterion, @@ -560,7 +581,7 @@ private SearchResponse executeScrollSearchQuery( searchRequest.source(searchSourceBuilder); ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); - searchRequest.indices(_indexConvention.getTimeseriesAspectIndexName(entityNname, aspectName)); + searchRequest.indices(_indexConvention.getTimeseriesAspectIndexName(entityName, aspectName)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "scrollAspects_search").time()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 539e5dfbaa1d0..f8b2cd8552357 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -377,7 +377,9 @@ public GenericTable getAggregatedStats( @Nullable GroupingBucket[] groupingBuckets) { // Setup the filter query builder using the input filter provided. - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldSpecMap()); AspectSpec aspectSpec = getTimeseriesAspectSpec(entityName, aspectName); // Build and attach the grouping aggregations diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java index d2aef982750bd..4c125065deb4d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java @@ -1,18 +1,27 @@ package com.linkedin.metadata.search.fixtures; +import static com.linkedin.metadata.Constants.*; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossCustomEntities; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static org.testng.Assert.assertTrue; +import static org.testng.Assert.*; import static org.testng.AssertJUnit.assertNotNull; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.MatchedFieldArray; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -169,6 +178,35 @@ public void testNameMatchCustomerOrders() { assertTrue(firstResultScore > secondResultScore); } + @Test + public void testFilterOnCountField() { + assertNotNull(getSearchService()); + Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + ImmutableList.of( + new Criterion() + .setField("rowCount") + .setValue("") + .setValues(new StringArray(ImmutableList.of("68")))))))); + SearchResult searchResult = + searchAcrossEntities( + getSearchService(), + "*", + SEARCHABLE_LONGTAIL_ENTITIES, + filter, + Collections.singletonList(DATASET_ENTITY_NAME)); + assertFalse(searchResult.getEntities().isEmpty()); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + assertEquals( + firstResultUrn.toString(), + "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.dogs_in_movies,PROD)"); + } + /* Tests that should pass but do not yet can be added below here, with the following annotation: @Test(enabled = false) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 6df31b35fecde..8d504c562c99c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -21,7 +21,7 @@ public void testMappingsBuilder() { Map result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec()); assertEquals(result.size(), 1); Map properties = (Map) result.get("properties"); - assertEquals(properties.size(), 20); + assertEquals(properties.size(), 21); assertEquals( properties.get("urn"), ImmutableMap.of( @@ -52,6 +52,7 @@ public void testMappingsBuilder() { assertEquals(properties.get("runId"), ImmutableMap.of("type", "keyword")); assertTrue(properties.containsKey("browsePaths")); assertTrue(properties.containsKey("browsePathV2")); + assertTrue(properties.containsKey("removed")); // KEYWORD Map keyPart3Field = (Map) properties.get("keyPart3"); assertEquals(keyPart3Field.get("type"), "keyword"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index daf2ac58002e0..02c9ea800f0af 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -614,7 +614,7 @@ public void testBrowsePathQueryFilter() { Filter filter = new Filter(); filter.setOr(conjunctiveCriterionArray); - BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter); + BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter, new HashMap<>()); assertEquals(test.should().size(), 1); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 980b82194536e..838df98fdce9c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -4,6 +4,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import java.util.HashMap; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,7 +22,8 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"terms\" : {\n" @@ -40,7 +42,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false); + result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false, new HashMap<>()); expected = "{\n" + " \"terms\" : {\n" @@ -60,7 +62,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"terms\" : {\n" @@ -80,7 +82,8 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion singleValueCriterion = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -103,7 +106,7 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -128,7 +131,8 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion singleValueCriterion = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -151,7 +155,7 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -182,7 +186,8 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1"))); // Ensure that the query is expanded! - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -220,7 +225,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1", "value2"))); // Ensure that the query is expanded without keyword. - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -262,7 +267,8 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"terms\" : {\n" @@ -281,7 +287,8 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -304,7 +311,7 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 8d7701f6d174f..23ca4a4a4247e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -485,6 +485,65 @@ public void testGetAggregatedStatsLatestStatForDay1() { _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); } + @Test( + groups = {"getAggregatedStats"}, + dependsOnGroups = {"upsert"}) + public void testGetAggregatedStatsLatestStatForDay1WithValues() { + // Filter is only on the urn + Criterion hasUrnCriterion = + new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion startTimeCriterion = + new Criterion() + .setField(ES_FIELD_TIMESTAMP) + .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) + .setValues(new StringArray(_startTime.toString())) + .setValue(""); + Criterion endTimeCriterion = + new Criterion() + .setField(ES_FIELD_TIMESTAMP) + .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) + .setValues(new StringArray(String.valueOf(_startTime + 23 * TIME_INCREMENT))) + .setValue(""); + + Filter filter = + QueryUtils.getFilterFromCriteria( + ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion)); + + // Aggregate on latest stat value + AggregationSpec latestStatAggregationSpec = + new AggregationSpec().setAggregationType(AggregationType.LATEST).setFieldPath("stat"); + + // Grouping bucket is only timestamp filed. + GroupingBucket timestampBucket = + new GroupingBucket() + .setKey(ES_FIELD_TIMESTAMP) + .setType(GroupingBucketType.DATE_GROUPING_BUCKET) + .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); + + GenericTable resultTable = + _elasticSearchTimeseriesAspectService.getAggregatedStats( + ENTITY_NAME, + ASPECT_NAME, + new AggregationSpec[] {latestStatAggregationSpec}, + filter, + new GroupingBucket[] {timestampBucket}); + // Validate column names + assertEquals( + resultTable.getColumnNames(), + new StringArray(ES_FIELD_TIMESTAMP, "latest_" + ES_FIELD_STAT)); + // Validate column types + assertEquals(resultTable.getColumnTypes(), new StringArray("long", "long")); + // Validate rows + assertNotNull(resultTable.getRows()); + assertEquals(resultTable.getRows().size(), 1); + assertEquals( + resultTable.getRows(), + new StringArrayArray( + new StringArray( + _startTime.toString(), + _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); + } + @Test( groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index a22a774065852..f3689f9b5d04a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.ScrollResult; @@ -70,6 +71,23 @@ public static SearchResult searchAcrossEntities( facets); } + public static SearchResult searchAcrossEntities( + SearchService searchService, + String query, + @Nullable List facets, + Filter filter, + List entityNames) { + return searchService.searchAcrossEntities( + entityNames, + query, + filter, + null, + 0, + 100, + new SearchFlags().setFulltext(true).setSkipCache(true), + facets); + } + public static SearchResult searchAcrossCustomEntities( SearchService searchService, String query, List searchableEntities) { return searchService.searchAcrossEntities( diff --git a/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz index dd48fe240cdf2..5a412ff4b14e0 100644 Binary files a/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz and b/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz differ diff --git a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl index 3b8aa4f39f7b7..d1daa7b8d4593 100644 --- a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl +++ b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl @@ -97,4 +97,10 @@ record TestEntityInfo includes CustomProperties { "fieldType": "DOUBLE" } doubleField: optional double + + @Searchable = { + "fieldName": "removed", + "fieldType": "BOOLEAN" + } + removed: optional boolean }