From b837e8d1b1a0a5a7e581d2e5ee74efd7e1902593 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 11 Oct 2023 20:15:01 -0300 Subject: [PATCH] fix(search): Detect field type for use in defining the sort order --- .../indexbuilder/MappingsBuilder.java | 48 +++++------- .../query/request/SearchRequestHandler.java | 8 +- .../metadata/search/utils/ESUtils.java | 74 ++++++++++++++++++- .../fixtures/SampleDataFixtureTestBase.java | 64 ++++++++++++++-- 4 files changed, 154 insertions(+), 40 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 004b2e0a2adc4..1edc77bbd214c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; +import com.linkedin.metadata.search.utils.ESUtils; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,15 +32,6 @@ public static Map getPartialNgramConfigWithOverrides(Map KEYWORD_TYPE_MAP = ImmutableMap.of(TYPE, KEYWORD); - // Field Types - public static final String BOOLEAN = "boolean"; - public static final String DATE = "date"; - public static final String DOUBLE = "double"; - public static final String LONG = "long"; - public static final String OBJECT = "object"; - public static final String TEXT = "text"; - public static final String TOKEN_COUNT = "token_count"; - // Subfields public static final String DELIMITED = "delimited"; public static final String LENGTH = "length"; @@ -74,7 +66,7 @@ public static Map getMappings(@Nonnull final EntitySpec entitySp private static Map getMappingsForUrn() { Map subFields = new HashMap<>(); subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, URN_ANALYZER, SEARCH_ANALYZER, URN_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER) @@ -85,13 +77,13 @@ private static Map getMappingsForUrn() { ) )); return ImmutableMap.builder() - .put(TYPE, KEYWORD) + .put(TYPE, ESUtils.KEYWORD_FIELD_TYPE) .put(FIELDS, subFields) .build(); } private static Map getMappingsForRunId() { - return ImmutableMap.builder().put(TYPE, KEYWORD).build(); + return ImmutableMap.builder().put(TYPE, ESUtils.KEYWORD_FIELD_TYPE).build(); } private static Map getMappingsForField(@Nonnull final SearchableFieldSpec searchableFieldSpec) { @@ -104,23 +96,23 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.TEXT || fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { mappingForField.putAll(getMappingsForSearchText(fieldType)); } else if (fieldType == FieldType.BROWSE_PATH) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, SLASH_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.BROWSE_PATH_V2) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, UNIT_SEPARATOR_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_V2_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.URN || fieldType == FieldType.URN_PARTIAL) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(ANALYZER, URN_ANALYZER); mappingForField.put(SEARCH_ANALYZER, URN_SEARCH_ANALYZER); mappingForField.put(SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER); @@ -135,13 +127,13 @@ private static Map getMappingsForField(@Nonnull final Searchable subFields.put(KEYWORD, KEYWORD_TYPE_MAP); mappingForField.put(FIELDS, subFields); } else if (fieldType == FieldType.BOOLEAN) { - mappingForField.put(TYPE, BOOLEAN); + mappingForField.put(TYPE, ESUtils.BOOLEAN_FIELD_TYPE); } else if (fieldType == FieldType.COUNT) { - mappingForField.put(TYPE, LONG); + mappingForField.put(TYPE, ESUtils.LONG_FIELD_TYPE); } else if (fieldType == FieldType.DATETIME) { - mappingForField.put(TYPE, DATE); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, OBJECT); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } @@ -149,10 +141,10 @@ private static Map getMappingsForField(@Nonnull final Searchable searchableFieldSpec.getSearchableAnnotation() .getHasValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, BOOLEAN))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.BOOLEAN_FIELD_TYPE))); searchableFieldSpec.getSearchableAnnotation() .getNumValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, LONG))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.LONG_FIELD_TYPE))); mappings.putAll(getMappingsForFieldNameAliases(searchableFieldSpec)); return mappings; @@ -160,7 +152,7 @@ private static Map getMappingsForField(@Nonnull final Searchable private static Map getMappingsForKeyword() { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); // Add keyword subfield without lowercase filter mappingForField.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP)); @@ -169,7 +161,7 @@ private static Map getMappingsForKeyword() { private static Map getMappingsForSearchText(FieldType fieldType) { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); Map subFields = new HashMap<>(); if (fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { @@ -186,14 +178,14 @@ private static Map getMappingsForSearchText(FieldType fieldType) String fieldName = entry.getKey(); String analyzerName = entry.getValue(); subFields.put(fieldName, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, analyzerName )); } } } subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, TEXT_ANALYZER, SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER)); @@ -206,7 +198,7 @@ private static Map getMappingsForSearchText(FieldType fieldType) private static Map getMappingsForSearchScoreField( @Nonnull final SearchScoreFieldSpec searchScoreFieldSpec) { return ImmutableMap.of(searchScoreFieldSpec.getSearchScoreAnnotation().getFieldName(), - ImmutableMap.of(TYPE, DOUBLE)); + ImmutableMap.of(TYPE, ESUtils.DOUBLE_FIELD_TYPE)); } private static Map getMappingsForFieldNameAliases(@Nonnull final SearchableFieldSpec searchableFieldSpec) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 5fcc10b7af5cf..c06907e800d5e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -202,7 +202,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi if (!finalSearchFlags.isSkipHighlighting()) { searchSourceBuilder.highlighter(_highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); if (finalSearchFlags.isGetSuggestions()) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -243,7 +243,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); searchSourceBuilder.highlighter(getHighlights()); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -270,7 +270,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -301,7 +301,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr searchSourceBuilder.size(size); ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9a7d9a1b4c420..53765acb8e29e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; @@ -49,7 +52,28 @@ public class ESUtils { public static final int MAX_RESULT_SIZE = 10000; public static final String OPAQUE_ID_HEADER = "X-Opaque-Id"; public static final String HEADER_VALUE_DELIMITER = "|"; - public static final String KEYWORD_TYPE = "keyword"; + + // Field types + public static final String KEYWORD_FIELD_TYPE = "keyword"; + public static final String BOOLEAN_FIELD_TYPE = "boolean"; + public static final String DATE_FIELD_TYPE = "date"; + public static final String DOUBLE_FIELD_TYPE = "double"; + public static final String LONG_FIELD_TYPE = "long"; + public static final String OBJECT_FIELD_TYPE = "object"; + public static final String TEXT_FIELD_TYPE = "text"; + public static final String TOKEN_COUNT_FIELD_TYPE = "token_count"; + // End of field types + + public static final Set FIELD_TYPES_STORED_AS_KEYWORD = Set.of( + SearchableAnnotation.FieldType.KEYWORD, + SearchableAnnotation.FieldType.TEXT, + SearchableAnnotation.FieldType.TEXT_PARTIAL, + SearchableAnnotation.FieldType.WORD_GRAM); + public static final Set FIELD_TYPES_STORED_AS_TEXT = Set.of( + SearchableAnnotation.FieldType.BROWSE_PATH, + SearchableAnnotation.FieldType.BROWSE_PATH_V2, + SearchableAnnotation.FieldType.URN, + SearchableAnnotation.FieldType.URN_PARTIAL); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -174,6 +198,25 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); } + public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { + if (FIELD_TYPES_STORED_AS_KEYWORD.contains(fieldType)) { + return KEYWORD_FIELD_TYPE; + } else if (FIELD_TYPES_STORED_AS_TEXT.contains(fieldType)) { + return TEXT_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.BOOLEAN) { + return BOOLEAN_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.COUNT) { + return LONG_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.DATETIME) { + return DATE_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.OBJECT) { + return OBJECT_FIELD_TYPE; + } else { + log.warn("FieldType {} has no mappings implemented", fieldType); + return null; + } + } + /** * Populates source field of search query with the sort order as per the criterion provided. * @@ -189,14 +232,39 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion * @param sortCriterion {@link SortCriterion} to be applied to the search results */ public static void buildSortOrder(@Nonnull SearchSourceBuilder searchSourceBuilder, - @Nullable SortCriterion sortCriterion) { + @Nullable SortCriterion sortCriterion, List entitySpecs) { if (sortCriterion == null) { searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); } else { + Optional fieldTypeForDefault = Optional.empty(); + for (EntitySpec entitySpec : entitySpecs) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); + for (SearchableFieldSpec fieldSpec : fieldSpecs) { + SearchableAnnotation annotation = fieldSpec.getSearchableAnnotation(); + if (annotation.getFieldName().equals(sortCriterion.getField()) + || annotation.getFieldNameAliases().contains(sortCriterion.getField())) { + fieldTypeForDefault = Optional.of(fieldSpec.getSearchableAnnotation().getFieldType()); + break; + } + } + if (fieldTypeForDefault.isPresent()) { + break; + } + } + if (fieldTypeForDefault.isEmpty()) { + log.warn("Sort criterion field " + sortCriterion.getField() + " was not found in any entity spec to be searched"); + } final SortOrder esSortOrder = (sortCriterion.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) ? SortOrder.ASC : SortOrder.DESC; - searchSourceBuilder.sort(new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder).unmappedType(KEYWORD_TYPE)); + FieldSortBuilder sortBuilder = new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder); + if (fieldTypeForDefault.isPresent()) { + String esFieldtype = getElasticTypeForFieldType(fieldTypeForDefault.get()); + if (esFieldtype != null) { + sortBuilder.unmappedType(esFieldtype); + } + } + searchSourceBuilder.sort(sortBuilder); } if (sortCriterion == null || !sortCriterion.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)) { searchSourceBuilder.sort(new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 1660504810296..69dd5c80bef1d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -22,12 +22,15 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.AggregationMetadata; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; +import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.r2.RemoteInvocationException; import org.junit.Assert; import org.opensearch.client.RequestOptions; @@ -36,6 +39,9 @@ import org.opensearch.client.indices.AnalyzeResponse; import org.opensearch.client.indices.GetMappingsRequest; import org.opensearch.client.indices.GetMappingsResponse; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -54,11 +60,7 @@ import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; -import static io.datahubproject.test.search.SearchTestUtils.autocomplete; -import static io.datahubproject.test.search.SearchTestUtils.scroll; -import static io.datahubproject.test.search.SearchTestUtils.search; -import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static io.datahubproject.test.search.SearchTestUtils.searchStructured; +import static io.datahubproject.test.search.SearchTestUtils.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -174,6 +176,48 @@ public void testSearchFieldConfig() throws IOException { } } + @Test + public void testGetSortOrder() { + String dateFieldName = "lastOperationTime"; + List entityNamesToTestSearch = List.of("dataset", "chart", "corpgroup"); + List entitySpecs = entityNamesToTestSearch.stream().map( + name -> getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); + SearchSourceBuilder builder = new SearchSourceBuilder(); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.DESCENDING).setField(dateFieldName); + ESUtils.buildSortOrder(builder, sortCriterion, entitySpecs); + List> sorts = builder.sorts(); + assertEquals(sorts.size(), 2); // sort by last modified and then by urn + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(dateFieldName)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.DESC); + assertEquals(fieldSortBuilder.unmappedType(), "date"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + + // Test alias field + String entityNameField = "_entityName"; + SearchSourceBuilder nameBuilder = new SearchSourceBuilder(); + SortCriterion nameCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField(entityNameField); + ESUtils.buildSortOrder(nameBuilder, nameCriterion, entitySpecs); + sorts = nameBuilder.sorts(); + assertEquals(sorts.size(), 2); + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(entityNameField)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.ASC); + assertEquals(fieldSortBuilder.unmappedType(), "keyword"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + } + @Test public void testDatasetHasTags() throws IOException { GetMappingsRequest req = new GetMappingsRequest() @@ -1454,6 +1498,16 @@ public void testColumnExactMatch() { "Expected table with column name exact match first"); } + @Test + public void testSortOrdering() { + String query = "unit_data"; + SortCriterion criterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); + SearchResult result = getSearchService().searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, criterion, 0, + 100, new SearchFlags().setFulltext(true).setSkipCache(true), null); + assertTrue(result.getEntities().size() > 2, + String.format("%s - Expected search results to have at least two results", query)); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient().indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); }