Skip to content

Commit

Permalink
FIX #19386 & #19388: Fixing Data Insights index mapping (#19423)
Browse files Browse the repository at this point in the history
* Fixing Data Insights index mapping

* Add OpenMetadataOperations cli endpoint to reindex data insights

* Improve IndexMapTemplate building

* Improve the code a bit

* Fix test

(cherry picked from commit 901063b)
  • Loading branch information
IceS2 authored and OpenMetadata Release Bot committed Jan 23, 2025
1 parent 54f7985 commit 95501d8
Show file tree
Hide file tree
Showing 16 changed files with 396 additions and 264 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,19 @@ private void deleteDataQualityDataIndex() {
private void createDataAssetsDataStream() {
DataInsightsSearchInterface searchInterface = getSearchInterface();

ElasticSearchConfiguration config = searchRepository.getElasticSearchConfiguration();
String language =
config != null && config.getSearchIndexMappingLanguage() != null
? config.getSearchIndexMappingLanguage().value()
: "en";

try {
for (String dataAssetType : dataAssetTypes) {
IndexMapping dataAssetIndex = searchRepository.getIndexMapping(dataAssetType);
String dataStreamName = getDataStreamName(dataAssetType);
if (!searchInterface.dataAssetDataStreamExists(dataStreamName)) {
searchInterface.createDataAssetsDataStream(dataStreamName);
searchInterface.createDataAssetsDataStream(
dataStreamName, dataAssetType, dataAssetIndex, language);
}
}
} catch (IOException ex) {
Expand Down Expand Up @@ -312,7 +320,13 @@ private WorkflowStats processCostAnalysis() {
private WorkflowStats processDataAssets() {
DataAssetsWorkflow workflow =
new DataAssetsWorkflow(
timestamp, batchSize, backfill, dataAssetTypes, collectionDAO, searchRepository);
timestamp,
batchSize,
backfill,
dataAssetTypes,
collectionDAO,
searchRepository,
getSearchInterface());
WorkflowStats workflowStats = workflow.getWorkflowStats();

try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.openmetadata.service.apps.bundles.insights.search;

import java.util.List;
import java.util.Map;
import lombok.Getter;

/**
 * Data Insights search configuration: lists of field names grouped under string keys.
 *
 * <p>Instances are produced by deserializing the {@code /dataInsights/config.json} resource.
 * Consumers look the map up by the {@code "common"} key and by entity type name.
 */
public class DataInsightsSearchConfiguration {
  /** Field-name lists keyed by group name ({@code "common"} or an entity type). */
  @Getter private Map<String, List<String>> mappingFields;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.search.models.IndexMapping;
import org.openmetadata.service.util.JsonUtils;

public interface DataInsightsSearchInterface {
String DATA_INSIGHTS_SEARCH_CONFIG_PATH = "/dataInsights/config.json";

void createLifecyclePolicy(String name, String policy) throws IOException;

Expand All @@ -23,7 +27,63 @@ default String readResource(String resourceFile) {
}
}

void createDataAssetsDataStream(String name) throws IOException;
/**
 * Builds the JSON of the data-assets mapping template for one entity type.
 *
 * <p>The base template is enriched with the {@code analysis} settings of the entity's own index
 * mapping and with the property definitions of every configured attribute field that the base
 * template does not already define.
 *
 * @param entityType entity type whose configured attribute fields are merged in
 * @param entityIndexMapping supplies the mapping-file path pattern; the pattern is formatted with
 *     the lower-cased {@code language}
 * @param language mapping language, substituted into the mapping-file path
 * @param indexMappingTemplateStr JSON of the base index mapping template
 * @return the enriched template serialized as JSON
 */
default String buildMapping(
    String entityType,
    IndexMapping entityIndexMapping,
    String language,
    String indexMappingTemplateStr) {
  IndexMappingTemplate indexMappingTemplate =
      JsonUtils.readOrConvertValue(indexMappingTemplateStr, IndexMappingTemplate.class);

  // Locale.ROOT keeps the resource path independent of the JVM default locale.
  String entityMappingFile =
      String.format(entityIndexMapping.getIndexMappingFile(), language.toLowerCase(Locale.ROOT));
  EntityIndexMap entityIndexMap =
      JsonUtils.readOrConvertValue(readResource(entityMappingFile), EntityIndexMap.class);

  List<String> entityAttributeFields =
      getEntityAttributeFields(readDataInsightsSearchConfiguration(), entityType);

  EntityIndexMap template = indexMappingTemplate.getTemplate();

  // Copy the analysis settings only when the entity mapping defines them, so the template never
  // ends up with an explicit null "analysis" entry.
  Map<String, Object> entitySettings = entityIndexMap.getSettings();
  Object analysis = entitySettings == null ? null : entitySettings.get("analysis");
  if (analysis != null) {
    template.getSettings().put("analysis", analysis);
  }

  Map<String, Object> templateProperties = template.getMappings().getProperties();
  for (String attribute : entityAttributeFields) {
    // Definitions already present in the base template take precedence.
    if (templateProperties.containsKey(attribute)) {
      continue;
    }
    Object value = entityIndexMap.getMappings().getProperties().get(attribute);
    if (value != null) {
      templateProperties.put(attribute, value);
    }
  }

  return JsonUtils.pojoToJson(indexMappingTemplate);
}

/**
 * Loads the Data Insights search configuration from the resource at
 * {@link #DATA_INSIGHTS_SEARCH_CONFIG_PATH}.
 *
 * @return the configuration deserialized from that resource
 */
default DataInsightsSearchConfiguration readDataInsightsSearchConfiguration() {
  String rawConfiguration = readResource(DATA_INSIGHTS_SEARCH_CONFIG_PATH);
  return JsonUtils.readOrConvertValue(rawConfiguration, DataInsightsSearchConfiguration.class);
}

/**
 * Returns the attribute fields configured for an entity type: the {@code "common"} fields
 * followed by the fields listed under {@code entityType}.
 *
 * <p>The result is always a new list. The configuration object may be cached and passed in
 * repeatedly (once per entity type), so appending to the list stored inside it would leak the
 * fields of one entity type into every later call.
 *
 * @param dataInsightsSearchConfiguration configuration holding the field lists
 * @param entityType entity type whose specific fields are appended to the common ones
 * @return a fresh, mutable list; a key missing from the configuration contributes no fields
 */
default List<String> getEntityAttributeFields(
    DataInsightsSearchConfiguration dataInsightsSearchConfiguration, String entityType) {
  List<String> entityAttributeFields = new ArrayList<>();

  List<String> commonFields = dataInsightsSearchConfiguration.getMappingFields().get("common");
  if (commonFields != null) {
    entityAttributeFields.addAll(commonFields);
  }

  List<String> entityFields = dataInsightsSearchConfiguration.getMappingFields().get(entityType);
  if (entityFields != null) {
    entityAttributeFields.addAll(entityFields);
  }

  return entityAttributeFields;
}

/**
 * Creates the data assets data stream called {@code name}.
 *
 * <p>The implementations visible next to this interface forward {@code entityType}, {@code
 * entityIndexMapping} and {@code language} to {@link #buildMapping} to produce the
 * {@code "di-data-assets-mapping"} component template before creating the stream.
 *
 * @param name name of the data stream to create
 * @param entityType entity type the stream is created for
 * @param entityIndexMapping index mapping of that entity type
 * @param language mapping language
 * @throws IOException NOTE(review): presumably raised when a call to the search cluster fails;
 *     confirm against the implementations
 */
void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException;

void deleteDataAssetDataStream(String name) throws IOException;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.openmetadata.service.apps.bundles.insights.search;

import java.util.Map;
import lombok.Getter;
import lombok.Setter;

/**
 * Plain holder for an index definition split into {@code settings} and {@code mappings}.
 *
 * <p>It is used both as the deserialization target of an entity index mapping resource and as
 * the {@code template} part of {@link IndexMappingTemplate}.
 */
public class EntityIndexMap {
  /** Index settings keyed by setting name (for example {@code "analysis"}). */
  @Getter @Setter private Map<String, Object> settings;

  /** Mappings section of the index definition. */
  @Getter @Setter private Mappings mappings;

  /** The {@code mappings} section: property definitions keyed by property name. */
  public static class Mappings {
    @Getter @Setter private Map<String, Object> properties;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package org.openmetadata.service.apps.bundles.insights.search;

import lombok.Getter;

/** Wrapper for an index mapping template document whose payload sits under {@code template}. */
public class IndexMappingTemplate {
  /** Settings and mappings carried by the template. */
  @Getter private EntityIndexMap template;
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import es.org.elasticsearch.client.RestClient;
import java.io.IOException;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchInterface;
import org.openmetadata.service.search.models.IndexMapping;

public class ElasticSearchDataInsightsClient implements DataInsightsSearchInterface {
private final RestClient client;
Expand Down Expand Up @@ -52,7 +53,9 @@ public Boolean dataAssetDataStreamExists(String name) throws IOException {
}

@Override
public void createDataAssetsDataStream(String name) throws IOException {
public void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException {
String resourcePath = "/dataInsights/elasticsearch";
createLifecyclePolicy(
"di-data-assets-lifecycle",
Expand All @@ -62,7 +65,11 @@ public void createDataAssetsDataStream(String name) throws IOException {
readResource(String.format("%s/indexSettingsTemplate.json", resourcePath)));
createComponentTemplate(
"di-data-assets-mapping",
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
buildMapping(
entityType,
entityIndexMapping,
language,
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath))));
createIndexTemplate(
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
createDataStream(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.IOException;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchInterface;
import org.openmetadata.service.search.models.IndexMapping;
import os.org.opensearch.client.Request;
import os.org.opensearch.client.Response;
import os.org.opensearch.client.ResponseException;
Expand Down Expand Up @@ -63,14 +64,20 @@ public Boolean dataAssetDataStreamExists(String name) throws IOException {
}

@Override
public void createDataAssetsDataStream(String name) throws IOException {
public void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException {
String resourcePath = "/dataInsights/opensearch";
createLifecyclePolicy(
"di-data-assets-lifecycle",
readResource(String.format("%s/indexLifecyclePolicy.json", resourcePath)));
createComponentTemplate(
"di-data-assets-mapping",
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
buildMapping(
entityType,
entityIndexMapping,
language,
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath))));
createIndexTemplate(
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
createDataStream(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.openmetadata.schema.system.StepStats;
import org.openmetadata.schema.type.Include;
import org.openmetadata.service.apps.bundles.insights.DataInsightsApp;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchConfiguration;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchInterface;
import org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils;
import org.openmetadata.service.apps.bundles.insights.workflows.WorkflowStats;
import org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.processors.DataInsightsElasticSearchProcessor;
Expand All @@ -43,6 +45,7 @@
@Slf4j
public class DataAssetsWorkflow {
public static final String DATA_STREAM_KEY = "DataStreamKey";
public static final String ENTITY_TYPE_FIELDS_KEY = "EnityTypeFields";
private final int retentionDays = 30;
private final Long startTimestamp;
private final Long endTimestamp;
Expand All @@ -51,6 +54,8 @@ public class DataAssetsWorkflow {
private final CollectionDAO collectionDAO;
private final List<PaginatedEntitiesSource> sources = new ArrayList<>();
private final Set<String> entityTypes;
private final DataInsightsSearchConfiguration dataInsightsSearchConfiguration;
private final DataInsightsSearchInterface searchInterface;

private DataInsightsEntityEnricherProcessor entityEnricher;
private Processor entityProcessor;
Expand All @@ -63,7 +68,8 @@ public DataAssetsWorkflow(
Optional<DataInsightsApp.Backfill> backfill,
Set<String> entityTypes,
CollectionDAO collectionDAO,
SearchRepository searchRepository) {
SearchRepository searchRepository,
DataInsightsSearchInterface searchInterface) {
if (backfill.isPresent()) {
Long oldestPossibleTimestamp =
TimestampUtils.getStartOfDayTimestamp(
Expand Down Expand Up @@ -95,6 +101,8 @@ public DataAssetsWorkflow(
this.searchRepository = searchRepository;
this.collectionDAO = collectionDAO;
this.entityTypes = entityTypes;
this.searchInterface = searchInterface;
this.dataInsightsSearchConfiguration = searchInterface.readDataInsightsSearchConfiguration();
}

private void initialize() {
Expand Down Expand Up @@ -146,6 +154,10 @@ public void process() throws SearchIndexException {
deleteDataBeforeInserting(getDataStreamName(source.getEntityType()));
contextData.put(DATA_STREAM_KEY, getDataStreamName(source.getEntityType()));
contextData.put(ENTITY_TYPE_KEY, source.getEntityType());
contextData.put(
ENTITY_TYPE_FIELDS_KEY,
searchInterface.getEntityAttributeFields(
dataInsightsSearchConfiguration, source.getEntityType()));

while (!source.isDone().get()) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static org.openmetadata.schema.EntityInterface.ENTITY_TYPE_TO_CLASS_MAP;
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.END_TIMESTAMP_KEY;
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.START_TIMESTAMP_KEY;
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.ENTITY_TYPE_FIELDS_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.ENTITY_TYPE_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.TIMESTAMP_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getUpdatedStats;
Expand Down Expand Up @@ -141,6 +142,8 @@ private Map<String, Object> enrichEntity(
Long endTimestamp = (Long) entityVersionMap.get("endTimestamp");

Map<String, Object> entityMap = JsonUtils.getMap(entity);
entityMap.keySet().retainAll((List<String>) contextData.get(ENTITY_TYPE_FIELDS_KEY));

String entityType = (String) contextData.get(ENTITY_TYPE_KEY);
List<Class<?>> interfaces = List.of(entity.getClass().getInterfaces());

Expand Down
Loading

0 comments on commit 95501d8

Please sign in to comment.