diff --git a/build.gradle b/build.gradle
index 1b6b82d51c2d4..0a94991b131aa 100644
--- a/build.gradle
+++ b/build.gradle
@@ -8,7 +8,7 @@ buildscript {
   ext.openTelemetryVersion = '1.18.0'
   ext.neo4jVersion = '4.4.9'
   ext.testContainersVersion = '1.17.4'
-  ext.elasticsearchVersion = '7.10.2'
+  ext.elasticsearchVersion = '2.9.0' // ES 7.10, Opensearch 1.x, 2.x
   ext.jacksonVersion = '2.15.2'
   ext.jettyVersion = '9.4.46.v20220331'
   ext.playVersion = '2.8.18'
@@ -90,15 +90,15 @@ project.ext.externalDependency = [
   'ebean': 'io.ebean:ebean:' + ebeanVersion,
   'ebeanAgent': 'io.ebean:ebean-agent:' + ebeanVersion,
   'ebeanDdl': 'io.ebean:ebean-ddl-generator:' + ebeanVersion,
-  'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:' + elasticsearchVersion,
-  'elasticSearchTransport': 'org.elasticsearch.client:transport:' + elasticsearchVersion,
+  'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion,
+  'elasticSearchJava': 'org.opensearch.client:opensearch-java:2.6.0',
   'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
   'graphqlJava': 'com.graphql-java:graphql-java:19.5',
   'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:19.1',
   'gson': 'com.google.code.gson:gson:2.8.9',
   'guice': 'com.google.inject:guice:4.2.3',
   'guava': 'com.google.guava:guava:32.1.2-jre',
-  'h2': 'com.h2database:h2:2.1.214',
+  'h2': 'com.h2database:h2:2.2.224',
   'hadoopCommon':'org.apache.hadoop:hadoop-common:2.7.2',
   'hadoopMapreduceClient':'org.apache.hadoop:hadoop-mapreduce-client-core:2.7.2',
   "hadoopClient": "org.apache.hadoop:hadoop-client:$hadoop3Version",
@@ -202,13 +202,15 @@ project.ext.externalDependency = [
   'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion",
   'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.1.12',
   'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.41',
-  'testng': 'org.testng:testng:7.3.0',
+  'testngJava8': 'org.testng:testng:7.5.1',
+  'testng': 'org.testng:testng:7.8.0',
   'testContainers': 'org.testcontainers:testcontainers:' + testContainersVersion,
   'testContainersJunit': 'org.testcontainers:junit-jupiter:' + testContainersVersion,
   'testContainersPostgresql':'org.testcontainers:postgresql:' + testContainersVersion,
   'testContainersElasticsearch': 'org.testcontainers:elasticsearch:' + testContainersVersion,
   'testContainersCassandra': 'org.testcontainers:cassandra:' + testContainersVersion,
   'testContainersKafka': 'org.testcontainers:kafka:' + testContainersVersion,
+  'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0',
   'typesafeConfig':'com.typesafe:config:1.4.1',
   'wiremock':'com.github.tomakehurst:wiremock:2.10.0',
   'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14',
@@ -257,7 +259,6 @@ subprojects {

   plugins.withType(JavaPlugin) {
     dependencies {
-      testImplementation externalDependency.testng
      constraints {
        implementation('io.netty:netty-all:4.1.86.Final')
        implementation('org.apache.commons:commons-compress:1.21')
@@ -268,12 +269,6 @@ subprojects {
      }
    }

-    tasks.withType(Test) {
-      if (!name.startsWith('integ')) {
-        useTestNG()
-      }
-    }
-
    checkstyle {
      configDirectory = file("${project.rootDir}/gradle/checkstyle")
      sourceSets = [ getProject().sourceSets.main, getProject().sourceSets.test ]
@@ -292,6 +287,13 @@ subprojects {
    javaLauncher = javaToolchains.launcherFor {
      languageVersion = JavaLanguageVersion.of(11)
    }
+    // https://docs.gradle.org/current/userguide/performance.html
+    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
+
+    if (project.configurations.getByName("testImplementation").getDependencies()
+        .any{ it.getName() == "testng" }) {
+      useTestNG()
+    }
  }

  afterEvaluate {
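With the blanket useTestNG() call removed, a module now opts into TestNG by declaring the dependency itself, as datahub-graphql-core does further down. A minimal sketch of a test that the conditional useTestNG() above would pick up; the class and assertion are illustrative, not taken from this PR:

import org.testng.Assert;
import org.testng.annotations.Test;

// Any module declaring `testImplementation externalDependency.testng`
// now gets its suite run under the TestNG 7.8.0 runner configured above.
public class TestNgOptInExample {

  @Test
  public void testRunnerIsWiredUp() {
    // Plain TestNG assertion; no junit.framework.TestCase base class needed.
    Assert.assertEquals(2 + 2, 4);
  }
}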
diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java
index eb95078b1a640..98f3b82285eda 100644
--- a/datahub-frontend/app/auth/AuthModule.java
+++ b/datahub-frontend/app/auth/AuthModule.java
@@ -11,16 +11,19 @@
 import com.google.inject.AbstractModule;
 import com.google.inject.Provides;
 import com.google.inject.Singleton;
-import com.linkedin.entity.client.EntityClient;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import com.linkedin.metadata.restli.DefaultRestliClientFactory;
 import com.linkedin.parseq.retry.backoff.ExponentialBackoff;
 import com.linkedin.util.Configuration;
+import config.ConfigurationProvider;
 import controllers.SsoCallbackController;
+
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
@@ -34,6 +37,7 @@
 import org.pac4j.play.store.PlayCookieSessionStore;
 import org.pac4j.play.store.PlaySessionStore;
 import org.pac4j.play.store.ShiroAesDataEncrypter;
+import org.springframework.context.annotation.AnnotationConfigApplicationContext;
 import play.Environment;
 import play.cache.SyncCacheApi;
 import utils.ConfigUtil;
@@ -104,7 +108,7 @@ protected void configure() {
       bind(SsoCallbackController.class).toConstructor(SsoCallbackController.class.getConstructor(
           SsoManager.class,
           Authentication.class,
-          EntityClient.class,
+          SystemEntityClient.class,
           AuthServiceClient.class,
           com.typesafe.config.Config.class));
     } catch (NoSuchMethodException | SecurityException e) {
@@ -161,10 +165,19 @@ protected Authentication provideSystemAuthentication() {

   @Provides
   @Singleton
-  protected EntityClient provideEntityClient() {
-    return new RestliEntityClient(buildRestliClient(),
+  protected ConfigurationProvider provideConfigurationProvider() {
+    AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(ConfigurationProvider.class);
+    return context.getBean(ConfigurationProvider.class);
+  }
+
+  @Provides
+  @Singleton
+  protected SystemEntityClient provideEntityClient(final Authentication systemAuthentication,
+                                                   final ConfigurationProvider configurationProvider) {
+    return new SystemRestliEntityClient(buildRestliClient(),
         new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)),
-        _configs.getInt(ENTITY_CLIENT_NUM_RETRIES));
+        _configs.getInt(ENTITY_CLIENT_NUM_RETRIES), systemAuthentication,
+        configurationProvider.getCache().getClient().getEntityClient());
   }

   @Provides
diff --git a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java
index 85139d1db0868..4bde0872fc082 100644
--- a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java
+++ b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java
@@ -13,7 +13,7 @@
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.template.SetMode;
 import com.linkedin.entity.Entity;
-import com.linkedin.entity.client.EntityClient;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.events.metadata.ChangeType;
 import com.linkedin.identity.CorpGroupInfo;
 import com.linkedin.identity.CorpUserEditableInfo;
@@ -78,13 +78,14 @@ public class OidcCallbackLogic extends DefaultCallbackLogic<Result, PlayWebContext> {

   private final SsoManager _ssoManager;
-  private final EntityClient _entityClient;
+  private final SystemEntityClient _entityClient;
   private final Authentication _systemAuthentication;
   private final AuthServiceClient _authClient;
   private final CookieConfigs _cookieConfigs;

   public OidcCallbackLogic(final SsoManager ssoManager, final Authentication systemAuthentication,
-      final EntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) {
+      final SystemEntityClient entityClient, final AuthServiceClient authClient,
+      final CookieConfigs cookieConfigs) {
     _ssoManager = ssoManager;
     _systemAuthentication = systemAuthentication;
     _entityClient = entityClient;
diff --git a/datahub-frontend/app/config/ConfigurationProvider.java b/datahub-frontend/app/config/ConfigurationProvider.java
new file mode 100644
index 0000000000000..00a5472ec3476
--- /dev/null
+++ b/datahub-frontend/app/config/ConfigurationProvider.java
@@ -0,0 +1,27 @@
+package config;
+
+import com.linkedin.metadata.config.cache.CacheConfiguration;
+import com.linkedin.metadata.spring.YamlPropertySourceFactory;
+import lombok.Data;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.PropertySource;
+
+
+/**
+ * Minimal sharing between metadata-service and frontend.
+ * Initially for use of client caching configuration.
+ * Does not use the factories module to avoid transitive dependencies.
+ */
+@EnableConfigurationProperties
+@PropertySource(value = "application.yml", factory = YamlPropertySourceFactory.class)
+@ConfigurationProperties
+@Data
+public class ConfigurationProvider {
+
+  /**
+   * Configuration for caching
+   */
+  private CacheConfiguration cache;
+}
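Because the Play frontend has no Spring Boot application of its own, this class is bootstrapped by hand; a minimal sketch of that wiring, mirroring the provideConfigurationProvider() provider in AuthModule above (class and variable names are illustrative):

import org.springframework.context.annotation.AnnotationConfigApplicationContext;

import config.ConfigurationProvider;

public class ConfigurationProviderBootstrap {

  // Stand up a throwaway annotation-config context so @ConfigurationProperties
  // binding runs against application.yml without dragging in the
  // metadata-service factories module.
  public static ConfigurationProvider load() {
    AnnotationConfigApplicationContext context =
        new AnnotationConfigApplicationContext(ConfigurationProvider.class);
    return context.getBean(ConfigurationProvider.class);
  }
}

The resulting bean is what feeds provideEntityClient() its client-side cache settings via configurationProvider.getCache().getClient().getEntityClient().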
diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java
index 5a36d833deceb..7a4b5585cc21a 100644
--- a/datahub-frontend/app/controllers/SsoCallbackController.java
+++ b/datahub-frontend/app/controllers/SsoCallbackController.java
@@ -3,7 +3,7 @@
 import auth.CookieConfigs;
 import client.AuthServiceClient;
 import com.datahub.authentication.Authentication;
-import com.linkedin.entity.client.EntityClient;
+import com.linkedin.entity.client.SystemEntityClient;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.concurrent.CompletableFuture;
@@ -40,7 +40,7 @@ public class SsoCallbackController extends CallbackController {
   public SsoCallbackController(
       @Nonnull SsoManager ssoManager,
       @Nonnull Authentication systemAuthentication,
-      @Nonnull EntityClient entityClient,
+      @Nonnull SystemEntityClient entityClient,
       @Nonnull AuthServiceClient authClient,
       @Nonnull com.typesafe.config.Config configs) {
     _ssoManager = ssoManager;
@@ -79,7 +79,7 @@ public class SsoCallbackLogic implements CallbackLogic<Result, PlayWebContext> {
     private final OidcCallbackLogic _oidcCallbackLogic;

     SsoCallbackLogic(final SsoManager ssoManager, final Authentication systemAuthentication,
-        final EntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) {
+        final SystemEntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) {
       _oidcCallbackLogic = new OidcCallbackLogic(ssoManager, systemAuthentication, entityClient, authClient, cookieConfigs);
     }
diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle
index e40f8e3eeb96d..daecba16cbf72 100644
--- a/datahub-frontend/play.gradle
+++ b/datahub-frontend/play.gradle
@@ -16,9 +16,6 @@ dependencies {
   implementation project(':datahub-web-react')

   constraints {
-    play(externalDependency.springCore)
-    play(externalDependency.springBeans)
-    play(externalDependency.springContext)
     play(externalDependency.jacksonDataBind)
     play('com.nimbusds:oauth2-oidc-sdk:8.36.2')
     play('com.nimbusds:nimbus-jose-jwt:8.18')
@@ -35,7 +32,12 @@ dependencies {
   implementation project(":metadata-service:restli-client")
   implementation project(":metadata-service:auth-config")
+  implementation project(":metadata-service:configuration")

+  implementation externalDependency.springCore
+  implementation externalDependency.springBeans
+  implementation externalDependency.springContext
+  implementation externalDependency.springBootAutoconfigure
   implementation externalDependency.jettyJaas
   implementation externalDependency.graphqlJava
   implementation externalDependency.antlr4Runtime
diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle
index 89ba8f17b6aeb..fba0031351b58 100644
--- a/datahub-graphql-core/build.gradle
+++ b/datahub-graphql-core/build.gradle
@@ -24,6 +24,7 @@ dependencies {
   annotationProcessor externalDependency.lombok

   testImplementation externalDependency.mockito
+  testImplementation externalDependency.testng
 }

 graphqlCodegen {
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index d86234cf59306..3ba0cc1f747e3 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -302,6 +302,7 @@
 import com.linkedin.datahub.graphql.types.test.TestType;
 import com.linkedin.datahub.graphql.types.view.DataHubViewType;
 import com.linkedin.entity.client.EntityClient;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.metadata.config.DataHubConfiguration;
 import com.linkedin.metadata.config.IngestionConfiguration;
 import com.linkedin.metadata.config.TestsConfiguration;
@@ -364,6 +365,7 @@ public class GmsGraphQLEngine {

   private final EntityClient entityClient;
+  private final SystemEntityClient systemEntityClient;
   private final GraphClient graphClient;
   private final UsageClient usageClient;
   private final SiblingGraphService siblingGraphService;
@@ -476,6 +478,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) {
     this.graphQLPlugins.forEach(plugin -> plugin.init(args));

     this.entityClient = args.entityClient;
+    this.systemEntityClient = args.systemEntityClient;
     this.graphClient = args.graphClient;
     this.usageClient = args.usageClient;
     this.siblingGraphService = args.siblingGraphService;
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java
index cbcf42c4f93d9..157fb10ce7078 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java
@@ -11,6 +11,7 @@
 import com.linkedin.datahub.graphql.analytics.service.AnalyticsService;
 import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
 import com.linkedin.entity.client.EntityClient;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.metadata.config.DataHubConfiguration;
 import com.linkedin.metadata.config.IngestionConfiguration;
 import com.linkedin.metadata.config.TestsConfiguration;
@@ -38,6 +39,7 @@
 @Data
 public class GmsGraphQLEngineArgs {
   EntityClient entityClient;
+  SystemEntityClient systemEntityClient;
   GraphClient graphClient;
   UsageClient usageClient;
   AnalyticsService analyticsService;
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java
index 44b1779f8b006..4135a7b0da148 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java
@@ -20,25 +20,25 @@
 import javax.annotation.Nonnull;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.aggregations.AggregationBuilder;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.Aggregations;
-import org.elasticsearch.search.aggregations.BucketOrder;
-import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
-import org.elasticsearch.search.aggregations.bucket.filter.Filter;
-import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
-import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
-import org.elasticsearch.search.aggregations.bucket.terms.Terms;
-import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
-import org.elasticsearch.search.aggregations.metrics.Cardinality;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.QueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.search.aggregations.AggregationBuilder;
+import org.opensearch.search.aggregations.AggregationBuilders;
+import org.opensearch.search.aggregations.Aggregations;
+import org.opensearch.search.aggregations.BucketOrder;
+import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation;
+import org.opensearch.search.aggregations.bucket.filter.Filter;
+import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval;
+import org.opensearch.search.aggregations.bucket.histogram.Histogram;
+import org.opensearch.search.aggregations.bucket.terms.Terms;
+import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+import org.opensearch.search.aggregations.metrics.Cardinality;
+import org.opensearch.search.builder.SearchSourceBuilder;

 @Slf4j
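The OpenSearch rest-high-level-client keeps the Elasticsearch 7.10 API surface, so these import swaps are mechanical. For reference, a minimal search against the relocated types; the index name and count helper are illustrative, not taken from AnalyticsService:

import java.io.IOException;

import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestHighLevelClient;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.builder.SearchSourceBuilder;

public class OpenSearchSmokeTest {

  // Counts documents in an index using the same request/response types
  // AnalyticsService now imports from org.opensearch.*.
  static long countDocs(RestHighLevelClient client, String indexName) throws IOException {
    SearchRequest request = new SearchRequest(indexName)
        .source(new SearchSourceBuilder()
            .query(QueryBuilders.matchAllQuery())
            .size(0)                  // metadata only, no hits payload
            .trackTotalHits(true));   // exact total, as in ES 7.x
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    return response.getHits().getTotalHits().value;
  }
}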
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java
index f27fd604a746f..23be49c7e7140 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java
@@ -1,13 +1,16 @@
 package com.linkedin.datahub.graphql.resolvers.dataset;

+import com.datahub.authorization.ResourceSpec;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.common.urn.UrnUtils;
 import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.authorization.AuthorizationUtils;
 import com.linkedin.datahub.graphql.generated.CorpUser;
 import com.linkedin.datahub.graphql.generated.DatasetStatsSummary;
 import com.linkedin.datahub.graphql.generated.Entity;
+import com.linkedin.metadata.authorization.PoliciesConfig;
 import com.linkedin.usage.UsageClient;
 import com.linkedin.usage.UsageTimeRange;
 import com.linkedin.usage.UserUsageCounts;
@@ -15,6 +18,7 @@
 import graphql.schema.DataFetchingEnvironment;
 import java.util.List;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
@@ -55,8 +59,15 @@ public CompletableFuture<DatasetStatsSummary> get(DataFetchingEnvironment enviro

       try {
+        if (!isAuthorized(resourceUrn, context)) {
+          log.debug("User {} is not authorized to view profile information for dataset {}",
+              context.getActorUrn(),
+              resourceUrn.toString());
+          return null;
+        }
+
         com.linkedin.usage.UsageQueryResult
-            usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), UsageTimeRange.MONTH, context.getAuthentication());
+            usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), UsageTimeRange.MONTH);

         final DatasetStatsSummary result = new DatasetStatsSummary();
         result.setQueryCountLast30Days(usageQueryResult.getAggregations().getTotalSqlQueries());
@@ -90,4 +101,10 @@ private CorpUser createPartialUser(final Urn userUrn) {
     result.setUrn(userUrn.toString());
     return result;
   }
+
+  private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) {
+    return AuthorizationUtils.isAuthorized(context,
+        Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())),
+        PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE);
+  }
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java
index 0476963b92e9a..20361830ad5a5 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java
@@ -9,12 +9,10 @@
 import com.linkedin.datahub.graphql.generated.UsageQueryResult;
 import com.linkedin.datahub.graphql.types.usage.UsageQueryResultMapper;
 import com.linkedin.metadata.authorization.PoliciesConfig;
-import com.linkedin.r2.RemoteInvocationException;
 import com.linkedin.usage.UsageClient;
 import com.linkedin.usage.UsageTimeRange;
 import graphql.schema.DataFetcher;
 import graphql.schema.DataFetchingEnvironment;
-import java.net.URISyntaxException;
 import java.util.Optional;
 import java.util.concurrent.CompletableFuture;
 import lombok.extern.slf4j.Slf4j;
@@ -44,10 +42,10 @@ public CompletableFuture<UsageQueryResult> get(DataFetchingEnvironment environme
       }
       try {
         com.linkedin.usage.UsageQueryResult
-            usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), range, context.getAuthentication());
+            usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), range);
         return UsageQueryResultMapper.map(usageQueryResult);
-      } catch (RemoteInvocationException | URISyntaxException e) {
-        throw new RuntimeException(String.format("Failed to load Usage Stats for resource %s", resourceUrn.toString()), e);
+      } catch (Exception e) {
+        throw new RuntimeException(String.format("Failed to load Usage Stats for resource %s", resourceUrn), e);
       }
     });
   }
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java
index c391615db9268..7cd548a4790ba 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java
@@ -15,7 +15,6 @@
 import com.linkedin.metadata.query.filter.CriterionArray;
 import com.linkedin.metadata.query.filter.Filter;
 import graphql.schema.DataFetchingEnvironment;
-import junit.framework.TestCase;
 import org.testng.annotations.Test;
 import org.mockito.Mockito;

@@ -24,9 +23,10 @@
 import java.util.stream.Collectors;

 import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;
+import static org.testng.AssertJUnit.assertEquals;


-public class ResolverUtilsTest extends TestCase {
+public class ResolverUtilsTest {

   @Test
   public void testCriterionFromFilter() throws Exception {
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java
index 8c23335b7e9d3..52d06f73dcfab 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java
@@ -1,5 +1,6 @@
 package com.linkedin.datahub.graphql.resolvers.auth;

+import com.datahub.authentication.Authentication;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.datahub.graphql.QueryContext;
 import com.linkedin.datahub.graphql.TestUtils;
@@ -8,17 +9,21 @@
 import com.linkedin.datahub.graphql.generated.ListAccessTokenResult;
 import com.linkedin.entity.client.EntityClient;
 import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.query.SearchFlags;
+import com.linkedin.metadata.query.filter.SortCriterion;
+import com.linkedin.metadata.search.SearchEntityArray;
+import com.linkedin.metadata.search.SearchResult;
 import graphql.schema.DataFetchingEnvironment;
 import java.util.Collections;
-import junit.framework.TestCase;
 import org.mockito.Mockito;
+import org.testng.annotations.Test;

 import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;


-public class ListAccessTokensResolverTest extends TestCase {
+public class ListAccessTokensResolverTest {

-//  @Test
+  @Test
   public void testGetSuccess() throws Exception {
     final DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class);
     final QueryContext mockAllowContext = TestUtils.getMockAllowContext();
@@ -36,14 +41,17 @@ public void testGetSuccess() throws Exception {
     Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input);

     final EntityClient mockClient = Mockito.mock(EntityClient.class);
-    Mockito.when(mockClient.filter(
-        Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME),
-        Mockito.eq(buildFilter(filters, Collections.emptyList())),
-        Mockito.notNull(),
-        Mockito.eq(input.getStart()),
-        Mockito.eq(input.getCount()),
-        Mockito.eq(getAuthentication(mockEnv))))
-        .thenReturn(null);
+    final Authentication testAuth = getAuthentication(mockEnv);
+    Mockito.when(mockClient.search(
+        Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME),
+        Mockito.eq(""),
+        Mockito.eq(buildFilter(filters, Collections.emptyList())),
+        Mockito.any(SortCriterion.class),
+        Mockito.eq(input.getStart()),
+        Mockito.eq(input.getCount()),
+        Mockito.eq(testAuth),
+        Mockito.any(SearchFlags.class)))
+        .thenReturn(new SearchResult().setFrom(0).setNumEntities(0).setPageSize(0).setEntities(new SearchEntityArray()));

     final ListAccessTokensResolver resolver = new ListAccessTokensResolver(mockClient);
     final ListAccessTokenResult listAccessTokenResult = resolver.get(mockEnv).get();
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java
index 163628c1bc590..6a9617ea41b44 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java
@@ -117,8 +117,7 @@ public void testGetException() throws Exception {
     UsageClient mockClient = Mockito.mock(UsageClient.class);
     Mockito.when(mockClient.getUsageStats(
         Mockito.eq(TEST_DASHBOARD_URN),
-        Mockito.eq(UsageTimeRange.MONTH),
-        Mockito.any(Authentication.class)
+        Mockito.eq(UsageTimeRange.MONTH)
     )).thenThrow(RuntimeException.class);

     // Execute resolver
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java
index bd3edf65bf7ad..013e23b779c51 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java
@@ -1,6 +1,8 @@
 package com.linkedin.datahub.graphql.resolvers.dataset;

 import com.datahub.authentication.Authentication;
+import com.datahub.authorization.AuthorizationResult;
+import com.datahub.plugins.auth.authorization.Authorizer;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.UrnUtils;
 import com.linkedin.datahub.graphql.QueryContext;
@@ -53,13 +55,18 @@ public void testGetSuccess() throws Exception {
     UsageClient mockClient = Mockito.mock(UsageClient.class);
     Mockito.when(mockClient.getUsageStats(
         Mockito.eq(TEST_DATASET_URN),
-        Mockito.eq(UsageTimeRange.MONTH),
-        Mockito.any(Authentication.class)
+        Mockito.eq(UsageTimeRange.MONTH)
     )).thenReturn(testResult);

     // Execute resolver
     DatasetStatsSummaryResolver resolver = new DatasetStatsSummaryResolver(mockClient);
     QueryContext mockContext = Mockito.mock(QueryContext.class);
+    Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test");
+    Authorizer mockAuthorizer = Mockito.mock(Authorizer.class);
+    AuthorizationResult mockAuthorizerResult = Mockito.mock(AuthorizationResult.class);
+    Mockito.when(mockAuthorizerResult.getType()).thenReturn(AuthorizationResult.Type.ALLOW);
+    Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(mockAuthorizerResult);
+    Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer);
     Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class));
     DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class);
     Mockito.when(mockEnv.getSource()).thenReturn(TEST_SOURCE);
@@ -79,8 +86,7 @@ public void testGetSuccess() throws Exception {
     newResult.setAggregations(new UsageQueryResultAggregations());

     Mockito.when(mockClient.getUsageStats(
         Mockito.eq(TEST_DATASET_URN),
-        Mockito.eq(UsageTimeRange.MONTH),
-        Mockito.any(Authentication.class)
+        Mockito.eq(UsageTimeRange.MONTH)
     )).thenReturn(newResult);

     // Then verify that the new result is _not_ returned (cache hit)
@@ -116,8 +122,7 @@ public void testGetException() throws Exception {
     UsageClient mockClient = Mockito.mock(UsageClient.class);
     Mockito.when(mockClient.getUsageStats(
         Mockito.eq(TEST_DATASET_URN),
-        Mockito.eq(UsageTimeRange.MONTH),
-        Mockito.any(Authentication.class)
+        Mockito.eq(UsageTimeRange.MONTH)
     )).thenThrow(RuntimeException.class);

     // Execute resolver
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolverTest.java
index 2d9f43029c479..ebafd1782e000 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolverTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolverTest.java
@@ -14,11 +14,12 @@
 import com.linkedin.r2.RemoteInvocationException;
 import graphql.schema.DataFetchingEnvironment;
 import java.util.HashSet;
+
 import org.mockito.Mockito;
 import org.testng.annotations.Test;

-import static org.testng.Assert.*;
 import static com.linkedin.datahub.graphql.resolvers.ingest.IngestTestUtils.*;
+import static org.testng.Assert.assertThrows;


 public class GetIngestionSourceResolverTest {
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java
index d8325e9a74740..1adf7b1200574 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/SiblingsUtilsTest.java
@@ -6,7 +6,6 @@
 import com.linkedin.common.urn.UrnUtils;
 import com.linkedin.datahub.graphql.resolvers.mutate.util.SiblingsUtils;
 import com.linkedin.metadata.entity.EntityService;
-import junit.framework.TestCase;
 import org.mockito.Mockito;
 import org.testng.annotations.Test;

@@ -14,8 +13,9 @@
 import java.util.Optional;

 import static com.linkedin.metadata.Constants.SIBLINGS_ASPECT_NAME;
+import static org.testng.AssertJUnit.assertEquals;

-public class SiblingsUtilsTest extends TestCase {
+public class SiblingsUtilsTest {

   private static final String TEST_DATASET_URN1 = "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)";
   private static final String TEST_DATASET_URN2 = "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created2,PROD)";
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java
index 989ebc18e9f6c..0a58ff88586c6 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/DateUtilTest.java
@@ -1,12 +1,13 @@
 package com.linkedin.datahub.graphql.utils;

 import com.linkedin.datahub.graphql.util.DateUtil;
-import junit.framework.TestCase;
 import org.joda.time.DateTime;
 import org.mockito.Mockito;
 import org.testng.annotations.Test;

-public class DateUtilTest extends TestCase {
+import static org.testng.AssertJUnit.assertEquals;
+
+public class DateUtilTest {

   private DateTime setTimeParts(int dayOfMonth, boolean zeroTime) {
     DateTime result = new DateTime()
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java
index e205fd2f5c20e..270aa11c7b070 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java
@@ -1,11 +1,10 @@
 package com.linkedin.datahub.upgrade.common.steps;

-import com.datahub.authentication.Authentication;
 import com.linkedin.datahub.upgrade.UpgradeContext;
 import com.linkedin.datahub.upgrade.UpgradeStep;
 import com.linkedin.datahub.upgrade.UpgradeStepResult;
 import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import java.util.function.Function;
 import lombok.RequiredArgsConstructor;

@@ -13,8 +12,7 @@
 @RequiredArgsConstructor
 public class GMSDisableWriteModeStep implements UpgradeStep {

-  private final Authentication _systemAuthentication;
-  private final RestliEntityClient _entityClient;
+  private final SystemRestliEntityClient _entityClient;

   @Override
   public String id() {
@@ -30,7 +28,7 @@ public int retryCount() {
   public Function<UpgradeContext, UpgradeStepResult> executable() {
     return (context) -> {
       try {
-        _entityClient.setWritable(false, _systemAuthentication);
+        _entityClient.setWritable(false);
       } catch (Exception e) {
         e.printStackTrace();
         context.report().addLine("Failed to turn write mode off in GMS");
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java
index 270eff8df227c..8df02123983e8 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java
@@ -1,20 +1,17 @@
 package com.linkedin.datahub.upgrade.common.steps;

-import com.datahub.authentication.Authentication;
 import com.linkedin.datahub.upgrade.UpgradeContext;
 import com.linkedin.datahub.upgrade.UpgradeStep;
 import com.linkedin.datahub.upgrade.UpgradeStepResult;
 import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import java.util.function.Function;
 import lombok.RequiredArgsConstructor;


 @RequiredArgsConstructor
 public class GMSEnableWriteModeStep implements UpgradeStep {
-
-  private final Authentication _systemAuthentication;
-  private final RestliEntityClient _entityClient;
+  private final SystemRestliEntityClient _entityClient;

   @Override
   public String id() {
@@ -30,7 +27,7 @@ public int retryCount() {
   public Function<UpgradeContext, UpgradeStepResult> executable() {
     return (context) -> {
       try {
-        _entityClient.setWritable(true, _systemAuthentication);
+        _entityClient.setWritable(true);
       } catch (Exception e) {
         e.printStackTrace();
         context.report().addLine("Failed to turn write mode back on in GMS");
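The pattern running through these upgrade steps: SystemRestliEntityClient binds the system Authentication at construction time (see provideEntityClient() in AuthModule above), so call sites drop the explicit credential argument. The real class lives in metadata-service:restli-client and is not shown in this diff; a hypothetical sketch of the shape it implies:

import com.datahub.authentication.Authentication;
import com.linkedin.entity.client.RestliEntityClient;

// Illustrative only -- the actual SystemRestliEntityClient differs in detail.
// The idea: bind the system identity once, expose credential-free operations.
public class SystemClientSketch {
  private final RestliEntityClient delegate;
  private final Authentication systemAuthentication;

  public SystemClientSketch(RestliEntityClient delegate, Authentication systemAuthentication) {
    this.delegate = delegate;
    this.systemAuthentication = systemAuthentication;
  }

  public void setWritable(boolean canWrite) throws Exception {
    // The same call the upgrade steps used to make, minus the explicit credential.
    delegate.setWritable(canWrite, systemAuthentication);
  }
}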
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
index 0fb8b0eb6e20f..23ea81009fa1d 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
@@ -5,7 +5,7 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import io.ebean.Database;
 import javax.annotation.Nonnull;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.ApplicationContext;
 import org.springframework.context.annotation.Bean;
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
index 30175c6fa78c8..cd264e529e9a5 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
@@ -1,8 +1,7 @@
 package com.linkedin.datahub.upgrade.config;

-import com.datahub.authentication.Authentication;
 import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import com.linkedin.metadata.entity.EntityService;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import io.ebean.Database;
@@ -21,15 +20,14 @@ public class NoCodeUpgradeConfig {
   ApplicationContext applicationContext;

   @Bean(name = "noCodeUpgrade")
-  @DependsOn({"ebeanServer", "entityService", "systemAuthentication", "restliEntityClient", "entityRegistry"})
+  @DependsOn({"ebeanServer", "entityService", "systemRestliEntityClient", "entityRegistry"})
   @Nonnull
   public NoCodeUpgrade createInstance() {
     final Database ebeanServer = applicationContext.getBean(Database.class);
     final EntityService entityService = applicationContext.getBean(EntityService.class);
-    final Authentication systemAuthentication = applicationContext.getBean(Authentication.class);
-    final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class);
+    final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class);
     final EntityRegistry entityRegistry = applicationContext.getBean(EntityRegistry.class);

-    return new NoCodeUpgrade(ebeanServer, entityService, entityRegistry, systemAuthentication, entityClient);
+    return new NoCodeUpgrade(ebeanServer, entityService, entityRegistry, entityClient);
   }
 }
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
index 9b0fcf279abf5..97a08800534de 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
@@ -1,8 +1,7 @@
 package com.linkedin.datahub.upgrade.config;

-import com.datahub.authentication.Authentication;
 import com.linkedin.datahub.upgrade.restorebackup.RestoreBackup;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import com.linkedin.metadata.entity.EntityService;
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.models.registry.EntityRegistry;
@@ -22,19 +21,18 @@ public class RestoreBackupConfig {
   ApplicationContext applicationContext;

   @Bean(name = "restoreBackup")
-  @DependsOn({"ebeanServer", "entityService", "systemAuthentication", "restliEntityClient", "graphService",
+  @DependsOn({"ebeanServer", "entityService", "systemRestliEntityClient", "graphService",
       "searchService", "entityRegistry"})
   @Nonnull
   public RestoreBackup createInstance() {
     final Database ebeanServer = applicationContext.getBean(Database.class);
     final EntityService entityService = applicationContext.getBean(EntityService.class);
-    final Authentication systemAuthentication = applicationContext.getBean(Authentication.class);
-    final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class);
+    final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class);
     final GraphService graphClient = applicationContext.getBean(GraphService.class);
     final EntitySearchService searchClient = applicationContext.getBean(EntitySearchService.class);
     final EntityRegistry entityRegistry = applicationContext.getBean(EntityRegistry.class);

-    return new RestoreBackup(ebeanServer, entityService, entityRegistry, systemAuthentication, entityClient,
+    return new RestoreBackup(ebeanServer, entityService, entityRegistry, entityClient,
         graphClient, searchClient);
   }
 }
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
index ee4a3bc504e77..a299deb874721 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
@@ -1,13 +1,12 @@
 package com.linkedin.datahub.upgrade.nocode;

-import com.datahub.authentication.Authentication;
 import com.google.common.collect.ImmutableMap;
 import com.linkedin.datahub.upgrade.Upgrade;
 import com.linkedin.datahub.upgrade.UpgradeCleanupStep;
 import com.linkedin.datahub.upgrade.UpgradeStep;
 import com.linkedin.datahub.upgrade.common.steps.GMSEnableWriteModeStep;
 import com.linkedin.datahub.upgrade.common.steps.GMSQualificationStep;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import com.linkedin.metadata.entity.EntityService;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import io.ebean.Database;
@@ -30,12 +29,10 @@ public NoCodeUpgrade(
       final Database server,
       final EntityService entityService,
       final EntityRegistry entityRegistry,
-      final Authentication systemAuthentication,
-      final RestliEntityClient entityClient) {
+      final SystemRestliEntityClient entityClient) {
     _steps = buildUpgradeSteps(
         server,
         entityService,
         entityRegistry,
-        systemAuthentication,
         entityClient);
     _cleanupSteps = buildCleanupSteps();
   }
@@ -63,15 +60,14 @@ private List<UpgradeStep> buildUpgradeSteps(
       final Database server,
       final EntityService entityService,
       final EntityRegistry entityRegistry,
-      final Authentication systemAuthentication,
-      final RestliEntityClient entityClient) {
+      final SystemRestliEntityClient entityClient) {
     final List<UpgradeStep> steps = new ArrayList<>();
     steps.add(new RemoveAspectV2TableStep(server));
     steps.add(new GMSQualificationStep(ImmutableMap.of("noCode", "true")));
     steps.add(new UpgradeQualificationStep(server));
     steps.add(new CreateAspectTableStep(server));
     steps.add(new DataMigrationStep(server, entityService, entityRegistry));
-    steps.add(new GMSEnableWriteModeStep(systemAuthentication, entityClient));
+    steps.add(new GMSEnableWriteModeStep(entityClient));
     return steps;
   }
 }
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/DeleteLegacySearchIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/DeleteLegacySearchIndicesStep.java
index 15bbe40d1e566..9a64d5fe1810c 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/DeleteLegacySearchIndicesStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/DeleteLegacySearchIndicesStep.java
@@ -7,9 +7,9 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import java.util.function.Function;
 import lombok.RequiredArgsConstructor;
-import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;


 // Do we need SQL-tech specific migration paths?
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
index 2b5e23c5f8269..a5d8d6ce9b666 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
@@ -9,7 +9,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.client.RestHighLevelClient;


 public class NoCodeCleanupUpgrade implements Upgrade {
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
index 67718a6739beb..9175ad606e3c8 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
@@ -1,6 +1,5 @@
 package com.linkedin.datahub.upgrade.restorebackup;

-import com.datahub.authentication.Authentication;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.datahub.upgrade.Upgrade;
 import com.linkedin.datahub.upgrade.UpgradeCleanupStep;
@@ -9,7 +8,7 @@
 import com.linkedin.datahub.upgrade.common.steps.ClearSearchServiceStep;
 import com.linkedin.datahub.upgrade.common.steps.GMSDisableWriteModeStep;
 import com.linkedin.datahub.upgrade.common.steps.GMSEnableWriteModeStep;
-import com.linkedin.entity.client.RestliEntityClient;
+import com.linkedin.entity.client.SystemRestliEntityClient;
 import com.linkedin.metadata.entity.EntityService;
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.models.registry.EntityRegistry;
@@ -27,11 +26,10 @@ public RestoreBackup(
       final Database server,
       final EntityService entityService,
       final EntityRegistry entityRegistry,
-      final Authentication systemAuthentication,
-      final RestliEntityClient entityClient,
+      final SystemRestliEntityClient entityClient,
       final GraphService graphClient,
       final EntitySearchService searchClient) {
-    _steps = buildSteps(server, entityService, entityRegistry, systemAuthentication, entityClient, graphClient, searchClient);
+    _steps = buildSteps(server, entityService, entityRegistry, entityClient, graphClient, searchClient);
   }

   @Override
@@ -48,17 +46,16 @@ private List<UpgradeStep> buildSteps(
       final Database server,
       final EntityService entityService,
       final EntityRegistry entityRegistry,
-      final Authentication systemAuthentication,
-      final RestliEntityClient entityClient,
+      final SystemRestliEntityClient entityClient,
       final GraphService graphClient,
       final EntitySearchService searchClient) {
     final List<UpgradeStep> steps = new ArrayList<>();
-    steps.add(new GMSDisableWriteModeStep(systemAuthentication, entityClient));
+    steps.add(new GMSDisableWriteModeStep(entityClient));
     steps.add(new ClearSearchServiceStep(searchClient, true));
     steps.add(new ClearGraphServiceStep(graphClient, true));
     steps.add(new ClearAspectV2TableStep(server));
     steps.add(new RestoreStorageStep(entityService, entityRegistry));
-    steps.add(new GMSEnableWriteModeStep(systemAuthentication, entityClient));
+    steps.add(new GMSEnableWriteModeStep(entityClient));
     return steps;
   }
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java
index 465a5fe342667..2feca1f27e625 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java
@@ -16,8 +16,8 @@
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
-import org.elasticsearch.client.RequestOptions;
+import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
+import org.opensearch.client.RequestOptions;

 import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.INDEX_BLOCKS_WRITE_SETTING;
 import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs;
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java
index 6f2f3a8bd727c..82b9428c89fb8 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java
@@ -19,10 +19,10 @@
 import com.linkedin.metadata.shared.ElasticSearchIndexed;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.ElasticsearchStatusException;
-import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.indices.ResizeRequest;
+import org.opensearch.OpenSearchStatusException;
+import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.indices.ResizeRequest;

 import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.INDEX_BLOCKS_WRITE_SETTING;
 import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs;
@@ -97,7 +97,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc
       ack = _esComponents.getSearchClient().indices()
           .putSettings(request, RequestOptions.DEFAULT).isAcknowledged();
       log.info("Updated index {} with new settings. Settings: {}, Acknowledged: {}", indexName, indexSettings, ack);
-    } catch (ElasticsearchStatusException | IOException ese) {
+    } catch (OpenSearchStatusException | IOException ese) {
       // Cover first run case, indices won't exist so settings updates won't work nor will the rest of the preConfigure steps.
       // Since no data are in there they are skippable.
       // Have to hack around HighLevelClient not sending the actual Java type nor having an easy way to extract it :(
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java
index f60aa283c0140..bb042bac6df95 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java
@@ -9,7 +9,7 @@
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
 import com.linkedin.metadata.shared.ElasticSearchIndexed;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.client.RestHighLevelClient;

 import java.util.List;
 import java.util.function.Function;
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
index fa414798ccfea..4b04feac62cbf 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
@@ -4,12 +4,12 @@
 import com.linkedin.metadata.shared.ElasticSearchIndexed;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.NotImplementedException;
-import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
-import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
-import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
-import org.elasticsearch.client.GetAliasesResponse;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.action.admin.indices.alias.get.GetAliasesRequest;
+import org.opensearch.action.admin.indices.settings.get.GetSettingsRequest;
+import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse;
+import org.opensearch.client.GetAliasesResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;

 import java.io.IOException;
 import java.util.ArrayList;
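After this package swap it is worth spot-checking that alias lookups still compile and behave identically against the OpenSearch client. A minimal sketch using the same types IndexUtils now imports (the helper class and method names are illustrative, not from IndexUtils):

import java.io.IOException;

import org.opensearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.opensearch.client.GetAliasesResponse;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestHighLevelClient;

public class AliasSpotCheck {

  // Fetches every alias and prints which concrete index backs it.
  static void printAliases(RestHighLevelClient client) throws IOException {
    GetAliasesResponse response =
        client.indices().getAlias(new GetAliasesRequest(), RequestOptions.DEFAULT);
    response.getAliases().forEach((index, aliases) ->
        aliases.forEach(alias -> System.out.println(alias.alias() + " -> " + index)));
  }
}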
diff --git a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx
index 486169c3559d3..0e5c035df00c1 100644
--- a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx
+++ b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx
@@ -17,6 +17,7 @@ const PageWrapper = styled.div`
     flex: 1;
     display: flex;
     flex-direction: column;
+    overflow: hidden;
 `;

 const Header = styled.div`
diff --git a/datahub-web-react/src/app/entity/user/UserEditProfileModal.tsx b/datahub-web-react/src/app/entity/user/UserEditProfileModal.tsx
index e36bf1972a56e..d9314df7e11ae 100644
--- a/datahub-web-react/src/app/entity/user/UserEditProfileModal.tsx
+++ b/datahub-web-react/src/app/entity/user/UserEditProfileModal.tsx
@@ -138,6 +138,7 @@ export default function UserEditProfileModal({ visible, onClose, onSave, editMod
                     placeholder="John Smith"
                     value={data.name}
                     onChange={(event) => setData({ ...data, name: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
                     onChange={(event) => setData({ ...data, title: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
                     onChange={(event) => setData({ ...data, team: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
                     onChange={(event) => setData({ ...data, email: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
                     onChange={(event) => setData({ ...data, slack: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
                     onChange={(event) => setData({ ...data, phone: event.target.value })}
+                    disabled={readOnlyModeEnabled}
                 />
diff --git a/datahub-web-react/src/app/settings/SettingsPage.tsx b/datahub-web-react/src/app/settings/SettingsPage.tsx
index 339cc0cf44bac..06592656ac719 100644
--- a/datahub-web-react/src/app/settings/SettingsPage.tsx
+++ b/datahub-web-react/src/app/settings/SettingsPage.tsx
@@ -89,12 +89,13 @@ export const SettingsPage = () => {
     const isPoliciesEnabled = config?.policiesConfig.enabled;
     const isIdentityManagementEnabled = config?.identityManagementConfig.enabled;
     const isViewsEnabled = config?.viewsConfig.enabled;
+    const { readOnlyModeEnabled } = config.featureFlags;

     const showPolicies = (isPoliciesEnabled && me && me?.platformPrivileges?.managePolicies) || false;
     const showUsersGroups = (isIdentityManagementEnabled && me && me?.platformPrivileges?.manageIdentities) || false;
     const showViews = isViewsEnabled || false;
     const showOwnershipTypes = me && me?.platformPrivileges?.manageOwnershipTypes;
-    const showHomePagePosts = me && me?.platformPrivileges?.manageGlobalAnnouncements;
+    const showHomePagePosts = me && me?.platformPrivileges?.manageGlobalAnnouncements && !readOnlyModeEnabled;

     return (
diff --git a/docker/build.gradle b/docker/build.gradle
index ae101fe1defc5..0faea626e982d 100644
--- a/docker/build.gradle
+++ b/docker/build.gradle
@@ -38,6 +38,16 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') {
   // environment "ACTIONS_VERSION", 'alpine3.17-slim'
   // environment "DATAHUB_ACTIONS_IMAGE", 'nginx'

+  // Elastic
+  // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch'
+  // environment "DATAHUB_SEARCH_TAG", '7.10.1'
+
+  // OpenSearch
+  environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch'
+  environment "DATAHUB_SEARCH_TAG", '2.9.0'
+  environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true'
+  environment "USE_AWS_ELASTICSEARCH", 'true'
+
   def cmd = [
     'source ../metadata-ingestion/venv/bin/activate && ',
     'datahub docker quickstart',
diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml
index 08f8cc1ec9c45..9543e67da07f2 100644
--- a/docker/docker-compose-with-cassandra.yml
+++ b/docker/docker-compose-with-cassandra.yml
@@ -96,6 +96,9 @@ services:
       context: ../
       dockerfile: docker/elasticsearch-setup/Dockerfile
     env_file: elasticsearch-setup/env/docker.env
+    environment:
+      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
+      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
     depends_on:
       elasticsearch:
         condition: service_healthy
@@ -117,13 +120,13 @@ services:
   elasticsearch:
     container_name: elasticsearch
     hostname: elasticsearch
-    image: elasticsearch:7.10.1
+    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
     ports:
       - 9200:9200
     env_file: elasticsearch/env/docker.env
     environment:
       - discovery.type=single-node
-      - xpack.security.enabled=false
+      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
     healthcheck:
       test: curl -sS --fail http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s
       start_period: 5s
diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml
index 0b2e4f76b8fa9..022362782f742 100644
--- a/docker/docker-compose-without-neo4j.yml
+++ b/docker/docker-compose-without-neo4j.yml
@@ -81,6 +81,9 @@ services:
       context: ../
       dockerfile: docker/elasticsearch-setup/Dockerfile
     env_file: elasticsearch-setup/env/docker.env
+    environment:
+      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
+      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
     depends_on:
       elasticsearch:
         condition: service_healthy
@@ -104,13 +107,13 @@ services:
   elasticsearch:
     container_name: elasticsearch
     hostname: elasticsearch
-    image: elasticsearch:7.10.1
+    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
     ports:
       - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
     env_file: elasticsearch/env/docker.env
     environment:
       - discovery.type=single-node
-      - xpack.security.enabled=false
+      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
     deploy:
       resources:
         limits:
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index d07ea5fa88f8b..a486689e050a2 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -83,6 +83,9 @@ services:
       context: ../
       dockerfile: docker/elasticsearch-setup/Dockerfile
     env_file: elasticsearch-setup/env/docker.env
+    environment:
+      - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false}
+      - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false}
     depends_on:
       elasticsearch:
         condition: service_healthy
@@ -109,13 +112,13 @@ services:
   elasticsearch:
     container_name: elasticsearch
     hostname: elasticsearch
-    image: elasticsearch:7.10.1
+    image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
     ports:
       - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
     env_file: elasticsearch/env/docker.env
     environment:
       - discovery.type=single-node
-      - xpack.security.enabled=false
+      - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
     deploy:
       resources:
         limits:
diff --git a/docker/elasticsearch/env/docker.env b/docker/elasticsearch/env/docker.env
index 4b1f0215ea6c8..46b5836dedd28 100644
--- a/docker/elasticsearch/env/docker.env
+++ b/docker/elasticsearch/env/docker.env
@@ -1 +1,2 @@
 ES_JAVA_OPTS="-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true"
+OPENSEARCH_JAVA_OPTS="-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true"
\ No newline at end of file
diff --git a/docker/postgres-setup/init.sh b/docker/postgres-setup/init.sh
index 6c0adc8c69bdd..afc9bdfe4c668 100755
--- a/docker/postgres-setup/init.sh
+++ b/docker/postgres-setup/init.sh
@@ -1,8 +1,13 @@
 #!/bin/sh
 export PGPASSWORD=$POSTGRES_PASSWORD

+POSTGRES_CREATE_DB=${POSTGRES_CREATE_DB:-true}
+POSTGRES_CREATE_DB_CONNECTION_DB=${POSTGRES_CREATE_DB_CONNECTION_DB:-postgres}
+
 # workaround create database if not exists, check https://stackoverflow.com/a/36591842
-psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -c "CREATE DATABASE ${DATAHUB_DB_NAME}"
+if [ "$POSTGRES_CREATE_DB" = true ]; then
+  psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -c "CREATE DATABASE ${DATAHUB_DB_NAME}"
+fi

 sed -e "s/DATAHUB_DB_NAME/${DATAHUB_DB_NAME}/g" /init.sql | tee -a /tmp/init-final.sql
-psql -d $DATAHUB_DB_NAME -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT < /tmp/init-final.sql
+psql -d "$DATAHUB_DB_NAME" -U 
"$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" < /tmp/init-final.sql diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 38418bc8c41b9..89e9aaa0defd6 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -161,8 +161,9 @@ services: memory: 1G environment: - discovery.type=single-node - - xpack.security.enabled=false + - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false} - ES_JAVA_OPTS=-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true healthcheck: interval: 1s retries: 3 @@ -170,7 +171,7 @@ services: test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch - image: elasticsearch:7.10.1 + image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} ports: - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: @@ -181,6 +182,8 @@ services: elasticsearch: condition: service_healthy environment: + - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false} + - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false} - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index cf879faa6a3f0..f6284edc83648 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -154,8 +154,9 @@ services: memory: 1G environment: - discovery.type=single-node - - xpack.security.enabled=false + - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false} - ES_JAVA_OPTS=-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true healthcheck: interval: 1s retries: 3 @@ -163,7 +164,7 @@ services: test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch - image: elasticsearch:7.10.1 + image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} ports: - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: @@ -174,6 +175,8 @@ services: elasticsearch: condition: service_healthy environment: + - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false} + - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false} - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 007830078d2b4..4e3503e35c0db 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -154,8 +154,9 @@ services: memory: 1G environment: - discovery.type=single-node - - xpack.security.enabled=false + - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false} - ES_JAVA_OPTS=-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true healthcheck: interval: 1s retries: 3 @@ -163,7 +164,7 @@ services: test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s 
hostname: elasticsearch - image: elasticsearch:7.10.1 + image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} ports: - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: @@ -174,6 +175,8 @@ services: elasticsearch: condition: service_healthy environment: + - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false} + - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false} - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 390543b92123f..e2f52064389e0 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -161,8 +161,9 @@ services: memory: 1G environment: - discovery.type=single-node - - xpack.security.enabled=false + - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false} - ES_JAVA_OPTS=-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true healthcheck: interval: 1s retries: 3 @@ -170,7 +171,7 @@ services: test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch - image: elasticsearch:7.10.1 + image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} ports: - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: @@ -181,6 +182,8 @@ services: elasticsearch: condition: service_healthy environment: + - ELASTICSEARCH_USE_SSL=${ELASTICSEARCH_USE_SSL:-false} + - USE_AWS_ELASTICSEARCH=${USE_AWS_ELASTICSEARCH:-false} - ELASTICSEARCH_HOST=elasticsearch - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http diff --git a/docs-website/markdown-link-check-config.json b/docs-website/markdown-link-check-config.json index 26e040edde6f7..2f5a51ada324e 100644 --- a/docs-website/markdown-link-check-config.json +++ b/docs-website/markdown-link-check-config.json @@ -1,50 +1,41 @@ { "ignorePatterns": [ { - "pattern": "^http://demo\\.datahubproject\\.io" + "pattern": "^https?://demo\\.datahubproject\\.io" }, { - "pattern": "^http://localhost" + "pattern": "^http://localhost" }, { - "pattern": "^http://www.famfamfam.com" + "pattern": "^/docs" }, { - "pattern": "^http://www.linkedin.com" + "pattern": "^/integrations" }, { - "pattern": "\\.md$" + "pattern": "^https?://www.linkedin.com" }, { - "pattern":"\\.json$" + "pattern": "\\.md(#.*)?$" }, { - "pattern":"\\.txt$" + "pattern": "\\.json$" }, { - "pattern": "\\.java$" + "pattern": "\\.txt$" }, { - "pattern": "\\.md#.*$" + "pattern": "\\.java$" }, { - "pattern": "^https://oauth2.googleapis.com/token" + "pattern": "^https://oauth2.googleapis.com/token" }, { - "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" + "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" }, { - "pattern": "#v(\\d+)-(\\d+)-(\\d+)" - }, - { - "pattern": "^https://github.com/mohdsiddique$" - }, - { - "pattern": "^https://github.com/2x$" - }, - { - "pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" + "pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" } ], - "aliveStatusCodes": [200, 206, 0, 999, 400, 401, 403] -} \ No newline at end of file + "aliveStatusCodes": [200, 206, 0, 999] +} diff --git a/docs-website/package.json b/docs-website/package.json index 1722f92169692..eca6e5814d3c6 100644 --- 
a/docs-website/package.json +++ b/docs-website/package.json @@ -17,8 +17,10 @@ "generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs", "generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs", "lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js", - "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js && find ./genDocs -name \\*.md -not -path \"./genDocs/python-sdk/models.md\" -print0 | xargs -0 -n1 markdown-link-check -p -q -c markdown-link-check-config.json", - "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js" + "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js", + "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js", + "_list-link-check-files": "find ./genDocs -name '*.md' -not \\( -path './genDocs/python-sdk/*' -o -path './genDocs/releases.md' \\)", + "check-links": "yarn run -s _list-link-check-files -print0 | xargs -0 -n1 -t markdown-link-check -q -c markdown-link-check-config.json" }, "dependencies": { "@ant-design/icons": "^4.7.0", diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 43d881cd641a6..7b94f77a0aa8d 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -444,6 +444,7 @@ module.exports = { Observability: [ "docs/managed-datahub/observe/freshness-assertions", "docs/managed-datahub/observe/volume-assertions", + "docs/managed-datahub/observe/custom-assertions", ], }, ], diff --git a/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss b/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss new file mode 100644 index 0000000000000..fcd3666d03ddc --- /dev/null +++ b/docs-website/src/pages/_components/CardCTAs/cardCTAs.module.scss @@ -0,0 +1,24 @@ +.flexCol { + display: flex; +} + +.ctaCard { + flex-direction: row; + align-items: flex-start; + justify-content: space-between; + row-gap: 1rem; + padding: 1rem; + &:hover { + text-decoration: none; + border: 1px solid var(--ifm-color-primary); + background-color: var(--ifm-background-surface-color); + } + margin-bottom: 1rem; + flex: 1; +} + +.ctaHeading { + margin-bottom: 0; + display: flex; + align-items: center; +} diff --git a/docs-website/src/pages/_components/CardCTAs/index.js b/docs-website/src/pages/_components/CardCTAs/index.js new file mode 100644 index 0000000000000..d87c803b42818 --- /dev/null +++ b/docs-website/src/pages/_components/CardCTAs/index.js @@ -0,0 +1,52 @@ +import React from "react"; +import clsx from "clsx"; +import styles from "./cardCTAs.module.scss"; +import useBaseUrl from "@docusaurus/useBaseUrl"; +import { ArrowRightOutlined } from "@ant-design/icons"; + +const cardsContent = [ + { + label: "Data Mesh", + title: "Data Products, Delivered", + url: "https://www.acryldata.io/blog/data-products-in-datahub-everything-you-need-to-know", + }, + { + label: "Data Contracts", + title: "End-to-end Reliability in Data", + url: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management", + }, + { + label: "Shift Left", + title: "Developer-friendly Data Governance", + url: "https://www.acryldata.io/blog/the-3-must-haves-of-metadata-management-part-2", + }, +]; + +const Card = ({ label, title, url }) => { + return ( + + ); +}; + +const CardCTAs = () => + cardsContent?.length > 0 ? ( +
+
+
+ {cardsContent.map((props, idx) => ( + + ))} +
+
+
+ ) : null; + +export default CardCTAs; diff --git a/docs-website/src/pages/_components/Hero/index.js b/docs-website/src/pages/_components/Hero/index.js index b5fa04c80faee..22b406dce037e 100644 --- a/docs-website/src/pages/_components/Hero/index.js +++ b/docs-website/src/pages/_components/Hero/index.js @@ -7,6 +7,7 @@ import { useColorMode } from "@docusaurus/theme-common"; import { QuestionCircleOutlined } from "@ant-design/icons"; import styles from "./hero.module.scss"; import CodeBlock from "@theme/CodeBlock"; +import CardCTAs from "../CardCTAs"; const HeroAnnouncement = ({ message, linkUrl, linkText }) => (
@@ -33,7 +34,12 @@ const Hero = ({}) => { complexity of your data ecosystem.

-Built with ❤️ by Acryl Data and LinkedIn. + Built with ❤️ by{" "} + {" "} + + Acryl Data + {" "} + and LinkedIn.

Get Started → @@ -43,6 +49,7 @@ Built with ❤️ by DataHub Flow Diagram

Get Started Now

diff --git a/docs-website/src/pages/docs/index.js b/docs-website/src/pages/docs/index.js index a0462091a046d..0e8bfdcf3b9d7 100644 --- a/docs-website/src/pages/docs/index.js +++ b/docs-website/src/pages/docs/index.js @@ -180,8 +180,8 @@ const quickLinkContent = [ { title: "Developer Guides", icon: , - description: "Interact with DataHub programmatically ", - to: "/docs/cli", + description: "Interact with DataHub programmatically", + to: "/docs/api/datahub-apis", }, { title: "Feature Guides", diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index 27d8b15e5a73a..e3606f2a3e48d 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -145,28 +145,31 @@ For example, the following resource filter will apply the policy to datasets, ch ```json { - "resource": { - "criteria": [ - { - "field": "resource_type", - "values": [ - "dataset", - "chart", - "dashboard" - ], - "condition": "EQUALS" - }, - { - "field": "domain", - "values": [ - "urn:li:domain:domain1" - ], - "condition": "EQUALS" + "resources": { + "filter": { + "criteria": [ + { + "field": "RESOURCE_TYPE", + "condition": "EQUALS", + "values": [ + "dataset", + "chart", + "dashboard" + ] + }, + { + "field": "DOMAIN", + "values": [ + "urn:li:domain:domain1" + ], + "condition": "EQUALS" + } + ] } - ] - } + } } ``` +Here, `resources` is inside the `info` aspect of a Policy. Supported fields are as follows diff --git a/docs/datahub_lite.md b/docs/datahub_lite.md index 3918b8cee7830..de0a20eed1d01 100644 --- a/docs/datahub_lite.md +++ b/docs/datahub_lite.md @@ -7,7 +7,6 @@ import TabItem from '@theme/TabItem'; DataHub Lite is a lightweight embeddable version of DataHub with no external dependencies. It is intended to enable local developer tooling use-cases such as simple access to metadata for scripts and other tools. DataHub Lite is compatible with the DataHub metadata format and all the ingestion connectors that DataHub supports. -It was built as a reaction to [recap](https://github.com/recap-cloud/recap) to prove that a similar lightweight system could be built within DataHub quite easily. Currently DataHub Lite uses DuckDB under the covers as its default storage layer, but that might change in the future. ## Features diff --git a/docs/managed-datahub/observe/custom-assertions.md b/docs/managed-datahub/observe/custom-assertions.md new file mode 100644 index 0000000000000..e221cf1058fd0 --- /dev/null +++ b/docs/managed-datahub/observe/custom-assertions.md @@ -0,0 +1,315 @@ +--- +description: This page provides an overview of working with DataHub SQL Assertions +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + + +# Custom Assertions + + + + +> ⚠️ The **Custom Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a +> limited set of design partners. +> +> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success +> representative. + +## Introduction + +Can you remember a time when the meaning of a Data Warehouse Table that you depended on fundamentally changed, with little or no notice? +If the answer is yes, how did you find out? We'll take a guess - someone looking at an internal reporting dashboard or worse, a user using your product, sounded an alarm when +a number looked a bit out of the ordinary.
Perhaps your table initially tracked purchases made on your company's e-commerce web store, but suddenly began to include purchases made +through your company's new mobile app. + +There are many reasons why an important Table on Snowflake, Redshift, or BigQuery may change in its meaning - application code bugs, new feature rollouts, +changes to key metric definitions, etc. Oftentimes, these changes break important assumptions made about the data used in building key downstream data products +like reporting dashboards or data-driven product features. + +What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data +issues _before_ anyone else? With Acryl DataHub **Custom Assertions**, you can. + +Acryl DataHub allows users to define complex expectations about a particular warehouse Table through custom SQL queries, and then monitor those expectations over time as the table grows and changes. + +In this article, we'll cover the basics of monitoring Custom Assertions - what they are, how to configure them, and more - so that you and your team can +start building trust in your most important data assets. + +Let's get started! + +## Support + +Custom Assertions are currently supported for: + +1. Snowflake +2. Redshift +3. BigQuery + +Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** +tab. + +> Note that SQL Assertions are not yet supported if you are connecting to your warehouse +> using the DataHub CLI or a Remote Ingestion Executor. + +## What is a Custom Assertion? + +A **Custom Assertion** is a highly configurable Data Quality rule used to monitor a Data Warehouse Table +for unexpected or sudden changes in its meaning. Custom Assertions are defined through a raw SQL query that is evaluated against +the Table. You have full control over the SQL query, and can use any SQL features supported by your Data Warehouse. +Custom Assertions can be particularly useful when you have complex tables or relationships +that are used to generate important metrics or reports, and where the meaning of the table is expected to be stable over time. +If you have existing SQL queries that you already use to monitor your data, you may find that Custom Assertions are an easy way to port them +to Acryl DataHub to get started. + +For example, imagine that you have a Table that tracks the number of purchases made on your company's e-commerce web store. +You have a SQL query that you use to calculate the number of purchases made in the past 24 hours, and you'd like to monitor this +metric over time to ensure that it is always greater than 1000. You can use a Custom Assertion to do this! + + +### Anatomy of a Custom Assertion + +At the most basic level, **Custom Assertions** consist of a few important parts: + +1. An **Evaluation Schedule** +2. A **Query** +3. A **Condition Type** +4. An **Assertion Description** + +In this section, we'll give an overview of each. + +#### 1. Evaluation Schedule + +The **Evaluation Schedule**: This defines how often to query the given warehouse Table. This should usually +be configured to match the expected change frequency of the Table, although it can also run less frequently, depending +on your requirements. You can also specify specific days of the week, hours in the day, or even +minutes in an hour. + + +#### 2. Query + +The **Query**: This is the SQL query that will be used to evaluate the Table.
The query should return a single row with a single column. Currently only numeric values are supported (integers and floats). The query can be as simple or as complex as you'd like, and can use any SQL features supported by your Data Warehouse. This requires that the configured user account has read access to the asset. Make sure to use the fully qualified name of the Table in your query. + + +Use the "Try it out" button to test your query and ensure that it returns a single row with a single column. The query will be run against the Table in the context of the configured user account, so ensure that the user has read access to the Table. + + +#### 3. Condition Type + +The **Condition Type**: This defines the conditions under which the Assertion will **fail**. The list of supported operations is: +- **Is Equal To**: The assertion will fail if the query result is equal to the configured value +- **Is Not Equal To**: The assertion will fail if the query result is not equal to the configured value +- **Is Greater Than**: The assertion will fail if the query result is greater than the configured value +- **Is Less Than**: The assertion will fail if the query result is less than the configured value +- **Is outside a range**: The assertion will fail if the query result is outside the configured range +- **Grows More Than**: The assertion will fail if the query result grows more than the configured threshold. This can be either a percentage (**Percentage**) or a number (**Value**). +- **Grows Less Than**: The assertion will fail if the query result grows less than the configured threshold. This can be either a percentage (**Percentage**) or a number (**Value**). +- **Growth is outside a range**: The assertion will fail if the query result growth is outside the configured range. This can be either a percentage (**Percentage**) or a number (**Value**). + +Custom Assertions also have an off switch: they can be started or stopped at any time with the click of a button. + +#### 4. Assertion Description + +The **Assertion Description**: This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. + + +## Creating a Custom Assertion + +### Prerequisites + +1. **Permissions**: To create or delete Custom Assertions for a specific entity on DataHub, you'll need to be granted the + `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + +2. **Data Platform Connection**: In order to create a Custom Assertion, you'll need to have an **Ingestion Source** configured to your + Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. + +Once these are in place, you're ready to create your Custom Assertions! + +### Steps + +1. Navigate to the Table you want to monitor +2. Click the **Validations** tab + +

+ +

+ +3. Click **+ Create Assertion** + +

+ +

+ +4. Choose **Custom** + +5. Configure the evaluation **schedule**. This is the frequency at which the assertion will be evaluated to produce a pass or fail result, and the times + when the query will be executed. + +6. Provide a SQL **query** that will be used to evaluate the Table. The query should return a single row with a single column. Currently only numeric values are supported (integers and floats). The query can be as simple or as complex as you'd like, and can use any SQL features supported by your Data Warehouse. Make sure to use the fully qualified name of the Table in your query. +
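+   For example, a query for the e-commerce purchases scenario described earlier might look like the following sketch (Snowflake syntax; the database, schema, and column names are illustrative assumptions, not part of the product):
+
+   ```sql
+   -- Returns a single row with a single numeric column, as required.
+   SELECT COUNT(*) AS purchases_last_24h
+   FROM analytics_db.ecommerce.purchase_events
+   WHERE purchase_timestamp >= DATEADD('hour', -24, CURRENT_TIMESTAMP());
+   ```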

+ +

+ +7. Configure the evaluation **condition type**. This determines the cases in which the new assertion will fail when it is evaluated. + +

+ +

+ +8. Add a **description** for the assertion. This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. + +

+ +

+ +9. (Optional) Use the **Try it out** button to test your query and ensure that it returns a single row with a single column, and passes the configured condition type. + +

+ +

+ +10. Click **Next** +11. Configure actions that should be taken when the Custom Assertion passes or fails + +

+ +

+ +- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Custom Assertion is failing. This + may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when + an incident is created due to an Assertion failure. +- **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Custom Assertion. Note that + any other incidents will not be impacted. + +12. Click **Save**. + +And that's it! DataHub will now begin to monitor your Custom Assertion for the table. + +To view the time of the next Custom Assertion evaluation, simply click **Custom** and then click on your +new Assertion: +

+ +

+ +Once your assertion has run, you will begin to see a Success or Failure status for the Table: +

+ +

+ + +## Stopping a Custom Assertion + +In order to temporarily stop the evaluation of a Custom Assertion: + +1. Navigate to the **Validations** tab of the Table with the assertion +2. Click **Custom** to open the Custom Assertions list +3. Click the three-dot menu on the right side of the assertion you want to disable +4. Click **Stop** + +

+ +

+ +To resume the Custom Assertion, simply click **Turn On**. + +

+ +

+ + +## Creating Custom Assertions via API + +Under the hood, Acryl DataHub implements Custom Assertion Monitoring using two "entity" concepts: + +- **Assertion**: The specific expectation for the custom assertion, e.g. "The result of the configured query is greater than 100". This is the "what". + +- **Monitor**: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific + mechanisms. This is the "how". + +Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the +`Edit Assertions` and `Edit Monitors` privileges for it. + +#### GraphQL + +In order to create a Custom Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use two +GraphQL mutations: one to create a Custom Assertion entity and one to create an Assertion Monitor entity responsible for evaluating it. + +Start by creating the Custom Assertion entity using the `createSqlAssertion` mutation and hold on to the `urn` field of the Assertion entity +you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor` mutation. + +##### Examples + +To create a Custom Assertion Entity that checks whether a query result is greater than 100: + +```graphql +mutation createSqlAssertion { + createSqlAssertion( + input: { + entityUrn: "", + type: METRIC, + description: "", + statement: "", + operator: GREATER_THAN, + parameters: { + value: { + value: "100", + type: NUMBER + } + } + } + ) { + urn + } +} +``` + +The supported custom assertion types are `METRIC` and `METRIC_CHANGE`. If you choose `METRIC_CHANGE`, +you will need to provide a `changeType` parameter with either `ABSOLUTE` or `PERCENTAGE` values. +The supported operator types are `EQUAL_TO`, `NOT_EQUAL_TO`, `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). +The only supported parameter type is `NUMBER`. + +To create an Assertion Monitor Entity that evaluates the custom assertion every 8 hours: + +```graphql +mutation createAssertionMonitor { + createAssertionMonitor( + input: { + entityUrn: "", + assertionUrn: "", + schedule: { + cron: "0 */8 * * *", + timezone: "America/Los_Angeles" + }, + parameters: { + type: DATASET_SQL + } + } + ) { + urn + } +} +``` + +This entity defines _when_ to run the check (using cron format - every 8 hours) and _how_ to run the check (by executing the configured SQL statement). + +After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. + +You can delete assertions along with their monitors using GraphQL mutations: `deleteAssertion` and `deleteMonitor`. + +### Tips + +:::info +**Authorization** + +Remember to always provide a DataHub Personal Access Token when calling the GraphQL API. To do so, just add the 'Authorization' header as follows: + +``` +Authorization: Bearer +``` + +**Exploring GraphQL API** + +Also, remember that you can play with an interactive version of the Acryl GraphQL API at `https://your-account-id.acryl.io/api/graphiql` +::: diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index be2d7d795de70..ba101be16b98e 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -16,7 +16,6 @@ An important question that will arise once you've decided to extend the metadata

- The green lines represent pathways that will lead to lesser friction for you to maintain your code long term. The red lines represent higher risk of conflicts in the future. We are working hard to move the majority of model extension use-cases to no-code / low-code pathways to ensure that you can extend the core metadata model without having to maintain a custom fork of DataHub. We will refer to the two options as the **open-source fork** and **custom repository** approaches in the rest of the document below. @@ -92,10 +91,11 @@ the annotation model. Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step-4-choose-a-place-to-store-your-model-extension) and [5](#step-5-attaching-your-non-key-aspects-to-the-entity). Example: + ```yaml - - name: dashboard - doc: A container of related data assets. - keyAspect: dashboardKey +- name: dashboard + doc: A container of related data assets. + keyAspect: dashboardKey ``` - name: The entity name/type, this will be present as a part of the Urn. @@ -196,8 +196,8 @@ The Aspect has four key components: its properties, the @Aspect annotation, the can be defined as PDL primitives, enums, records, or collections ( see [pdl schema documentation](https://linkedin.github.io/rest.li/pdl_schema)) references to other entities, of type Urn or optionally `Urn` -- **@Aspect annotation**: Declares record is an Aspect and includes it when serializing an entity. Unlike the following - two annotations, @Aspect is applied to the entire record, rather than a specific field. Note, you can mark an aspect +- **@Aspect annotation**: Declares record is an Aspect and includes it when serializing an entity. Unlike the following + two annotations, @Aspect is applied to the entire record, rather than a specific field. Note, you can mark an aspect as a timeseries aspect. Check out this [doc](metadata-model.md#timeseries-aspects) for details. - **@Searchable annotation**: This annotation can be applied to any primitive field or a map field to indicate that it should be indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see @@ -205,7 +205,7 @@ The Aspect has four key components: its properties, the @Aspect annotation, the - **@Relationship annotation**: These annotations create edges between the Entity’s Urn and the destination of the annotated field when the entities are ingested. @Relationship annotations must be applied to fields of type Urn. In the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied - directly to an array of Urns. That’s why you see the use of an Annotation override (`”/*”:) to apply the @Relationship + directly to an array of Urns. That’s why you see the use of an Annotation override (`"/*":`) to apply the @Relationship annotation to the Urn directly. Read more about overrides in the annotation docs further down on this page. After you create your Aspect, you need to attach to all the entities that it applies to. @@ -231,7 +231,7 @@ entities: - keyAspect: dashBoardKey aspects: # the name of the aspect must be the same as that on the @Aspect annotation on the class - - dashboardInfo + - dashboardInfo ``` Previously, you were required to add all aspects for the entity into an Aspect union. You will see examples of this pattern throughout the code-base (e.g. `DatasetAspect`, `DashboardAspect` etc.). This is no longer required. 
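+To make the annotations above concrete, here is a minimal PDL sketch of an aspect that combines them (an illustrative fragment loosely mirroring the `DashboardInfo` example; it is not the full model):
+
+```
+@Aspect = {
+  "name": "dashboardInfo"
+}
+record DashboardInfo {
+  // Indexed in Elasticsearch so dashboards can be found by title.
+  @Searchable = {
+    "fieldType": "TEXT_PARTIAL"
+  }
+  title: string
+
+  // The "/*" override applies @Relationship to each Urn in the array.
+  @Relationship = {
+    "/*": {
+      "name": "Contains",
+      "entityTypes": [ "chart" ]
+    }
+  }
+  charts: array[Urn]
+}
+```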
@@ -251,14 +251,39 @@ Then, run `./gradlew build` from the repository root to rebuild Datahub with acc Then, re-deploy metadata-service (gms), and mae-consumer and mce-consumer (optionally if you are running them unbundled). See [docker development](../../docker/README.md) for details on how to deploy during development. This will allow Datahub to read and write your new entity or extensions to existing entities, along with serving search and graph queries for that entity type. -To emit proposals to ingest from the Datahub CLI tool, first install datahub cli -locally [following the instructions here](../../metadata-ingestion/developing.md). `./gradlew build` generated the avro -schemas your local ingestion cli tool uses earlier. After following the developing guide, you should be able to emit -your new event using the local datahub cli. +### (Optional) Step 7: Use custom models with the Python SDK + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + + +If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI. + +Install the DataHub CLI locally by following the [developer instructions](../../metadata-ingestion/developing.md). +The `./gradlew build` command already generated the avro schemas for your local ingestion cli tool to use. +After following the developing guide, you should be able to emit your new event using the local DataHub CLI. + + + -Now you are ready to start ingesting metadata for your new entity! +If you want to use your custom models beyond your local machine without forking DataHub, then you can generate a custom model package that can be installed from other places. -### (Optional) Step 7: Extend the DataHub frontend to view your entity in GraphQL & React +This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones. + +```bash +cd metadata-ingestion +../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1" +``` + +This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI. +The command output will contain additional details and exact CLI commands you can use. + + + + +### (Optional) Step 8: Extend the DataHub frontend to view your entity in GraphQL & React If you are extending an entity with additional aspects, and you can use the auto-render specifications to automatically render these aspects to your satisfaction, you do not need to write any custom code. @@ -301,9 +326,9 @@ It takes the following parameters: - **autoRender**: boolean (optional) - defaults to false. When set to true, the aspect will automatically be displayed on entity pages in a tab using a default renderer. **_This is currently only supported for Charts, Dashboards, DataFlows, DataJobs, Datasets, Domains, and GlossaryTerms_**. - **renderSpec**: RenderSpec (optional) - config for autoRender aspects that controls how they are displayed. **_This is currently only supported for Charts, Dashboards, DataFlows, DataJobs, Datasets, Domains, and GlossaryTerms_**. Contains three fields: - - **displayType**: One of `tabular`, `properties`. Tabular should be used for a list of data elements, properties for a single data bag. - - **displayName**: How the aspect should be referred to in the UI. Determines the name of the tab on the entity page. - - **key**: For `tabular` aspects only. Specifies the key in which the array to render may be found. 
+ - **displayType**: One of `tabular`, `properties`. Tabular should be used for a list of data elements, properties for a single data bag. + - **displayName**: How the aspect should be referred to in the UI. Determines the name of the tab on the entity page. + - **key**: For `tabular` aspects only. Specifies the key in which the array to render may be found. ##### Example @@ -329,34 +354,34 @@ It takes the following parameters: Thus far, we have implemented 11 fieldTypes: - 1. *KEYWORD* - Short text fields that only support exact matches, often used only for filtering + 1. _KEYWORD_ - Short text fields that only support exact matches, often used only for filtering + + 2. _TEXT_ - Text fields delimited by spaces/slashes/periods. Default field type for string variables. - 2. *TEXT* - Text fields delimited by spaces/slashes/periods. Default field type for string variables. + 3. _TEXT_PARTIAL_ - Text fields delimited by spaces/slashes/periods with partial matching support. Note, partial + matching is expensive, so this field type should not be applied to fields with long values (like description) - 3. *TEXT_PARTIAL* - Text fields delimited by spaces/slashes/periods with partial matching support. Note, partial - matching is expensive, so this field type should not be applied to fields with long values (like description) + 4. _WORD_GRAM_ - Text fields delimited by spaces, slashes, periods, dashes, or underscores with partial matching AND + word gram support. That is, the text will be split by the delimiters and can be matched with delimited queries + matching two, three, or four length tokens in addition to single tokens. As with partial match, this type is + expensive, so should not be applied to fields with long values such as description. - 4. *WORD_GRAM* - Text fields delimited by spaces, slashes, periods, dashes, or underscores with partial matching AND - word gram support. That is, the text will be split by the delimiters and can be matched with delimited queries - matching two, three, or four length tokens in addition to single tokens. As with partial match, this type is - expensive, so should not be applied to fields with long values such as description. + 5. _BROWSE_PATH_ - Field type for browse paths. Applies specific mappings for slash delimited paths. - 5. *BROWSE_PATH* - Field type for browse paths. Applies specific mappings for slash delimited paths. + 6. _URN_ - Urn fields where each sub-component inside the urn is indexed. For instance, for a data platform urn like + "urn:li:dataplatform:kafka", it will index the platform name "kafka" and ignore the common components - 6. *URN* - Urn fields where each sub-component inside the urn is indexed. For instance, for a data platform urn like - "urn:li:dataplatform:kafka", it will index the platform name "kafka" and ignore the common components + 7. _URN_PARTIAL_ - Urn fields where each sub-component inside the urn is indexed with partial matching support. - 7. *URN_PARTIAL* - Urn fields where each sub-component inside the urn is indexed with partial matching support. + 8. _BOOLEAN_ - Boolean fields used for filtering. - 8. *BOOLEAN* - Boolean fields used for filtering. + 9. _COUNT_ - Count fields used for filtering. - 9. *COUNT* - Count fields used for filtering. - - 10. *DATETIME* - Datetime fields used to represent timestamps. + 10. _DATETIME_ - Datetime fields used to represent timestamps. - 11. *OBJECT* - Each property in an object will become an extra column in Elasticsearch and can be referenced as - `field.property` in queries. 
You should be careful to not use it on objects with many properties as it can cause a - mapping explosion in Elasticsearch. + 11. _OBJECT_ - Each property in an object will become an extra column in Elasticsearch and can be referenced as + `field.property` in queries. You should be careful to not use it on objects with many properties as it can cause a + mapping explosion in Elasticsearch. - **fieldName**: string (optional) - The name of the field in search index document. Defaults to the field name where the annotation resides. @@ -401,13 +426,13 @@ Now, when Datahub ingests Dashboards, it will index the Dashboard’s title in E Dashboards, that query will be used to search on the title index and matching Dashboards will be returned. Note, when @Searchable annotation is applied to a map, it will convert it into a list with "key.toString() -=value.toString()" as elements. This allows us to index map fields, while not increasing the number of columns indexed. +=value.toString()" as elements. This allows us to index map fields, while not increasing the number of columns indexed. This way, the keys can be queried by `aMapField:key1=value1`. -You can change this behavior by specifying the fieldType as OBJECT in the @Searchable annotation. It will put each key -into a column in Elasticsearch instead of an array of serialized kay-value pairs. This way the query would look more +You can change this behavior by specifying the fieldType as OBJECT in the @Searchable annotation. It will put each key +into a column in Elasticsearch instead of an array of serialized key-value pairs. This way the query would look more like `aMapField.key1:value1`. As this method will increase the number of columns with each unique key - large maps can -cause a mapping explosion in Elasticsearch. You should *not* use the object fieldType if you expect your maps to get +cause a mapping explosion in Elasticsearch. You should _not_ use the object fieldType if you expect your maps to get large. 
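+For example, a map field using the OBJECT fieldType might be declared as in the following sketch (the record and field names are illustrative assumptions):
+
+```
+record ServiceInfo {
+  // With fieldType OBJECT, each map key becomes its own Elasticsearch
+  // column, so entries can be queried as customProperties.key1:value1.
+  @Searchable = {
+    "fieldType": "OBJECT"
+  }
+  customProperties: map[string, string]
+}
+```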
#### @Relationship diff --git a/li-utils/build.gradle b/li-utils/build.gradle index 8f526cffba094..1d5222e39185a 100644 --- a/li-utils/build.gradle +++ b/li-utils/build.gradle @@ -28,6 +28,7 @@ dependencies { testImplementation externalDependency.commonsIo testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') + testImplementation externalDependency.testngJava8 } idea { diff --git a/metadata-events/mxe-utils-avro-1.7/build.gradle b/metadata-events/mxe-utils-avro-1.7/build.gradle index 82249d393578c..3b137965d6c19 100644 --- a/metadata-events/mxe-utils-avro-1.7/build.gradle +++ b/metadata-events/mxe-utils-avro-1.7/build.gradle @@ -5,6 +5,7 @@ dependencies { api project(':metadata-models') api spec.product.pegasus.dataAvro1_6 + testImplementation externalDependency.testng testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 18e605ae76ebd..47069f59c314d 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -80,7 +80,8 @@ def get_long_description(): entry_points = { - "airflow.plugins": "acryl-datahub-airflow-plugin = datahub_airflow_plugin.datahub_plugin:DatahubPlugin" + "airflow.plugins": "acryl-datahub-airflow-plugin = datahub_airflow_plugin.datahub_plugin:DatahubPlugin", + "apache_airflow_provider": ["provider_info=datahub_provider:get_provider_info"], } diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index b2c45d3a1e75d..e4040e3a17dfd 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ -18,4 +18,20 @@ def get_provider_info(): "package-name": f"{__package_name__}", "name": f"{__package_name__}", "description": "Datahub metadata collector plugin", + "connection-types": [ + { + "hook-class-name": "datahub_airflow_plugin.hooks.datahub.DatahubRestHook", + "connection-type": "datahub-rest", + }, + { + "hook-class-name": "datahub_airflow_plugin.hooks.datahub.DatahubKafkaHook", + "connection-type": "datahub-kafka", + }, + ], + # Deprecated method of providing connection types, kept for backwards compatibility. + # We can remove with Airflow 3. 
+ "hook-class-names": [ + "datahub_airflow_plugin.hooks.datahub.DatahubRestHook", + "datahub_airflow_plugin.hooks.datahub.DatahubKafkaHook", + ], } diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index aed858c6c4df0..8fb7363f8cad1 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -29,7 +29,7 @@ class DatahubRestHook(BaseHook): conn_name_attr = "datahub_rest_conn_id" default_conn_name = "datahub_rest_default" - conn_type = "datahub_rest" + conn_type = "datahub-rest" hook_name = "DataHub REST Server" def __init__(self, datahub_rest_conn_id: str = default_conn_name) -> None: @@ -50,6 +50,15 @@ def get_ui_field_behaviour() -> Dict: }, } + def test_connection(self) -> Tuple[bool, str]: + try: + emitter = self.make_emitter() + emitter.test_connection() + except Exception as e: + return False, str(e) + + return True, "Successfully connected to DataHub." + def _get_config(self) -> Tuple[str, Optional[str], Optional[int]]: conn: "Connection" = self.get_connection(self.datahub_rest_conn_id) @@ -99,7 +108,7 @@ class DatahubKafkaHook(BaseHook): conn_name_attr = "datahub_kafka_conn_id" default_conn_name = "datahub_kafka_default" - conn_type = "datahub_kafka" + conn_type = "datahub-kafka" hook_name = "DataHub Kafka Sink" def __init__(self, datahub_kafka_conn_id: str = default_conn_name) -> None: @@ -194,9 +203,15 @@ def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]: # We need to figure out the underlying hook type. First check the # conn_type. If that fails, attempt to guess using the conn id name. - if conn.conn_type == DatahubRestHook.conn_type: + if ( + conn.conn_type == DatahubRestHook.conn_type + or conn.conn_type == DatahubRestHook.conn_type.replace("-", "_") + ): return DatahubRestHook(self.datahub_conn_id) - elif conn.conn_type == DatahubKafkaHook.conn_type: + elif ( + conn.conn_type == DatahubKafkaHook.conn_type + or conn.conn_type == DatahubKafkaHook.conn_type.replace("-", "_") + ): return DatahubKafkaHook(self.datahub_conn_id) elif "rest" in self.datahub_conn_id: return DatahubRestHook(self.datahub_conn_id) diff --git a/metadata-ingestion/.gitignore b/metadata-ingestion/.gitignore index 673c8e0995872..acc15c4598869 100644 --- a/metadata-ingestion/.gitignore +++ b/metadata-ingestion/.gitignore @@ -8,6 +8,7 @@ bq_credentials.json junit.*.xml /tmp *.bak +custom-package/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 3b1aae0b24f88..a0fef614528cb 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -176,7 +176,7 @@ The `deploy` subcommand of the `ingest` command tree allows users to upload thei datahub ingest deploy -n -c recipe.yaml ``` -By default, no schedule is done unless explicitly configured with the `--schedule` parameter. Timezones are inferred from the system time, can be overriden with `--time-zone` flag. +By default, no schedule is done unless explicitly configured with the `--schedule` parameter. Schedule timezones are UTC by default and can be overriden with `--time-zone` flag. 
```shell datahub ingest deploy -n test --schedule "0 * * * *" --time-zone "Europe/London" -c recipe.yaml ``` diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index c20d98cbcbb58..ea7990ab9c660 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -62,6 +62,14 @@ task codegen(type: Exec, dependsOn: [environmentSetup, installPackage, ':metadat commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/codegen.sh" } +task customPackageGenerate(type: Exec, dependsOn: [environmentSetup, installPackage, ':metadata-events:mxe-schemas:build']) { + def package_name = project.findProperty('package_name') + def package_version = project.findProperty('package_version') + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && " + + "./scripts/custom_package_codegen.sh '${package_name}' '${package_version}'" +} + task install(dependsOn: [installPackage, codegen]) task installDev(type: Exec, dependsOn: [install]) { diff --git a/metadata-ingestion/docs/sources/kafka-connect/README.md b/metadata-ingestion/docs/sources/kafka-connect/README.md index 5031bff5a3fac..e4f64c62914c5 100644 --- a/metadata-ingestion/docs/sources/kafka-connect/README.md +++ b/metadata-ingestion/docs/sources/kafka-connect/README.md @@ -21,4 +21,4 @@ This ingestion source maps the following Source System Concepts to DataHub Conce Works only for - Source connectors: JDBC, Debezium, Mongo and Generic connectors with user-defined lineage graph -- Sink connectors: BigQuery +- Sink connectors: BigQuery, Confluent S3, Snowflake diff --git a/metadata-ingestion/docs/sources/kafka/kafka.md b/metadata-ingestion/docs/sources/kafka/kafka.md index 2e8baa9516d17..9fdfc3a3af1d0 100644 --- a/metadata-ingestion/docs/sources/kafka/kafka.md +++ b/metadata-ingestion/docs/sources/kafka/kafka.md @@ -130,3 +130,86 @@ message MessageWithMap { repeated Map1Entry map_1 = 1; } ``` + +### Enriching DataHub metadata with automated meta mapping + +:::note +Meta mapping is currently only available for Avro schemas +::: + +Avro schemas are permitted to have additional attributes not defined by the specification as arbitrary metadata. A common pattern is to utilize this for business metadata. The Kafka source has the ability to transform this directly into DataHub Owners, Tags and Terms. + +#### Simple tags + +If you simply have a list of tags embedded into an Avro schema (either at the top-level or for an individual field), you can use the `schema_tags_field` config. + +Example Avro schema: + +```json +{ + "name": "sampleRecord", + "type": "record", + "tags": ["tag1", "tag2"], + "fields": [{ + "name": "field_1", + "type": "string", + "tags": ["tag3", "tag4"] + }] +} +``` + +The name of the field containing a list of tags can be configured with the `schema_tags_field` property: + +```yaml +config: + schema_tags_field: tags +``` + +#### Meta mapping + +You can also map specific Avro fields into Owners, Tags and Terms using meta +mapping. 
+ +Example Avro schema: + +```json +{ + "name": "sampleRecord", + "type": "record", + "owning_team": "@Data-Science", + "data_tier": "Bronze", + "fields": [{ + "name": "field_1", + "type": "string", + "gdpr": { + "pii": true + } + }] +} +``` + +This can be mapped to DataHub metadata with `meta_mapping` config: + +```yaml +config: + meta_mapping: + owning_team: + match: "^@(.*)" + operation: "add_owner" + config: + owner_type: group + data_tier: + match: "Bronze|Silver|Gold" + operation: "add_term" + config: + term: "{{ $match }}" + field_meta_mapping: + gdpr.pii: + match: true + operation: "add_tag" + config: + tag: "pii" +``` + +The underlying implementation is similar to [dbt meta mapping](https://datahubproject.io/docs/generated/ingestion/sources/dbt#dbt-meta-automated-mappings), which has more detailed examples that can be used for reference. + diff --git a/metadata-ingestion/docs/sources/looker/lookml_post.md b/metadata-ingestion/docs/sources/looker/lookml_post.md index 818cb681c4e90..8ebbab4b9ed48 100644 --- a/metadata-ingestion/docs/sources/looker/lookml_post.md +++ b/metadata-ingestion/docs/sources/looker/lookml_post.md @@ -2,11 +2,11 @@ :::note -The integration can use an SQL parser to try to parse the tables the views depends on. +The integration can use an SQL parser to try to parse the tables the views depend on. ::: -This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package. +This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The default parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package. As this package doesn't officially support all the SQL dialects that Looker supports, the result might not be correct. You can, however, implement a custom parser and take it into use by setting the `sql_parser` configuration value. A custom SQL parser must inherit from `datahub.utilities.sql_parser.SQLParser` and must be made available to Datahub by, for example, installing it. The configuration then needs to be set to `module_name.ClassName` of the parser. @@ -15,12 +15,14 @@ and must be made available to Datahub by ,for example, installing it. The config Looker projects support organization as multiple git repos, with [remote includes that can refer to projects that are stored in a different repo](https://cloud.google.com/looker/docs/importing-projects#include_files_from_an_imported_project). If your Looker implementation uses multi-project setup, you can configure the LookML source to pull in metadata from your remote projects as well. If you are using local or remote dependencies, you will see include directives in your lookml files that look like this: + ``` include: "//e_flights/views/users.view.lkml" include: "//e_commerce/public/orders.view.lkml" ``` Also, you will see projects that are being referred to listed in your `manifest.lkml` file. Something like this: + ``` project_name: this_project @@ -34,9 +36,9 @@ remote_dependency: ga_360_block { } ``` - To ingest Looker repositories that are including files defined in other projects, you will need to use the `project_dependencies` directive within the configuration section. 
Consider the following scenario: + +- Your primary project refers to a remote project called `my_remote_project` +- The remote project is homed in the GitHub repo `my_org/my_remote_project` +- You have provisioned a GitHub deploy key and stored the credential in the environment variable (or UI secret), `${MY_REMOTE_PROJECT_DEPLOY_KEY}` @@ -71,6 +73,23 @@ source: :::note -This is not the same as ingesting the remote project as a primary Looker project because DataHub will not be processing the model files that might live in the remote project. If you want to additionally include the views accessible via the models in the remote project, create a second recipe where your remote project is the primary project. +This is not the same as ingesting the remote project as a primary Looker project because DataHub will not be processing the model files that might live in the remote project. If you want to additionally include the views accessible via the models in the remote project, create a second recipe where your remote project is the primary project. ::: + +### Debugging LookML Parsing Errors + +If you see messages like `my_file.view.lkml': "failed to load view file: Unable to find a matching expression for '' on line 5"` in the failure logs, it indicates a parsing error for the LookML file. + +The first thing to check is that the Looker IDE can validate the file without issues. You can check this by clicking the "Validate LookML" button in the IDE when in development mode. + +If that's not the issue, it might be because DataHub's parser, which is based on the [joshtemple/lkml](https://github.com/joshtemple/lkml) library, is slightly more strict than the official Looker parser. +Note that there's currently only one known discrepancy between the two parsers, and it's related to using [multiple colon characters](https://github.com/joshtemple/lkml/issues/82) when defining parameters. + +To check if DataHub can parse your LookML file syntax, you can use the `lkml` CLI tool. If this raises an exception, DataHub will fail to parse the file. + +```sh +pip install lkml + +lkml path/to/my_file.view.lkml +``` diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index 29ffa571c0ac8..a9b9b4b20f5ac 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -343,8 +343,15 @@ class AspectBag(TypedDict, total=False): "schemas_path", type=click.Path(exists=True, file_okay=False), required=True ) @click.argument("outdir", type=click.Path(), required=True) +@click.option("--check-unused-aspects", is_flag=True, default=False) +@click.option("--enable-custom-loader", is_flag=True, default=True) def generate( - entity_registry: str, pdl_path: str, schemas_path: str, outdir: str + entity_registry: str, + pdl_path: str, + schemas_path: str, + outdir: str, + check_unused_aspects: bool, + enable_custom_loader: bool, ) -> None: entities = load_entity_registry(Path(entity_registry)) schemas = load_schemas(schemas_path) @@ -388,10 +395,13 @@ def generate( aspect["Aspect"]["entityDoc"] = entity.doc # Check for unused aspects. We currently have quite a few. 
- # unused_aspects = set(aspects.keys()) - set().union( - # {entity.keyAspect for entity in entities}, - # *(set(entity.aspects) for entity in entities), - # ) + if check_unused_aspects: + unused_aspects = set(aspects.keys()) - set().union( + {entity.keyAspect for entity in entities}, + *(set(entity.aspects) for entity in entities), + ) + if unused_aspects: + raise ValueError(f"Unused aspects: {unused_aspects}") merged_schema = merge_schemas(list(schemas.values())) write_schema_files(merged_schema, outdir) @@ -404,6 +414,35 @@ def generate( Path(outdir) / "schema_classes.py", ) + if enable_custom_loader: + # Move schema_classes.py -> _schema_classes.py + # and add a custom loader. + (Path(outdir) / "_schema_classes.py").write_text( + (Path(outdir) / "schema_classes.py").read_text() + ) + (Path(outdir) / "schema_classes.py").write_text( + """ +# This is a specialized shim layer that allows us to dynamically load custom models from elsewhere. + +import importlib +from typing import TYPE_CHECKING + +from datahub.utilities._custom_package_loader import get_custom_models_package + +_custom_package_path = get_custom_models_package() + +if TYPE_CHECKING or not _custom_package_path: + from ._schema_classes import * + + # Required explicitly because __all__ doesn't include _ prefixed names. + from ._schema_classes import _Aspect, __SCHEMA_TYPES +else: + _custom_package = importlib.import_module(_custom_package_path) + globals().update(_custom_package.__dict__) + +""" + ) + # Keep a copy of a few raw avsc files. required_avsc_schemas = {"MetadataChangeEvent", "MetadataChangeProposal"} schema_save_dir = Path(outdir) / "schemas" diff --git a/metadata-ingestion/scripts/custom_package_codegen.py b/metadata-ingestion/scripts/custom_package_codegen.py new file mode 100644 index 0000000000000..4a674550d49df --- /dev/null +++ b/metadata-ingestion/scripts/custom_package_codegen.py @@ -0,0 +1,119 @@ +import re +import subprocess +import sys +from pathlib import Path + +import avro_codegen +import click + +if sys.version_info < (3, 10): + from importlib_metadata import version +else: + from importlib.metadata import version + +_avrogen_version = version("avro-gen3") + +autogen_header = """# Autogenerated by datahub's custom_package_codegen.py +# DO NOT EDIT THIS FILE DIRECTLY +""" + + +def python_package_name_normalize(name): + return re.sub(r"[-_.]+", "_", name).lower() + + +@click.command() +@click.argument( + "entity_registry", type=click.Path(exists=True, dir_okay=False), required=True +) +@click.argument( + "pdl_path", type=click.Path(exists=True, file_okay=False), required=True +) +@click.argument( + "schemas_path", type=click.Path(exists=True, file_okay=False), required=True +) +@click.argument("outdir", type=click.Path(), required=True) +@click.argument("package_name", type=str, required=True) +@click.argument("package_version", type=str, required=True) +@click.pass_context +def generate( + ctx: click.Context, + entity_registry: str, + pdl_path: str, + schemas_path: str, + outdir: str, + package_name: str, + package_version: str, +) -> None: + package_path = Path(outdir) / package_name + if package_path.is_absolute(): + raise click.UsageError("outdir must be a relative path") + + python_package_name = python_package_name_normalize(package_name) + click.echo( + f"Generating distribution {package_name} (package name {python_package_name}) at {package_path}" + ) + + src_path = package_path / "src" / python_package_name + src_path.mkdir(parents=True) + + ctx.invoke( + avro_codegen.generate, + 
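+        # Descriptive note: ctx.invoke re-enters the avro_codegen Click command
+        # in-process; the keyword arguments below mirror its CLI options, and
+        # enable_custom_loader=False skips the dynamic-loader shim so the
+        # generated package ships plain model classes.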
entity_registry=entity_registry, + pdl_path=pdl_path, + schemas_path=schemas_path, + outdir=str(src_path / "models"), + enable_custom_loader=False, + ) + + (src_path / "__init__.py").write_text( + f"""{autogen_header} +__package_name__ = "{package_name}" +__version__ = "{package_version}" +""" + ) + + (package_path / "setup.py").write_text( + f"""{autogen_header} +from setuptools import setup + +_package_name = "{package_name}" +_package_version = "{package_version}" + +setup( + name=_package_name, + version=_package_version, + install_requires=[ + "avro-gen3=={_avrogen_version}", + "acryl-datahub", + ], + entry_points={{ + "datahub.custom_packages": [ + "models={python_package_name}.models.schema_classes", + ], + }}, +) +""" + ) + + # TODO add a README.md? + click.echo("Building package...") + subprocess.run(["python", "-m", "build", str(package_path)]) + + click.echo() + click.secho(f"Generated package at {package_path}", fg="green") + click.echo( + "This package should be installed alongside the main acryl-datahub package." + ) + click.echo() + click.echo(f"Install the custom package locally with `pip install {package_path}`") + click.echo( + f"To enable others to use it, share the file at {package_path}/dist/*.whl and have them install it with `pip install .whl`" + ) + click.echo( + f"Alternatively, publish it to PyPI with `twine upload {package_path}/dist/*`" + ) + + +if __name__ == "__main__": + generate() diff --git a/metadata-ingestion/scripts/custom_package_codegen.sh b/metadata-ingestion/scripts/custom_package_codegen.sh new file mode 100755 index 0000000000000..aec6293a4ef45 --- /dev/null +++ b/metadata-ingestion/scripts/custom_package_codegen.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -euo pipefail + +OUTDIR=./custom-package +PACKAGE_NAME="${1:?package name is required}" +PACKAGE_VERSION="${2:?package version is required}" + +# Note: this assumes that datahub has already been built with `./gradlew build`. +DATAHUB_ROOT=.. + +SCHEMAS_PDL="$DATAHUB_ROOT/metadata-models/src/main/pegasus/com/linkedin" +SCHEMAS_AVSC="$DATAHUB_ROOT/metadata-events/mxe-schemas/src/renamed/avro/com/linkedin" +ENTITY_REGISTRY="$DATAHUB_ROOT/metadata-models/src/main/resources/entity-registry.yml" + +rm -r $OUTDIR 2>/dev/null || true +python scripts/custom_package_codegen.py $ENTITY_REGISTRY $SCHEMAS_PDL $SCHEMAS_AVSC $OUTDIR "$PACKAGE_NAME" "$PACKAGE_VERSION" diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index a119eba25be2a..2387e848e68a2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -36,10 +36,11 @@ def get_long_description(): "click-default-group", "PyYAML", "toml>=0.10.0", - "entrypoints", + # In Python 3.10+, importlib_metadata is included in the standard library. + "importlib_metadata>=4.0.0; python_version < '3.10'", "docker", "expandvars>=0.6.5", - "avro-gen3==0.7.10", + "avro-gen3==0.7.11", # "avro-gen3 @ git+https://github.com/acryldata/avro_gen@master#egg=avro-gen3", "avro>=1.10.2,<1.11", "python-dateutil>=2.8.0", @@ -138,7 +139,7 @@ def get_long_description(): sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==18.0.2.dev15", + "acryl-sqlglot==18.5.2.dev45", } aws_common = { @@ -404,7 +405,12 @@ def get_long_description(): "types-pkg_resources", "types-six", "types-python-dateutil", - "types-requests>=2.28.11.6", + # We need to avoid 2.31.0.5 and 2.31.0.4 due to + # https://github.com/python/typeshed/issues/10764. 
Once that + # issue is resolved, we can remove the upper bound and change it + # to a != constraint. + # We have a PR up to fix the underlying issue: https://github.com/python/typeshed/pull/10776. + "types-requests>=2.28.11.6,<=2.31.0.3", "types-toml", "types-PyMySQL", "types-PyYAML", @@ -425,7 +431,6 @@ def get_long_description(): "types-termcolor>=1.0.0", "types-Deprecated", "types-protobuf>=4.21.0.1", - "types-tzlocal", "sqlalchemy2-stubs", } @@ -505,6 +510,7 @@ def get_long_description(): "nifi", "vertica", "mode", + "kafka-connect", ] if plugin for dependency in plugins[plugin] @@ -643,7 +649,7 @@ def get_long_description(): "datahub = datahub.ingestion.reporting.datahub_ingestion_run_summary_provider:DatahubIngestionRunSummaryProvider", "file = datahub.ingestion.reporting.file_reporter:FileReporter", ], - "apache_airflow_provider": ["provider_info=datahub_provider:get_provider_info"], + "datahub.custom_packages": [], } @@ -710,6 +716,7 @@ def get_long_description(): ] ) ), + "cloud": ["acryl-datahub-cloud"], "dev": list(dev_requirements), "testing-utils": list(test_api_requirements), # To import `datahub.testing` "integration-tests": list(full_test_dev_requirements), diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index f20272ecd9dbf..f7996900f7a7a 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -131,7 +131,7 @@ def plugins(verbose: bool) -> None: """List the enabled ingestion plugins.""" click.secho("Sources:", bold=True) - click.echo(source_registry.summary(verbose=verbose)) + click.echo(source_registry.summary(verbose=verbose, col_width=25)) click.echo() click.secho("Sinks:", bold=True) click.echo(sink_registry.summary(verbose=verbose)) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 5931bf89b010b..9b5716408f3e4 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -10,7 +10,6 @@ import click import click_spinner -import tzlocal from click_default_group import DefaultGroup from tabulate import tabulate @@ -248,17 +247,17 @@ async def run_ingestion_and_check_upgrade() -> int: @click.option( "--time-zone", type=str, - help=f"Timezone for the schedule. By default uses the timezone of the current system: {tzlocal.get_localzone_name()}.", + help="Timezone for the schedule in 'America/New_York' format. 
Uses UTC by default.", required=False, - default=tzlocal.get_localzone_name(), + default="UTC", ) def deploy( name: str, config: str, - urn: str, + urn: Optional[str], executor_id: str, - cli_version: str, - schedule: str, + cli_version: Optional[str], + schedule: Optional[str], time_zone: str, ) -> None: """ @@ -276,8 +275,6 @@ def deploy( resolve_env_vars=False, ) - graphql_query: str - variables: dict = { "urn": urn, "name": name, @@ -296,7 +293,7 @@ def deploy( exit() logger.info("Found recipe URN, will update recipe.") - graphql_query = textwrap.dedent( + graphql_query: str = textwrap.dedent( """ mutation updateIngestionSource( $urn: String!, diff --git a/metadata-ingestion/src/datahub/ingestion/api/registry.py b/metadata-ingestion/src/datahub/ingestion/api/registry.py index 56ea716948199..7d8192aff83d5 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/registry.py +++ b/metadata-ingestion/src/datahub/ingestion/api/registry.py @@ -15,18 +15,17 @@ Union, ) -import entrypoints import typing_inspect from datahub import __package_name__ from datahub.configuration.common import ConfigurationError -T = TypeVar("T") +if sys.version_info < (3, 10): + from importlib_metadata import entry_points +else: + from importlib.metadata import entry_points -# TODO: The `entrypoints` library is in maintenance mode and is not actively developed. -# We should switch to importlib.metadata once we drop support for Python 3.7. -# See https://entrypoints.readthedocs.io/en/latest/ and -# https://docs.python.org/3/library/importlib.metadata.html. +T = TypeVar("T") def _is_importable(path: str) -> bool: @@ -141,16 +140,8 @@ def register_from_entrypoint(self, entry_point_key: str) -> None: self._entrypoints.append(entry_point_key) def _load_entrypoint(self, entry_point_key: str) -> None: - entry_point: entrypoints.EntryPoint - for entry_point in entrypoints.get_group_all(entry_point_key): - name = entry_point.name - - if entry_point.object_name is None: - path = entry_point.module_name - else: - path = f"{entry_point.module_name}:{entry_point.object_name}" - - self.register_lazy(name, path) + for entry_point in entry_points(group=entry_point_key): + self.register_lazy(entry_point.name, entry_point.value) def _materialize_entrypoints(self) -> None: for entry_point_key in self._entrypoints: diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index 75de18e9037ee..4acf99a50e50e 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -4,6 +4,7 @@ import avro.schema +from datahub.emitter import mce_builder from datahub.metadata.com.linkedin.pegasus2avro.schema import ( ArrayTypeClass, BooleanTypeClass, @@ -21,7 +22,7 @@ TimeTypeClass, UnionTypeClass, ) -from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass +from datahub.utilities.mapping import Constants, OperationProcessor """A helper file for Avro schema -> MCE schema transformations""" @@ -98,7 +99,14 @@ class AvroToMceSchemaConverter: "uuid": StringTypeClass, } - def __init__(self, is_key_schema: bool, default_nullable: bool = False) -> None: + def __init__( + self, + is_key_schema: bool, + default_nullable: bool = False, + meta_mapping_processor: Optional[OperationProcessor] = None, + schema_tags_field: Optional[str] = None, + tag_prefix: Optional[str] = None, + ) -> None: # Tracks the prefix name stack for nested name generation. 
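+        # (The optional meta-mapping arguments accepted above are stored on the
+        # instance a few lines below and consulted as each field is emitted.)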
self._prefix_name_stack: PrefixNameStack = [self.version_string] # Tracks the fields on the current path. @@ -112,6 +120,10 @@ def __init__(self, is_key_schema: bool, default_nullable: bool = False) -> None: if is_key_schema: # Helps maintain backwards-compatibility. Annotation for any field that is part of key-schema. self._prefix_name_stack.append("[key=True]") + # Meta mapping + self._meta_mapping_processor = meta_mapping_processor + self._schema_tags_field = schema_tags_field + self._tag_prefix = tag_prefix # Map of avro schema type to the conversion handler self._avro_type_to_mce_converter_map: Dict[ avro.schema.Schema, @@ -317,7 +329,25 @@ def emit(self) -> Generator[SchemaField, None, None]: merged_props.update(self._schema.other_props) merged_props.update(schema.other_props) - tags = None + # Parse meta_mapping + meta_aspects: Dict[str, Any] = {} + if self._converter._meta_mapping_processor: + meta_aspects = self._converter._meta_mapping_processor.process( + merged_props + ) + + tags: List[str] = [] + if self._converter._schema_tags_field: + for tag in merged_props.get(self._converter._schema_tags_field, []): + tags.append(self._converter._tag_prefix + tag) + + meta_tags_aspect = meta_aspects.get(Constants.ADD_TAG_OPERATION) + if meta_tags_aspect: + tags += [ + tag_association.tag[len("urn:li:tag:") :] + for tag_association in meta_tags_aspect.tags + ] + if "deprecated" in merged_props: description = ( f"DEPRECATED: {merged_props['deprecated']}\n" @@ -325,9 +355,13 @@ def emit(self) -> Generator[SchemaField, None, None]: if description else "" ) - tags = GlobalTagsClass( - tags=[TagAssociationClass(tag="urn:li:tag:Deprecated")] - ) + tags.append("Deprecated") + + tags_aspect = None + if tags: + tags_aspect = mce_builder.make_global_tag_aspect_with_tag_list(tags) + + meta_terms_aspect = meta_aspects.get(Constants.ADD_TERM_OPERATION) logical_type_name: Optional[str] = ( # logicalType nested inside type @@ -349,7 +383,8 @@ def emit(self) -> Generator[SchemaField, None, None]: recursive=False, nullable=self._converter._is_nullable(schema), isPartOfKey=self._converter._is_key_schema, - globalTags=tags, + globalTags=tags_aspect, + glossaryTerms=meta_terms_aspect, jsonProps=json.dumps(merged_props) if merged_props else None, ) yield field @@ -447,7 +482,9 @@ def _gen_from_non_field_nested_schemas( actual_schema = self._get_underlying_type_if_option_as_union(schema, schema) with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager( - schema, actual_schema, self + schema, + actual_schema, + self, ) as fe_schema: if isinstance( actual_schema, @@ -478,7 +515,9 @@ def _gen_non_nested_to_mce_fields( ) -> Generator[SchemaField, None, None]: """Handles generation of MCE SchemaFields for non-nested AVRO types.""" with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager( - schema, schema, self + schema, + schema, + self, ) as non_nested_emitter: yield from non_nested_emitter.emit() @@ -496,9 +535,12 @@ def _to_mce_fields( @classmethod def to_mce_fields( cls, - avro_schema_string: str, + avro_schema: avro.schema.Schema, is_key_schema: bool, default_nullable: bool = False, + meta_mapping_processor: Optional[OperationProcessor] = None, + schema_tags_field: Optional[str] = None, + tag_prefix: Optional[str] = None, ) -> Generator[SchemaField, None, None]: """ Converts a key or value type AVRO schema string to appropriate MCE SchemaFields. @@ -506,8 +548,14 @@ def to_mce_fields( :param is_key_schema: True if it is a key-schema. :return: An MCE SchemaField generator. 
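+
+        Example (an illustrative sketch only; the inline record literal is a stand-in):
+
+            schema = avro.schema.parse('{"type": "record", "name": "R", "fields": []}')
+            fields = list(AvroToMceSchemaConverter.to_mce_fields(schema, is_key_schema=False))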
""" - avro_schema = avro.schema.parse(avro_schema_string) - converter = cls(is_key_schema, default_nullable) + # avro_schema = avro.schema.parse(avro_schema) + converter = cls( + is_key_schema, + default_nullable, + meta_mapping_processor, + schema_tags_field, + tag_prefix, + ) yield from converter._to_mce_fields(avro_schema) @@ -516,28 +564,40 @@ def to_mce_fields( def avro_schema_to_mce_fields( - avro_schema_string: str, + avro_schema: Union[avro.schema.Schema, str], is_key_schema: bool = False, default_nullable: bool = False, + meta_mapping_processor: Optional[OperationProcessor] = None, + schema_tags_field: Optional[str] = None, + tag_prefix: Optional[str] = None, swallow_exceptions: bool = True, ) -> List[SchemaField]: """ Converts an avro schema into schema fields compatible with MCE. - :param avro_schema_string: String representation of the AVRO schema. + :param avro_schema: AVRO schema, either as a string or as an avro.schema.Schema object. :param is_key_schema: True if it is a key-schema. Default is False (value-schema). :param swallow_exceptions: True if the caller wants exceptions to be suppressed + :param action_processor: Optional OperationProcessor to be used for meta mappings :return: The list of MCE compatible SchemaFields. """ try: + if isinstance(avro_schema, str): + avro_schema = avro.schema.parse(avro_schema) + return list( AvroToMceSchemaConverter.to_mce_fields( - avro_schema_string, is_key_schema, default_nullable + avro_schema, + is_key_schema, + default_nullable, + meta_mapping_processor, + schema_tags_field, + tag_prefix, ) ) except Exception: if swallow_exceptions: - logger.exception(f"Failed to parse {avro_schema_string} into mce fields.") + logger.exception(f"Failed to parse {avro_schema} into mce fields.") return [] else: raise diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index b371ab181e133..38e965f7f6587 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -44,14 +44,17 @@ TelemetryClientIdClass, ) from datahub.utilities.perf_timer import PerfTimer -from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn, guess_entity_type if TYPE_CHECKING: from datahub.ingestion.source.state.entity_removal_state import ( GenericCheckpointState, ) - from datahub.utilities.sqlglot_lineage import SchemaResolver, SqlParsingResult + from datahub.utilities.sqlglot_lineage import ( + GraphQLSchemaMetadata, + SchemaResolver, + SqlParsingResult, + ) logger = logging.getLogger(__name__) @@ -543,129 +546,110 @@ def get_container_urns_by_filter( logger.debug(f"yielding {x['entity']}") yield x["entity"] - def get_urns_by_filter( + def _bulk_fetch_schema_info_by_filter( self, *, - entity_types: Optional[List[str]] = None, platform: Optional[str] = None, platform_instance: Optional[str] = None, env: Optional[str] = None, query: Optional[str] = None, container: Optional[str] = None, status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED, - batch_size: int = 10000, + batch_size: int = 100, extraFilters: Optional[List[SearchFilterRule]] = None, - ) -> Iterable[str]: - """Fetch all urns that match all of the given filters. + ) -> Iterable[Tuple[str, "GraphQLSchemaMetadata"]]: + """Fetch schema info for datasets that match all of the given filters. - Filters are combined conjunctively. If multiple filters are specified, the results will match all of them. 
- Note that specifying a platform filter will automatically exclude all entity types that do not have a platform. - The same goes for the env filter. + :return: An iterable of (urn, schema info) tuple that match the filters. + """ + types = [_graphql_entity_type("dataset")] - :param entity_types: List of entity types to include. If None, all entity types will be returned. - :param platform: Platform to filter on. If None, all platforms will be returned. - :param platform_instance: Platform instance to filter on. If None, all platform instances will be returned. - :param env: Environment (e.g. PROD, DEV) to filter on. If None, all environments will be returned. - :param query: Query string to filter on. If None, all entities will be returned. - :param container: A container urn that entities must be within. - This works recursively, so it will include entities within sub-containers as well. - If None, all entities will be returned. - Note that this requires browsePathV2 aspects (added in 0.10.4+). - :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities. - :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters. + # Add the query default of * if no query is specified. + query = query or "*" - :return: An iterable of urns that match the filters. - """ + orFilters = self.generate_filter( + platform, platform_instance, env, container, status, extraFilters + ) - types: Optional[List[str]] = None - if entity_types is not None: - if not entity_types: - raise ValueError( - "entity_types cannot be an empty list; use None for all entities" - ) + graphql_query = textwrap.dedent( + """ + query scrollUrnsWithFilters( + $types: [EntityType!], + $query: String!, + $orFilters: [AndFilterInput!], + $batchSize: Int!, + $scrollId: String) { - types = [_graphql_entity_type(entity_type) for entity_type in entity_types] + scrollAcrossEntities(input: { + query: $query, + count: $batchSize, + scrollId: $scrollId, + types: $types, + orFilters: $orFilters, + searchFlags: { + skipHighlighting: true + skipAggregates: true + } + }) { + nextScrollId + searchResults { + entity { + urn + ... on Dataset { + schemaMetadata(version: 0) { + fields { + fieldPath + nativeDataType + } + } + } + } + } + } + } + """ + ) - # Add the query default of * if no query is specified. - query = query or "*" + variables = { + "types": types, + "query": query, + "orFilters": orFilters, + "batchSize": batch_size, + } + + for entity in self._scroll_across_entities(graphql_query, variables): + if entity.get("schemaMetadata"): + yield entity["urn"], entity["schemaMetadata"] + def generate_filter( + self, + platform: Optional[str], + platform_instance: Optional[str], + env: Optional[str], + container: Optional[str], + status: RemovedStatusFilter, + extraFilters: Optional[List[SearchFilterRule]], + ) -> List[Dict[str, List[SearchFilterRule]]]: andFilters: List[SearchFilterRule] = [] # Platform filter. if platform: - andFilters += [ - { - "field": "platform.keyword", - "values": [make_data_platform_urn(platform)], - "condition": "EQUAL", - } - ] + andFilters.append(self._get_platform_filter(platform)) # Platform instance filter. if platform_instance: - if platform: - # Massage the platform instance into a fully qualified urn, if necessary. - platform_instance = make_dataplatform_instance_urn( - platform, platform_instance - ) - - # Warn if platform_instance is not a fully qualified urn. 
- # TODO: Change this once we have a first-class data platform instance urn type. - if guess_entity_type(platform_instance) != "dataPlatformInstance": - raise ValueError( - f"Invalid data platform instance urn: {platform_instance}" - ) - - andFilters += [ - { - "field": "platformInstance", - "values": [platform_instance], - "condition": "EQUAL", - } - ] + andFilters.append( + self._get_platform_instance_filter(platform, platform_instance) + ) # Browse path v2 filter. if container: - # Warn if container is not a fully qualified urn. - # TODO: Change this once we have a first-class container urn type. - if guess_entity_type(container) != "container": - raise ValueError(f"Invalid container urn: {container}") - - andFilters += [ - { - "field": "browsePathV2", - "values": [container], - "condition": "CONTAIN", - } - ] + andFilters.append(self._get_container_filter(container)) # Status filter. - if status == RemovedStatusFilter.NOT_SOFT_DELETED: - # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the - # removed field is simply not present in the ElasticSearch document. Ideally this - # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to - # use a negated filter. - andFilters.append( - { - "field": "removed", - "values": ["true"], - "condition": "EQUAL", - "negated": True, - } - ) - elif status == RemovedStatusFilter.ONLY_SOFT_DELETED: - andFilters.append( - { - "field": "removed", - "values": ["true"], - "condition": "EQUAL", - } - ) - elif status == RemovedStatusFilter.ALL: - # We don't need to add a filter for this case. - pass - else: - raise ValueError(f"Invalid status filter: {status}") + status_filter = self._get_status_filer(status) + if status_filter: + andFilters.append(status_filter) # Extra filters. if extraFilters: @@ -673,33 +657,9 @@ def get_urns_by_filter( orFilters: List[Dict[str, List[SearchFilterRule]]] = [{"and": andFilters}] - # Env filter. + # Env filter if env: - # The env filter is a bit more tricky since it's not always stored - # in the same place in ElasticSearch. - - envOrConditions: List[SearchFilterRule] = [ - # For most entity types, we look at the origin field. - { - "field": "origin", - "value": env, - "condition": "EQUAL", - }, - # For containers, we look at the customProperties field. - # For any containers created after https://github.com/datahub-project/datahub/pull/8027, - # we look for the "env" property. Otherwise, we use the "instance" property. - { - "field": "customProperties", - "value": f"env={env}", - }, - { - "field": "customProperties", - "value": f"instance={env}", - }, - # Note that not all entity types have an env (e.g. dashboards / charts). - # If the env filter is specified, these will be excluded. - ] - + envOrConditions = self._get_env_or_conditions(env) # This matches ALL of the andFilters and at least one of the envOrConditions. orFilters = [ {"and": andFilters["and"] + [extraCondition]} @@ -707,6 +667,52 @@ def get_urns_by_filter( for andFilters in orFilters ] + return orFilters + + def get_urns_by_filter( + self, + *, + entity_types: Optional[List[str]] = None, + platform: Optional[str] = None, + platform_instance: Optional[str] = None, + env: Optional[str] = None, + query: Optional[str] = None, + container: Optional[str] = None, + status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED, + batch_size: int = 10000, + extraFilters: Optional[List[SearchFilterRule]] = None, + ) -> Iterable[str]: + """Fetch all urns that match all of the given filters. 
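+
+        Example (illustrative; assumes a connected DataHubGraph instance named `graph`):
+
+            urns = list(graph.get_urns_by_filter(
+                entity_types=["dataset"], platform="snowflake", env="PROD"))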
+ + Filters are combined conjunctively. If multiple filters are specified, the results will match all of them. + Note that specifying a platform filter will automatically exclude all entity types that do not have a platform. + The same goes for the env filter. + + :param entity_types: List of entity types to include. If None, all entity types will be returned. + :param platform: Platform to filter on. If None, all platforms will be returned. + :param platform_instance: Platform instance to filter on. If None, all platform instances will be returned. + :param env: Environment (e.g. PROD, DEV) to filter on. If None, all environments will be returned. + :param query: Query string to filter on. If None, all entities will be returned. + :param container: A container urn that entities must be within. + This works recursively, so it will include entities within sub-containers as well. + If None, all entities will be returned. + Note that this requires browsePathV2 aspects (added in 0.10.4+). + :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities. + :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters. + + :return: An iterable of urns that match the filters. + """ + + types = self._get_types(entity_types) + + # Add the query default of * if no query is specified. + query = query or "*" + + # Env filter. + orFilters = self.generate_filter( + platform, platform_instance, env, container, status, extraFilters + ) + graphql_query = textwrap.dedent( """ query scrollUrnsWithFilters( @@ -738,18 +744,26 @@ def get_urns_by_filter( """ ) + variables = { + "types": types, + "query": query, + "orFilters": orFilters, + "batchSize": batch_size, + } + + for entity in self._scroll_across_entities(graphql_query, variables): + yield entity["urn"] + + def _scroll_across_entities( + self, graphql_query: str, variables_orig: dict + ) -> Iterable[dict]: + variables = variables_orig.copy() first_iter = True scroll_id: Optional[str] = None while first_iter or scroll_id: first_iter = False + variables["scrollId"] = scroll_id - variables = { - "types": types, - "query": query, - "orFilters": orFilters, - "batchSize": batch_size, - "scrollId": scroll_id, - } response = self.execute_graphql( graphql_query, variables=variables, @@ -757,13 +771,116 @@ def get_urns_by_filter( data = response["scrollAcrossEntities"] scroll_id = data["nextScrollId"] for entry in data["searchResults"]: - yield entry["entity"]["urn"] + yield entry["entity"] if scroll_id: logger.debug( f"Scrolling to next scrollAcrossEntities page: {scroll_id}" ) + def _get_env_or_conditions(self, env: str) -> List[SearchFilterRule]: + # The env filter is a bit more tricky since it's not always stored + # in the same place in ElasticSearch. + return [ + # For most entity types, we look at the origin field. + { + "field": "origin", + "value": env, + "condition": "EQUAL", + }, + # For containers, we look at the customProperties field. + # For any containers created after https://github.com/datahub-project/datahub/pull/8027, + # we look for the "env" property. Otherwise, we use the "instance" property. + { + "field": "customProperties", + "value": f"env={env}", + }, + { + "field": "customProperties", + "value": f"instance={env}", + }, + # Note that not all entity types have an env (e.g. dashboards / charts). + # If the env filter is specified, these will be excluded. 
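+            # Illustrative expansion (assumed env="PROD"): the three OR'd rules
+            # above match origin == "PROD", customProperties containing "env=PROD",
+            # or customProperties containing "instance=PROD".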
+ ] + + def _get_status_filer( + self, status: RemovedStatusFilter + ) -> Optional[SearchFilterRule]: + if status == RemovedStatusFilter.NOT_SOFT_DELETED: + # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the + # removed field is simply not present in the ElasticSearch document. Ideally this + # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to + # use a negated filter. + return { + "field": "removed", + "values": ["true"], + "condition": "EQUAL", + "negated": True, + } + + elif status == RemovedStatusFilter.ONLY_SOFT_DELETED: + return { + "field": "removed", + "values": ["true"], + "condition": "EQUAL", + } + + elif status == RemovedStatusFilter.ALL: + # We don't need to add a filter for this case. + return None + else: + raise ValueError(f"Invalid status filter: {status}") + + def _get_container_filter(self, container: str) -> SearchFilterRule: + # Warn if container is not a fully qualified urn. + # TODO: Change this once we have a first-class container urn type. + if guess_entity_type(container) != "container": + raise ValueError(f"Invalid container urn: {container}") + + return { + "field": "browsePathV2", + "values": [container], + "condition": "CONTAIN", + } + + def _get_platform_instance_filter( + self, platform: Optional[str], platform_instance: str + ) -> SearchFilterRule: + if platform: + # Massage the platform instance into a fully qualified urn, if necessary. + platform_instance = make_dataplatform_instance_urn( + platform, platform_instance + ) + + # Warn if platform_instance is not a fully qualified urn. + # TODO: Change this once we have a first-class data platform instance urn type. + if guess_entity_type(platform_instance) != "dataPlatformInstance": + raise ValueError(f"Invalid data platform instance urn: {platform_instance}") + + return { + "field": "platformInstance", + "values": [platform_instance], + "condition": "EQUAL", + } + + def _get_platform_filter(self, platform: str) -> SearchFilterRule: + return { + "field": "platform.keyword", + "values": [make_data_platform_urn(platform)], + "condition": "EQUAL", + } + + def _get_types(self, entity_types: Optional[List[str]]) -> Optional[List[str]]: + types: Optional[List[str]] = None + if entity_types is not None: + if not entity_types: + raise ValueError( + "entity_types cannot be an empty list; use None for all entities" + ) + + types = [_graphql_entity_type(entity_type) for entity_type in entity_types] + return types + def get_latest_pipeline_checkpoint( self, pipeline_name: str, platform: str ) -> Optional[Checkpoint["GenericCheckpointState"]]: @@ -1033,43 +1150,36 @@ def initialize_schema_resolver_from_datahub( self, platform: str, platform_instance: Optional[str], env: str ) -> Tuple["SchemaResolver", Set[str]]: logger.info("Initializing schema resolver") - - # TODO: Filter on platform instance? 
- logger.info(f"Fetching urns for platform {platform}, env {env}") - with PerfTimer() as timer: - urns = set( - self.get_urns_by_filter( - entity_types=[DatasetUrn.ENTITY_TYPE], - platform=platform, - env=env, - batch_size=3000, - ) - ) - logger.info( - f"Fetched {len(urns)} urns in {timer.elapsed_seconds()} seconds" - ) - schema_resolver = self._make_schema_resolver( platform, platform_instance, env, include_graph=False ) + + logger.info(f"Fetching schemas for platform {platform}, env {env}") + urns = [] + count = 0 with PerfTimer() as timer: - count = 0 - for i, urn in enumerate(urns): - if i % 1000 == 0: - logger.debug(f"Loaded {i} schema metadata") + for urn, schema_info in self._bulk_fetch_schema_info_by_filter( + platform=platform, + platform_instance=platform_instance, + env=env, + ): try: - schema_metadata = self.get_aspect(urn, SchemaMetadataClass) - if schema_metadata: - schema_resolver.add_schema_metadata(urn, schema_metadata) - count += 1 + urns.append(urn) + schema_resolver.add_graphql_schema_metadata(urn, schema_info) + count += 1 except Exception: - logger.warning("Failed to load schema metadata", exc_info=True) + logger.warning("Failed to add schema info", exc_info=True) + + if count % 1000 == 0: + logger.debug( + f"Loaded {count} schema info in {timer.elapsed_seconds()} seconds" + ) logger.info( - f"Loaded {count} schema metadata in {timer.elapsed_seconds()} seconds" + f"Finished loading total {count} schema info in {timer.elapsed_seconds()} seconds" ) logger.info("Finished initializing schema resolver") - return schema_resolver, urns + return schema_resolver, set(urns) def parse_sql_lineage( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index ae49a4ba17c11..8a16b1a4a5f6b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -285,9 +285,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): # Maps view ref -> actual sql self.view_definitions: FileBackedDict[str] = FileBackedDict() - self.sql_parser_schema_resolver = SchemaResolver( - platform=self.platform, env=self.config.env - ) + self.sql_parser_schema_resolver = self._init_schema_resolver() self.add_config_to_report() atexit.register(cleanup, config) @@ -446,6 +444,27 @@ def test_connection(config_dict: dict) -> TestConnectionReport: ) return test_report + def _init_schema_resolver(self) -> SchemaResolver: + schema_resolution_required = ( + self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser + ) + schema_ingestion_enabled = ( + self.config.include_views and self.config.include_tables + ) + + if schema_resolution_required and not schema_ingestion_enabled: + if self.ctx.graph: + return self.ctx.graph.initialize_schema_resolver_from_datahub( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + )[0] + else: + logger.warning( + "Failed to load schema info from DataHub as DataHubGraph is missing.", + ) + return SchemaResolver(platform=self.platform, env=self.config.env) + def get_dataplatform_instance_aspect( self, dataset_urn: str, project_id: str ) -> MetadataWorkUnit: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 2d6882caa38ef..661589a0c58e5 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -6,6 +6,7 @@ import pydantic +from datahub.ingestion.api.report import Report from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport @@ -16,18 +17,20 @@ logger: logging.Logger = logging.getLogger(__name__) -class BigQuerySchemaApiPerfReport: - list_projects = PerfTimer() - list_datasets = PerfTimer() - get_columns_for_dataset = PerfTimer() - get_tables_for_dataset = PerfTimer() - list_tables = PerfTimer() - get_views_for_dataset = PerfTimer() +@dataclass +class BigQuerySchemaApiPerfReport(Report): + list_projects: PerfTimer = field(default_factory=PerfTimer) + list_datasets: PerfTimer = field(default_factory=PerfTimer) + get_columns_for_dataset: PerfTimer = field(default_factory=PerfTimer) + get_tables_for_dataset: PerfTimer = field(default_factory=PerfTimer) + list_tables: PerfTimer = field(default_factory=PerfTimer) + get_views_for_dataset: PerfTimer = field(default_factory=PerfTimer) -class BigQueryAuditLogApiPerfReport: - get_exported_log_entries = PerfTimer() - list_log_entries = PerfTimer() +@dataclass +class BigQueryAuditLogApiPerfReport(Report): + get_exported_log_entries: PerfTimer = field(default_factory=PerfTimer) + list_log_entries: PerfTimer = field(default_factory=PerfTimer) @dataclass diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py index 0bdcb115b377c..54475cb509621 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py @@ -4,6 +4,7 @@ from hashlib import md5 from typing import Any, List, Optional, Set, Tuple +import avro.schema import jsonref from confluent_kafka.schema_registry.schema_registry_client import ( RegisteredSchema, @@ -22,6 +23,8 @@ SchemaField, SchemaMetadata, ) +from datahub.metadata.schema_classes import OwnershipSourceTypeClass +from datahub.utilities.mapping import OperationProcessor logger = logging.getLogger(__name__) @@ -59,6 +62,14 @@ def __init__( except Exception as e: logger.warning(f"Failed to get subjects from schema registry: {e}") + self.field_meta_processor = OperationProcessor( + self.source_config.field_meta_mapping, + self.source_config.tag_prefix, + OwnershipSourceTypeClass.SERVICE, + self.source_config.strip_user_ids_from_email, + match_nested_props=True, + ) + @classmethod def create( cls, source_config: KafkaSourceConfig, report: KafkaSourceReport @@ -290,10 +301,19 @@ def _get_schema_fields( fields: List[SchemaField] = [] if schema.schema_type == "AVRO": cleaned_str: str = self.get_schema_str_replace_confluent_ref_avro(schema) + avro_schema = avro.schema.parse(cleaned_str) + # "value.id" or "value.[type=string]id" fields = schema_util.avro_schema_to_mce_fields( - cleaned_str, is_key_schema=is_key_schema + avro_schema, + is_key_schema=is_key_schema, + meta_mapping_processor=self.field_meta_processor + if self.source_config.enable_meta_mapping + else None, + schema_tags_field=self.source_config.schema_tags_field, + tag_prefix=self.source_config.tag_prefix, ) + elif schema.schema_type == "PROTOBUF": imported_schemas: List[ ProtobufSchema diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 61f6103347eb3..566304e1999b7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -5,6 +5,7 @@ from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Type +import avro.schema import confluent_kafka import confluent_kafka.admin import pydantic @@ -18,6 +19,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.kafka import KafkaConsumerConnectionConfig from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.emitter import mce_builder from datahub.emitter.mce_builder import ( make_data_platform_urn, make_dataplatform_instance_urn, @@ -56,8 +58,10 @@ DataPlatformInstanceClass, DatasetPropertiesClass, KafkaSchemaClass, + OwnershipSourceTypeClass, SubTypesClass, ) +from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.registries.domain_registry import DomainRegistry logger = logging.getLogger(__name__) @@ -89,6 +93,29 @@ class KafkaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): default="datahub.ingestion.source.confluent_schema_registry.ConfluentSchemaRegistry", description="The fully qualified implementation class(custom) that implements the KafkaSchemaRegistryBase interface.", ) + schema_tags_field = pydantic.Field( + default="tags", + description="The field name in the schema metadata that contains the tags to be added to the dataset.", + ) + enable_meta_mapping = pydantic.Field( + default=True, + description="When enabled, applies the mappings that are defined through the meta_mapping directives.", + ) + meta_mapping: Dict = pydantic.Field( + default={}, + description="mapping rules that will be executed against top-level schema properties. Refer to the section below on meta automated mappings.", + ) + field_meta_mapping: Dict = pydantic.Field( + default={}, + description="mapping rules that will be executed against field-level schema properties. Refer to the section below on meta automated mappings.", + ) + strip_user_ids_from_email: bool = pydantic.Field( + default=False, + description="Whether or not to strip email id while adding owners using meta mappings.", + ) + tag_prefix: str = pydantic.Field( + default="", description="Prefix added to tags during ingestion." + ) ignore_warnings_on_schema_type: bool = pydantic.Field( default=False, description="Disables warnings reported for non-AVRO/Protobuf value or key schemas if set.", @@ -167,6 +194,14 @@ def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext): graph=self.ctx.graph, ) + self.meta_processor = OperationProcessor( + self.source_config.meta_mapping, + self.source_config.tag_prefix, + OwnershipSourceTypeClass.SERVICE, + self.source_config.strip_user_ids_from_email, + match_nested_props=True, + ) + def init_kafka_admin_client(self) -> None: try: # TODO: Do we require separate config than existing consumer_config ? @@ -227,7 +262,6 @@ def _extract_record( logger.debug(f"topic = {topic}") AVRO = "AVRO" - DOC_KEY = "doc" # 1. Create the default dataset snapshot for the topic. dataset_name = topic @@ -261,8 +295,8 @@ def _extract_record( topic, topic_detail, extra_topic_config ) - # 4. Set dataset's description as top level doc, if topic schema type is avro - description = None + # 4. 
Set dataset's description, tags, ownership, etc, if topic schema type is avro + description: Optional[str] = None if ( schema_metadata is not None and isinstance(schema_metadata.platformSchema, KafkaSchemaClass) @@ -271,9 +305,41 @@ def _extract_record( # Point to note: # In Kafka documentSchema and keySchema both contains "doc" field. # DataHub Dataset "description" field is mapped to documentSchema's "doc" field. - schema = json.loads(schema_metadata.platformSchema.documentSchema) - if isinstance(schema, dict): - description = schema.get(DOC_KEY) + + avro_schema = avro.schema.parse( + schema_metadata.platformSchema.documentSchema + ) + description = avro_schema.doc + # set the tags + all_tags: List[str] = [] + for tag in avro_schema.other_props.get( + self.source_config.schema_tags_field, [] + ): + all_tags.append(self.source_config.tag_prefix + tag) + + if self.source_config.enable_meta_mapping: + meta_aspects = self.meta_processor.process(avro_schema.other_props) + + meta_owners_aspects = meta_aspects.get(Constants.ADD_OWNER_OPERATION) + if meta_owners_aspects: + dataset_snapshot.aspects.append(meta_owners_aspects) + + meta_terms_aspect = meta_aspects.get(Constants.ADD_TERM_OPERATION) + if meta_terms_aspect: + dataset_snapshot.aspects.append(meta_terms_aspect) + + # Create the tags aspect + meta_tags_aspect = meta_aspects.get(Constants.ADD_TAG_OPERATION) + if meta_tags_aspect: + all_tags += [ + tag_association.tag[len("urn:li:tag:") :] + for tag_association in meta_tags_aspect.tags + ] + + if all_tags: + dataset_snapshot.aspects.append( + mce_builder.make_global_tag_aspect_with_tag_list(all_tags) + ) dataset_properties = DatasetPropertiesClass( name=topic, customProperties=custom_props, description=description diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index b3fa5e3401c07..f3344782917ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -901,6 +901,108 @@ def _extract_lineages(self): return +@dataclass +class SnowflakeSinkConnector: + connector_manifest: ConnectorManifest + report: KafkaConnectSourceReport + + def __init__( + self, connector_manifest: ConnectorManifest, report: KafkaConnectSourceReport + ) -> None: + self.connector_manifest = connector_manifest + self.report = report + self._extract_lineages() + + @dataclass + class SnowflakeParser: + database_name: str + schema_name: str + topics_to_tables: Dict[str, str] + + def report_warning(self, key: str, reason: str) -> None: + logger.warning(f"{key}: {reason}") + self.report.report_warning(key, reason) + + def get_table_name_from_topic_name(self, topic_name: str) -> str: + """ + This function converts the topic name to a valid Snowflake table name using some rules. + Refer below link for more info + https://docs.snowflake.com/en/user-guide/kafka-connector-overview#target-tables-for-kafka-topics + """ + table_name = re.sub("[^a-zA-Z0-9_]", "_", topic_name) + if re.match("^[^a-zA-Z_].*", table_name): + table_name = "_" + table_name + # Connector may append original topic's hash code as suffix for conflict resolution + # if generated table names for 2 topics are similar. This corner case is not handled here. + # Note that Snowflake recommends to choose topic names that follow the rules for + # Snowflake identifier names so this case is not recommended by snowflake. 
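+        # Illustrative conversions (assumed topic names): "web.orders-v1" becomes
+        # "web_orders_v1"; "123_orders" starts with a digit, so it becomes
+        # "_123_orders".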
+ return table_name + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> SnowflakeParser: + database_name = connector_manifest.config["snowflake.database.name"] + schema_name = connector_manifest.config["snowflake.schema.name"] + + # Fetch user provided topic to table map + provided_topics_to_tables: Dict[str, str] = {} + if connector_manifest.config.get("snowflake.topic2table.map"): + for each in connector_manifest.config["snowflake.topic2table.map"].split( + "," + ): + topic, table = each.split(":") + provided_topics_to_tables[topic.strip()] = table.strip() + + topics_to_tables: Dict[str, str] = {} + # Extract lineage for only those topics whose data ingestion started + for topic in connector_manifest.topic_names: + if topic in provided_topics_to_tables: + # If user provided which table to get mapped with this topic + topics_to_tables[topic] = provided_topics_to_tables[topic] + else: + # Else connector converts topic name to a valid Snowflake table name. + topics_to_tables[topic] = self.get_table_name_from_topic_name(topic) + + return self.SnowflakeParser( + database_name=database_name, + schema_name=schema_name, + topics_to_tables=topics_to_tables, + ) + + def _extract_lineages(self): + self.connector_manifest.flow_property_bag = self.connector_manifest.config + + # For all snowflake sink connector properties, refer below link + # https://docs.snowflake.com/en/user-guide/kafka-connector-install#configuring-the-kafka-connector + # remove private keys, secrets from properties + secret_properties = [ + "snowflake.private.key", + "snowflake.private.key.passphrase", + "value.converter.basic.auth.user.info", + ] + for k in secret_properties: + if k in self.connector_manifest.flow_property_bag: + del self.connector_manifest.flow_property_bag[k] + + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + + for topic, table in parser.topics_to_tables.items(): + target_dataset = f"{parser.database_name}.{parser.schema_name}.{table}" + lineages.append( + KafkaConnectLineage( + source_dataset=topic, + source_platform=KAFKA, + target_dataset=target_dataset, + target_platform="snowflake", + ) + ) + + self.connector_manifest.lineages = lineages + return + + @dataclass class ConfluentS3SinkConnector: connector_manifest: ConnectorManifest @@ -1130,6 +1232,12 @@ def get_connectors_manifest(self) -> List[ConnectorManifest]: connector_manifest = ConfluentS3SinkConnector( connector_manifest=connector_manifest, report=self.report ).connector_manifest + elif connector_manifest.config.get("connector.class").__eq__( + "com.snowflake.kafka.connector.SnowflakeSinkConnector" + ): + connector_manifest = SnowflakeSinkConnector( + connector_manifest=connector_manifest, report=self.report + ).connector_manifest else: self.report.report_dropped(connector_manifest.name) logger.warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 0cf9932ba0878..a000c66a406c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -746,7 +746,7 @@ def get_request(): # respect Retry-After sleep_time = error_response.headers.get("retry-after") if sleep_time is not None: - time.sleep(sleep_time) + time.sleep(float(sleep_time)) raise HTTPError429 raise http_error diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py 
b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
index 0bc8bb17934f7..95f6444384408 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
@@ -51,15 +51,17 @@ class DatabaseId:
     database: str = Field(
         description="Database created from share in consumer account."
     )
-    platform_instance: str = Field(
-        description="Platform instance of consumer snowflake account."
+    platform_instance: Optional[str] = Field(
+        default=None,
+        description="Platform instance of consumer snowflake account.",
     )


 class SnowflakeShareConfig(ConfigModel):
     database: str = Field(description="Database from which share is created.")
-    platform_instance: str = Field(
-        description="Platform instance for snowflake account in which share is created."
+    platform_instance: Optional[str] = Field(
+        default=None,
+        description="Platform instance for snowflake account in which share is created.",
     )

     consumers: Set[DatabaseId] = Field(
@@ -247,10 +249,11 @@ def validate_shares(

         if shares:
             # Check: platform_instance should be present
-            assert current_platform_instance is not None, (
-                "Did you forget to set `platform_instance` for current ingestion ? "
-                "It is required to use `platform_instance` when ingesting from multiple snowflake accounts."
-            )
+            if current_platform_instance is None:
+                logger.info(
+                    "It is advisable to use `platform_instance` when ingesting from multiple snowflake accounts, if they contain databases with the same name. "
+                    "Setting `platform_instance` makes it possible to distinguish such databases without conflict and to ingest their metadata correctly."
+                )

             databases_included_in_share: List[DatabaseId] = []
             databases_created_from_share: List[DatabaseId] = []
@@ -259,10 +262,11 @@ def validate_shares(
                 shared_db = DatabaseId(
                     share_details.database, share_details.platform_instance
                 )
-                assert all(
-                    consumer.platform_instance != share_details.platform_instance
-                    for consumer in share_details.consumers
-                ), "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+                if current_platform_instance:
+                    assert all(
+                        consumer.platform_instance != share_details.platform_instance
+                        for consumer in share_details.consumers
+                    ), "Share's platform_instance cannot be the same as the consumer's platform instance. Self-sharing is not supported in Snowflake."

                 databases_included_in_share.append(shared_db)
                 databases_created_from_share.extend(share_details.consumers)
@@ -306,7 +310,11 @@ def inbounds(self) -> Dict[str, DatabaseId]:
                     f"database {consumer.database} is created from inbound share {share_name}."
                 )
                 inbounds[consumer.database] = share_details.source_database
-                break
+                if self.platform_instance:
+                    break
+                # If platform_instance is not set, any one of the consumer databases
+                # could be the database from this instance, so we include all relevant
+                # databases in inbounds.
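+                # Illustrative consequence (assumed share with consumers db_a and
+                # db_b, no platform_instance set): both db_a and db_b end up
+                # mapped to the share's source database in `inbounds`.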
else: logger.info( f"Skipping Share {share_name}, as it does not include current platform instance {self.platform_instance}", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_shares.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_shares.py index 6f7520bbf1988..dad0ce7b59ee1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_shares.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_shares.py @@ -93,11 +93,15 @@ def report_missing_databases( db_names = [db.name for db in databases] missing_dbs = [db for db in inbounds + outbounds if db not in db_names] - if missing_dbs: + if missing_dbs and self.config.platform_instance: self.report_warning( "snowflake-shares", f"Databases {missing_dbs} were not ingested. Siblings/Lineage will not be set for these.", ) + elif missing_dbs: + logger.debug( + f"Databases {missing_dbs} were not ingested in this recipe.", + ) def gen_siblings( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index b5458a42192fc..112defe76d957 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -367,12 +367,12 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) ) def warn(self, log: logging.Logger, key: str, reason: str) -> None: - self.report.report_warning(key, reason) + self.report.report_warning(key, reason[:100]) log.warning(f"{key} => {reason}") def error(self, log: logging.Logger, key: str, reason: str) -> None: - self.report.report_failure(key, reason) - log.error(f"{key} => {reason}") + self.report.report_failure(key, reason[:100]) + log.error(f"{key} => {reason}\n{traceback.format_exc()}") def get_inspectors(self) -> Iterable[Inspector]: # This method can be overridden in the case that you want to dynamically @@ -528,10 +528,8 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit try: self.add_profile_metadata(inspector) except Exception as e: - logger.warning( - "Failed to get enrichment data for profiler", exc_info=True - ) - self.report.report_warning( + self.warn( + logger, "profile_metadata", f"Failed to get enrichment data for profile {e}", ) @@ -638,14 +636,9 @@ def loop_tables( # noqa: C901 dataset_name, inspector, schema, table, sql_config ) except Exception as e: - logger.warning( - f"Unable to ingest {schema}.{table} due to an exception.\n {traceback.format_exc()}" - ) - self.report.report_warning( - f"{schema}.{table}", f"Ingestion error: {e}" - ) + self.warn(logger, f"{schema}.{table}", f"Ingestion error: {e}") except Exception as e: - self.report.report_failure(f"{schema}", f"Tables error: {e}") + self.error(logger, f"{schema}", f"Tables error: {e}") def add_information_for_schema(self, inspector: Inspector, schema: str) -> None: pass @@ -806,9 +799,10 @@ def _get_columns( try: columns = inspector.get_columns(table, schema) if len(columns) == 0: - self.report.report_warning(MISSING_COLUMN_INFO, dataset_name) + self.warn(logger, MISSING_COLUMN_INFO, dataset_name) except Exception as e: - self.report.report_warning( + self.warn( + logger, dataset_name, f"unable to get column information due to an error -> {e}", ) @@ -903,14 +897,9 @@ def loop_views( sql_config=sql_config, ) except Exception as e: - logger.warning( - f"Unable to ingest view {schema}.{view} due to an exception.\n 
{traceback.format_exc()}" - ) - self.report.report_warning( - f"{schema}.{view}", f"Ingestion error: {e}" - ) + self.warn(logger, f"{schema}.{view}", f"Ingestion error: {e}") except Exception as e: - self.report.report_failure(f"{schema}", f"Views error: {e}") + self.error(logger, f"{schema}", f"Views error: {e}") def _process_view( self, @@ -924,9 +913,7 @@ def _process_view( columns = inspector.get_columns(view, schema) except KeyError: # For certain types of views, we are unable to fetch the list of columns. - self.report.report_warning( - dataset_name, "unable to get schema for this view" - ) + self.warn(logger, dataset_name, "unable to get schema for this view") schema_metadata = None else: schema_fields = self.get_schema_fields(dataset_name, columns) @@ -1112,7 +1099,8 @@ def loop_profiler_requests( if partition is None and self.is_table_partitioned( database=None, schema=schema, table=table ): - self.report.report_warning( + self.warn( + logger, "profile skipped as partitioned table is empty or partition id was invalid", dataset_name, ) diff --git a/metadata-ingestion/src/datahub/utilities/_custom_package_loader.py b/metadata-ingestion/src/datahub/utilities/_custom_package_loader.py new file mode 100644 index 0000000000000..1b66258557406 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/_custom_package_loader.py @@ -0,0 +1,43 @@ +import sys +from typing import List, Optional + +if sys.version_info < (3, 10): + from importlib_metadata import EntryPoint, entry_points +else: + from importlib.metadata import EntryPoint, entry_points + + +_CUSTOM_PACKAGE_GROUP_KEY = "datahub.custom_packages" + +_MODELS_KEY = "models" + + +class CustomPackageException(Exception): + pass + + +def _get_all_registered_custom_packages() -> List[EntryPoint]: + return list(entry_points(group=_CUSTOM_PACKAGE_GROUP_KEY)) + + +def _get_custom_package_for_name(name: str) -> Optional[str]: + entrypoints = [ + ep for ep in _get_all_registered_custom_packages() if ep.name == name + ] + + if not entrypoints: + return None + + if len(entrypoints) > 1: + all_package_options = [ + entrypoint.dist.name for entrypoint in entrypoints if entrypoint.dist + ] + raise CustomPackageException( + f"Multiple custom packages registered for {name}: cannot pick between {all_package_options}" + ) + + return entrypoints[0].value + + +def get_custom_models_package() -> Optional[str]: + return _get_custom_package_for_name(_MODELS_KEY) diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index 8865254e88579..4fcef990ae4f4 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -269,7 +269,7 @@ def get_schema_fields_for_hive_column( hive_column_name=hive_column_name, hive_column_type=hive_column_type ) schema_fields = avro_schema_to_mce_fields( - avro_schema_string=json.dumps(avro_schema_json), + avro_schema=json.dumps(avro_schema_json), default_nullable=default_nullable, swallow_exceptions=False, ) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 32666ceecdf85..793eccfb22c7e 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -1,6 +1,8 @@ import contextlib import logging +import operator import re +from functools import reduce from typing import Any, Dict, List, Match, Optional, Union from 
datahub.emitter import mce_builder @@ -94,11 +96,13 @@ def __init__( tag_prefix: str = "", owner_source_type: Optional[str] = None, strip_owner_email_id: bool = False, + match_nested_props: bool = False, ): self.operation_defs = operation_defs self.tag_prefix = tag_prefix self.strip_owner_email_id = strip_owner_email_id self.owner_source_type = owner_source_type + self.match_nested_props = match_nested_props def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: # Defining the following local variables - @@ -121,9 +125,18 @@ def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: ) if not operation_type or not operation_config: continue + raw_props_value = raw_props.get(operation_key) + if not raw_props_value and self.match_nested_props: + try: + raw_props_value = reduce( + operator.getitem, operation_key.split("."), raw_props + ) + except KeyError: + pass + maybe_match = self.get_match( self.operation_defs[operation_key][Constants.MATCH], - raw_props.get(operation_key), + raw_props_value, ) if maybe_match is not None: operation = self.get_operation_value( diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index d677b0874b985..f18235af3d1fd 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -14,6 +14,7 @@ import sqlglot.optimizer.qualify import sqlglot.optimizer.qualify_columns from pydantic import BaseModel +from typing_extensions import TypedDict from datahub.emitter.mce_builder import ( DEFAULT_ENV, @@ -36,6 +37,15 @@ SQL_PARSE_RESULT_CACHE_SIZE = 1000 +class GraphQLSchemaField(TypedDict): + fieldPath: str + nativeDataType: str + + +class GraphQLSchemaMetadata(TypedDict): + fields: List[GraphQLSchemaField] + + class QueryType(enum.Enum): CREATE = "CREATE" SELECT = "SELECT" @@ -330,6 +340,12 @@ def add_schema_metadata( def add_raw_schema_info(self, urn: str, schema_info: SchemaInfo) -> None: self._save_to_cache(urn, schema_info) + def add_graphql_schema_metadata( + self, urn: str, schema_metadata: GraphQLSchemaMetadata + ) -> None: + schema_info = self.convert_graphql_schema_metadata_to_info(schema_metadata) + self._save_to_cache(urn, schema_info) + def _save_to_cache(self, urn: str, schema_info: Optional[SchemaInfo]) -> None: self._schema_cache[urn] = schema_info @@ -356,6 +372,24 @@ def _convert_schema_aspect_to_info( not in DatasetUrn.get_simple_field_path_from_v2_field_path(col.fieldPath) } + @classmethod + def convert_graphql_schema_metadata_to_info( + cls, schema: GraphQLSchemaMetadata + ) -> SchemaInfo: + return { + DatasetUrn.get_simple_field_path_from_v2_field_path(field["fieldPath"]): ( + # The actual types are more of a "nice to have". + field["nativeDataType"] + or "str" + ) + for field in schema["fields"] + # TODO: We can't generate lineage to columns nested within structs yet. + if "." 
+ not in DatasetUrn.get_simple_field_path_from_v2_field_path( + field["fieldPath"] + ) + } + # TODO add a method to load all from graphql def close(self) -> None: diff --git a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_snowflake_sink_mces_golden.json b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_snowflake_sink_mces_golden.json new file mode 100644 index 0000000000000..76d49cebe5ae3 --- /dev/null +++ b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_snowflake_sink_mces_golden.json @@ -0,0 +1,152 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "connector.class": "com.snowflake.kafka.connector.SnowflakeSinkConnector", + "snowflake.database.name": "kafka_db", + "snowflake.schema.name": "kafka_schema", + "snowflake.topic2table.map": "topic1:table1", + "tasks.max": "1", + "topics": "topic1,_topic+2", + "snowflake.user.name": "kafka_connector_user_1", + "name": "snowflake_sink1", + "snowflake.url.name": "bcaurux-lc62744.snowflakecomputing.com:443" + }, + "name": "snowflake_sink1", + "description": "Sink connector using `com.snowflake.kafka.connector.SnowflakeSinkConnector` plugin." + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),topic1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": {}, + "name": "snowflake_sink1:topic1", + "type": { + "string": "COMMAND" + } + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),topic1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:kafka,topic1,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,kafka_db.kafka_schema.table1,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),_topic+2)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": {}, + "name": "snowflake_sink1:_topic+2", + "type": { + "string": "COMMAND" + } + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),_topic+2)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:kafka,_topic+2,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,kafka_db.kafka_schema._topic_2,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD)", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),_topic+2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.snowflake_sink1,PROD),topic1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py index 5f907bb05443c..48063908e624f 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py +++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py @@ -534,3 +534,103 @@ def test_kafka_connect_ingest_stateful( "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,connect-instance-1.mysql_source2,PROD),librarydb.member)", ] assert sorted(deleted_job_urns) == sorted(difference_job_urns) + + +def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: + api_vs_response = { + "http://localhost:28083": { + "method": "GET", + "status_code": 200, + "json": { + "version": "7.4.0-ccs", + "commit": "30969fa33c185e880b9e02044761dfaac013151d", + "kafka_cluster_id": "MDgRZlZhSZ-4fXhwRR79bw", + }, + }, + } + + api_vs_response.update(override_data) + + for url in api_vs_response.keys(): + request_mock.register_uri( + api_vs_response[url]["method"], + url, + json=api_vs_response[url]["json"], + status_code=api_vs_response[url]["status_code"], + ) + + +@freeze_time(FROZEN_TIME) +def test_kafka_connect_snowflake_sink_ingest( + pytestconfig, tmp_path, mock_time, requests_mock +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/kafka-connect" + override_data = { + "http://localhost:28083/connectors": { + "method": "GET", + "status_code": 200, + "json": ["snowflake_sink1"], + }, + "http://localhost:28083/connectors/snowflake_sink1": { + "method": "GET", + "status_code": 200, + "json": { + "name": "snowflake_sink1", + "config": { + "connector.class": "com.snowflake.kafka.connector.SnowflakeSinkConnector", + "snowflake.database.name": "kafka_db", + "snowflake.schema.name": "kafka_schema", + "snowflake.topic2table.map": "topic1:table1", + "tasks.max": "1", + "topics": "topic1,_topic+2", + "snowflake.user.name": "kafka_connector_user_1", + "snowflake.private.key": "rrSnqU=", + "name": "snowflake_sink1", + "snowflake.url.name": "bcaurux-lc62744.snowflakecomputing.com:443", + }, + "tasks": [{"connector": "snowflake_sink1", "task": 0}], + "type": "sink", + }, + }, + "http://localhost:28083/connectors/snowflake_sink1/topics": { + "method": "GET", + "status_code": 200, + "json": {"snowflake_sink1": {"topics": ["topic1", "_topic+2"]}}, + }, + } + + register_mock_api(request_mock=requests_mock, override_data=override_data) + + pipeline = Pipeline.create( + { + "run_id": "kafka-connect-test", + "source": { + "type": "kafka-connect", + "config": { + "platform_instance": "connect-instance-1", + "connect_uri": 
KAFKA_CONNECT_SERVER, + "connector_patterns": { + "allow": [ + "snowflake_sink1", + ] + }, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/kafka_connect_snowflake_sink_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "kafka_connect_snowflake_sink_mces_golden.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "kafka_connect_snowflake_sink_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) diff --git a/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json b/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json index e51eaa10b8b10..7dd328168e84c 100644 --- a/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json +++ b/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json @@ -86,7 +86,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -103,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -118,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -144,10 +147,10 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "hash": "cc452cf58242cdb9d09cf33d657497d8", + "hash": "a79a2fe3adab60b21d272a9cc3e93595", "platformSchema": { "com.linkedin.pegasus2avro.schema.KafkaSchema": { - "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\"},{\"name\":\"firstName\",\"type\":\"string\"},{\"name\":\"lastName\",\"type\":\"string\"}]}", + "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", "documentSchemaType": "AVRO", "keySchema": "{\"type\":\"record\",\"name\":\"UserKey\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Key schema for kafka topic\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"}]}", "keySchemaType": "AVRO" @@ -188,7 +191,15 @@ }, "nativeDataType": "email", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Email\"]}" }, { "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].firstName", @@ -200,7 +211,15 @@ }, "nativeDataType": "firstName", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" }, { "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].lastName", @@ -212,7 +231,15 @@ }, "nativeDataType": "lastName", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" } ] } @@ -224,6 +251,15 @@ ] } }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [ + { + 
"tag": "urn:li:tag:PII" + } + ] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -246,7 +282,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -263,7 +300,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -280,7 +318,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -295,7 +334,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -321,10 +361,10 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "hash": "dc1cf32c2688cc3d2d27fe6e856f06d2", + "hash": "62c7c400ec5760797a59c45e59c2f2dc", "platformSchema": { "com.linkedin.pegasus2avro.schema.KafkaSchema": { - "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\"},{\"name\":\"firstName\",\"type\":\"string\"},{\"name\":\"lastName\",\"type\":\"string\"}]}", + "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", "documentSchemaType": "AVRO", "keySchema": "\"string\"", "keySchemaType": "AVRO" @@ -353,7 +393,15 @@ }, "nativeDataType": "email", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Email\"]}" }, { "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].firstName", @@ -365,7 +413,15 @@ }, "nativeDataType": "firstName", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" }, { "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].lastName", @@ -377,7 +433,15 @@ }, "nativeDataType": "lastName", "recursive": false, - "isPartOfKey": false + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" } ] } @@ -389,6 +453,15 @@ ] } }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [ + { + "tag": "urn:li:tag:PII" + } + ] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { @@ -411,7 +484,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -428,7 +502,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } }, { @@ -443,7 +518,56 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "kafka-test" + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Email", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": 
"Email" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Name", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Name" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:PII", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "PII" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/kafka/value_schema.avsc b/metadata-ingestion/tests/integration/kafka/value_schema.avsc index 788cb94c47a72..8cb6c42cb03f4 100644 --- a/metadata-ingestion/tests/integration/kafka/value_schema.avsc +++ b/metadata-ingestion/tests/integration/kafka/value_schema.avsc @@ -3,18 +3,22 @@ "type": "record", "name": "CreateUserRequest", "doc": "Value schema for kafka topic", + "tags": ["PII"], "fields": [ { "name": "email", - "type": "string" + "type": "string", + "tags": ["Email"] }, { "name": "firstName", - "type": "string" + "type": "string", + "tags": ["Name"] }, { "name": "lastName", - "type": "string" + "type": "string", + "tags": ["Name"] } ] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json new file mode 100644 index 0000000000000..10f5ee20b0c1f --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json @@ -0,0 +1,31 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,t1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,t2,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "a" + }, + "upstreams": [] + }, + { + "downstream": { + "table": null, + "column": "b" + }, + "upstreams": [] + }, + { + "downstream": { + "table": null, + "column": "c" + }, + "upstreams": [] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 7581d3bac010e..483c1ac4cc7f9 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -208,6 +208,16 @@ def test_select_from_union(): ) +def test_select_ambiguous_column_no_schema(): + assert_sql_result( + """ + select A, B, C from t1 inner join t2 on t1.id = t2.id + """, + dialect="hive", + expected_file=RESOURCE_DIR / "test_select_ambiguous_column_no_schema.json", + ) + + def test_merge_from_union(): # TODO: We don't support merge statements yet, but the union should still get handled. 
diff --git a/metadata-ingestion/tests/unit/test_kafka_source.py b/metadata-ingestion/tests/unit/test_kafka_source.py index b48ebf12ee37a..603068780d0a7 100644 --- a/metadata-ingestion/tests/unit/test_kafka_source.py +++ b/metadata-ingestion/tests/unit/test_kafka_source.py @@ -1,3 +1,4 @@ +import json from itertools import chain from typing import Dict, Optional, Tuple from unittest.mock import MagicMock, patch @@ -7,11 +8,17 @@ RegisteredSchema, Schema, ) +from freezegun import freeze_time from datahub.emitter.mce_builder import ( + OwnerType, make_dataplatform_instance_urn, make_dataset_urn, make_dataset_urn_with_platform_instance, + make_global_tag_aspect_with_tag_list, + make_glossary_terms_aspect_from_urn_list, + make_owner_urn, + make_ownership_aspect_from_urn_list, ) from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -20,7 +27,10 @@ from datahub.metadata.schema_classes import ( BrowsePathsClass, DataPlatformInstanceClass, + GlobalTagsClass, + GlossaryTermsClass, KafkaSchemaClass, + OwnershipClass, SchemaMetadataClass, ) @@ -521,3 +531,148 @@ def test_kafka_source_succeeds_with_describe_configs_error( mock_admin_client_instance.describe_configs.assert_called_once() assert len(workunits) == 2 + + +@freeze_time("2023-09-20 10:00:00") +@patch( + "datahub.ingestion.source.confluent_schema_registry.SchemaRegistryClient", + autospec=True, +) +@patch("datahub.ingestion.source.kafka.confluent_kafka.Consumer", autospec=True) +def test_kafka_source_topic_meta_mappings( + mock_kafka_consumer, mock_schema_registry_client, mock_admin_client +): + # Setup the topic to key/value schema mappings for all types of schema registry subject name strategies. + # topic_name -> (key_schema, value_schema) + topic_subject_schema_map: Dict[str, Tuple[RegisteredSchema, RegisteredSchema]] = { + "topic1": ( + RegisteredSchema( + schema_id="schema_id_2", + schema=Schema( + schema_str='{"type":"record", "name":"Topic1Key", "namespace": "test.acryl", "fields": [{"name":"t1key", "type": "string"}]}', + schema_type="AVRO", + ), + subject="topic1-key", + version=1, + ), + RegisteredSchema( + schema_id="schema_id_1", + schema=Schema( + schema_str=json.dumps( + { + "type": "record", + "name": "Topic1Value", + "namespace": "test.acryl", + "fields": [{"name": "t1value", "type": "string"}], + "owner": "@charles", + "business_owner": "jdoe.last@gmail.com", + "data_governance.team_owner": "Finance", + "has_pii": True, + "int_property": 1, + "double_property": 2.5, + } + ), + schema_type="AVRO", + ), + subject="topic1-value", + version=1, + ), + ) + } + + # Mock the kafka consumer + mock_kafka_instance = mock_kafka_consumer.return_value + mock_cluster_metadata = MagicMock() + mock_cluster_metadata.topics = {k: None for k in topic_subject_schema_map.keys()} + mock_kafka_instance.list_topics.return_value = mock_cluster_metadata + + # Mock the schema registry client + # - mock get_subjects: all subjects in topic_subject_schema_map + mock_schema_registry_client.return_value.get_subjects.return_value = [ + v.subject for v in chain(*topic_subject_schema_map.values()) + ] + + # - mock get_latest_version + def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: + for registered_schema in chain(*topic_subject_schema_map.values()): + if registered_schema.subject == subject_name: + return registered_schema + return None + + mock_schema_registry_client.return_value.get_latest_version = ( + mock_get_latest_version + ) + + ctx = PipelineContext(run_id="test1") + kafka_source =
KafkaSource.create( + { + "connection": {"bootstrap": "localhost:9092"}, + "meta_mapping": { + "owner": { + "match": "^@(.*)", + "operation": "add_owner", + "config": {"owner_type": "user"}, + }, + "business_owner": { + "match": ".*", + "operation": "add_owner", + "config": {"owner_type": "user"}, + }, + "has_pii": { + "match": True, + "operation": "add_tag", + "config": {"tag": "has_pii_test"}, + }, + "int_property": { + "match": 1, + "operation": "add_tag", + "config": {"tag": "int_meta_property"}, + }, + "double_property": { + "match": 2.5, + "operation": "add_term", + "config": {"term": "double_meta_property"}, + }, + "data_governance.team_owner": { + "match": "Finance", + "operation": "add_term", + "config": {"term": "Finance_test"}, + }, + }, + }, + ctx, + ) + workunits = [w for w in kafka_source.get_workunits()] + assert len(workunits) == 4 + mce = workunits[0].metadata + assert isinstance(mce, MetadataChangeEvent) + + ownership_aspect = [ + asp for asp in mce.proposedSnapshot.aspects if isinstance(asp, OwnershipClass) + ][0] + assert ownership_aspect == make_ownership_aspect_from_urn_list( + [ + make_owner_urn("charles", OwnerType.USER), + make_owner_urn("jdoe.last@gmail.com", OwnerType.USER), + ], + "SERVICE", + ) + + tags_aspect = [ + asp for asp in mce.proposedSnapshot.aspects if isinstance(asp, GlobalTagsClass) + ][0] + assert tags_aspect == make_global_tag_aspect_with_tag_list( + ["has_pii_test", "int_meta_property"] + ) + + terms_aspect = [ + asp + for asp in mce.proposedSnapshot.aspects + if isinstance(asp, GlossaryTermsClass) + ][0] + assert terms_aspect == make_glossary_terms_aspect_from_urn_list( + [ + "urn:li:glossaryTerm:Finance_test", + "urn:li:glossaryTerm:double_meta_property", + ] + ) diff --git a/metadata-ingestion/tests/unit/test_mapping.py b/metadata-ingestion/tests/unit/test_mapping.py index aea1d8ddd9a54..d69dd4a8a96b0 100644 --- a/metadata-ingestion/tests/unit/test_mapping.py +++ b/metadata-ingestion/tests/unit/test_mapping.py @@ -231,3 +231,51 @@ def test_operation_processor_advanced_matching_tags(): tag_aspect: GlobalTagsClass = aspect_map["add_tag"] assert len(tag_aspect.tags) == 1 assert tag_aspect.tags[0].tag == "urn:li:tag:case_4567" + + +def test_operation_processor_matching_nested_props(): + raw_props = { + "gdpr": { + "pii": True, + }, + } + processor = OperationProcessor( + operation_defs={ + "gdpr.pii": { + "match": True, + "operation": "add_tag", + "config": {"tag": "pii"}, + }, + }, + owner_source_type="SOURCE_CONTROL", + match_nested_props=True, + ) + aspect_map = processor.process(raw_props) + assert "add_tag" in aspect_map + + tag_aspect: GlobalTagsClass = aspect_map["add_tag"] + assert len(tag_aspect.tags) == 1 + assert tag_aspect.tags[0].tag == "urn:li:tag:pii" + + +def test_operation_processor_matching_dot_props(): + raw_props = { + "gdpr.pii": True, + } + processor = OperationProcessor( + operation_defs={ + "gdpr.pii": { + "match": True, + "operation": "add_tag", + "config": {"tag": "pii"}, + }, + }, + owner_source_type="SOURCE_CONTROL", + match_nested_props=True, + ) + aspect_map = processor.process(raw_props) + assert "add_tag" in aspect_map + + tag_aspect: GlobalTagsClass = aspect_map["add_tag"] + assert len(tag_aspect.tags) == 1 + assert tag_aspect.tags[0].tag == "urn:li:tag:pii" diff --git a/metadata-ingestion/tests/unit/test_schema_util.py b/metadata-ingestion/tests/unit/test_schema_util.py index e81c335e178a2..0a111d700cf8c 100644 --- a/metadata-ingestion/tests/unit/test_schema_util.py +++ 
b/metadata-ingestion/tests/unit/test_schema_util.py @@ -6,7 +6,12 @@ from typing import Dict, List, Type import pytest +from freezegun import freeze_time +from datahub.emitter.mce_builder import ( + make_global_tag_aspect_with_tag_list, + make_glossary_terms_aspect_from_urn_list, +) from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import ( DateTypeClass, @@ -15,6 +20,7 @@ StringTypeClass, TimeTypeClass, ) +from datahub.utilities.mapping import OperationProcessor logger = logging.getLogger(__name__) @@ -771,3 +777,106 @@ def test_ignore_exceptions(): """ fields: List[SchemaField] = avro_schema_to_mce_fields(malformed_schema) assert not fields + + +@freeze_time("2023-09-12") +def test_avro_schema_to_mce_fields_with_field_meta_mapping(): + schema = """ +{ + "type": "record", + "name": "Payment", + "namespace": "some.event.namespace", + "fields": [ + {"name": "id", "type": "string"}, + {"name": "amount", "type": "double", "doc": "amountDoc","has_pii": "False"}, + {"name": "name","type": "string","default": "","has_pii": "True"}, + {"name": "phoneNumber", + "type": [{ + "type": "record", + "name": "PhoneNumber", + "doc": "testDoc", + "fields": [{ + "name": "areaCode", + "type": "string", + "doc": "areaCodeDoc", + "default": "" + }, { + "name": "countryCode", + "type": "string", + "default": "" + }, { + "name": "prefix", + "type": "string", + "default": "" + }, { + "name": "number", + "type": "string", + "default": "" + }] + }, + "null" + ], + "default": "null", + "has_pii": "True", + "glossary_field": "TERM_PhoneNumber" + }, + {"name": "address", + "type": [{ + "type": "record", + "name": "Address", + "fields": [{ + "name": "street", + "type": "string", + "default": "" + }] + }, + "null" + ], + "doc": "addressDoc", + "default": "null", + "has_pii": "True", + "glossary_field": "TERM_Address" + } + ] +} +""" + processor = OperationProcessor( + operation_defs={ + "has_pii": { + "match": "True", + "operation": "add_tag", + "config": {"tag": "has_pii_test"}, + }, + "glossary_field": { + "match": "TERM_(.*)", + "operation": "add_term", + "config": {"term": "{{ $match }}"}, + }, + } + ) + fields = avro_schema_to_mce_fields(schema, meta_mapping_processor=processor) + expected_field_paths = [ + "[version=2.0].[type=Payment].[type=string].id", + "[version=2.0].[type=Payment].[type=double].amount", + "[version=2.0].[type=Payment].[type=string].name", + "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber", + "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].areaCode", + "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].countryCode", + "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].prefix", + "[version=2.0].[type=Payment].[type=PhoneNumber].phoneNumber.[type=string].number", + "[version=2.0].[type=Payment].[type=Address].address", + "[version=2.0].[type=Payment].[type=Address].address.[type=string].street", + ] + assert_field_paths_match(fields, expected_field_paths) + + pii_tag_aspect = make_global_tag_aspect_with_tag_list(["has_pii_test"]) + assert fields[1].globalTags is None + assert fields[2].globalTags == pii_tag_aspect + assert fields[3].globalTags == pii_tag_aspect + assert fields[3].glossaryTerms == make_glossary_terms_aspect_from_urn_list( + ["urn:li:glossaryTerm:PhoneNumber"] + ) + assert fields[8].globalTags == pii_tag_aspect + assert fields[8].glossaryTerms == make_glossary_terms_aspect_from_urn_list( + 
["urn:li:glossaryTerm:Address"] + ) diff --git a/metadata-ingestion/tests/unit/test_snowflake_shares.py b/metadata-ingestion/tests/unit/test_snowflake_shares.py index 7de86139baf39..9e33ba6132e06 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_shares.py +++ b/metadata-ingestion/tests/unit/test_snowflake_shares.py @@ -231,6 +231,7 @@ def test_snowflake_shares_workunit_inbound_share( else: siblings_aspect = wu.get_aspect_of_type(Siblings) assert siblings_aspect is not None + assert not siblings_aspect.primary assert len(siblings_aspect.siblings) == 1 assert siblings_aspect.siblings == [ wu.get_urn().replace("instance1.db1", "instance2.db1") @@ -275,6 +276,7 @@ def test_snowflake_shares_workunit_outbound_share( for wu in wus: siblings_aspect = wu.get_aspect_of_type(Siblings) assert siblings_aspect is not None + assert siblings_aspect.primary assert len(siblings_aspect.siblings) == 2 assert siblings_aspect.siblings == [ wu.get_urn().replace("instance1.db2", "instance2.db2_from_share"), @@ -336,13 +338,85 @@ def test_snowflake_shares_workunit_inbound_and_outbound_share( siblings_aspect = wu.get_aspect_of_type(Siblings) assert siblings_aspect is not None if "db1" in wu.get_urn(): + assert not siblings_aspect.primary assert len(siblings_aspect.siblings) == 1 assert siblings_aspect.siblings == [ wu.get_urn().replace("instance1.db1", "instance2.db1") ] else: + assert siblings_aspect.primary assert len(siblings_aspect.siblings) == 2 assert siblings_aspect.siblings == [ wu.get_urn().replace("instance1.db2", "instance2.db2_from_share"), wu.get_urn().replace("instance1.db2", "instance3.db2"), ] + + +def test_snowflake_shares_workunit_inbound_and_outbound_share_no_platform_instance( + snowflake_databases: List[SnowflakeDatabase], +) -> None: + config = SnowflakeV2Config( + account_id="abc12345", + shares={ + "share1": SnowflakeShareConfig( + database="db1", + consumers=[ + DatabaseId(database="db1_from_share"), + DatabaseId(database="db1_other"), + ], + ), + "share2": SnowflakeShareConfig( + database="db2_main", + consumers=[ + DatabaseId(database="db2"), + DatabaseId(database="db2_other"), + ], + ), + }, + ) + + report = SnowflakeV2Report() + shares_handler = SnowflakeSharesHandler( + config, report, lambda x: make_snowflake_urn(x) + ) + + assert sorted(config.outbounds().keys()) == ["db1", "db2_main"] + assert sorted(config.inbounds().keys()) == [ + "db1_from_share", + "db1_other", + "db2", + "db2_other", + ] + wus = list(shares_handler.get_shares_workunits(snowflake_databases)) + + # 6 Sibling aspects for db1 tables + # 6 Sibling aspects and and 6 upstreamLineage for db2 tables + assert len(wus) == 18 + + for wu in wus: + assert isinstance( + wu.metadata, (MetadataChangeProposal, MetadataChangeProposalWrapper) + ) + if wu.metadata.aspectName == "upstreamLineage": + upstream_aspect = wu.get_aspect_of_type(UpstreamLineage) + assert upstream_aspect is not None + assert len(upstream_aspect.upstreams) == 1 + assert upstream_aspect.upstreams[0].dataset == wu.get_urn().replace( + "db2.", "db2_main." 
+ ) + else: + siblings_aspect = wu.get_aspect_of_type(Siblings) + assert siblings_aspect is not None + if "db1" in wu.get_urn(): + assert siblings_aspect.primary + assert len(siblings_aspect.siblings) == 2 + assert siblings_aspect.siblings == [ + wu.get_urn().replace("db1.", "db1_from_share."), + wu.get_urn().replace("db1.", "db1_other."), + ] + else: + assert not siblings_aspect.primary + assert len(siblings_aspect.siblings) == 1 + assert siblings_aspect.siblings == [ + wu.get_urn().replace("db2.", "db2_main.") + ] diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index fc72fc4257491..95de3cdb3c526 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -49,6 +49,7 @@ dependencies { annotationProcessor externalDependency.lombok // VisibleForTesting compileOnly externalDependency.guava + testImplementation externalDependency.testngJava8 testImplementation externalDependency.mockito testImplementation externalDependency.mockServer testImplementation externalDependency.mockServerClient diff --git a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc index 6a723090fda07..64216636af26d 100644 --- a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc +++ b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc @@ -143,6 +143,11 @@ "type" : [ "string", "null" ], "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided" + }, { + "name" : "pipelineName", + "type" : [ "null", "string" ], + "doc" : "The ingestion pipeline id that produced the metadata. 
Populated in case of batch ingestion.", + "default" : null }, { "name" : "registryName", "type" : [ "null", "string" ], diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java index 748990752f45b..bbb8e532f1033 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java @@ -26,7 +26,7 @@ import datahub.protobuf.model.ProtobufField; import datahub.protobuf.visitors.ProtobufModelVisitor; import datahub.protobuf.visitors.VisitContext; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.Set; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java index 58e78435a43a5..3a00edca8284a 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java @@ -3,7 +3,7 @@ import com.google.protobuf.DescriptorProtos; import com.google.protobuf.ExtensionRegistry; import datahub.protobuf.model.ProtobufGraph; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java index 3696f5795e1f9..7c98077690d66 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java @@ -5,7 +5,7 @@ import com.google.protobuf.DescriptorProtos.FileDescriptorProto; import com.linkedin.schema.EnumType; import com.linkedin.schema.SchemaFieldDataType; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.Set; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java index a21acf7f6c113..543b815f7f72b 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java @@ -17,7 +17,7 @@ import com.linkedin.schema.SchemaMetadata; import com.linkedin.schema.StringType; import datahub.protobuf.ProtobufDataset; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.Arrays; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java index a7e6dd035160c..80ffafff3f451 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java @@ -1,7 +1,7 @@ package 
datahub.protobuf.model; import com.google.protobuf.DescriptorProtos.FileDescriptorSet; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.HashSet; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java index 035c16552aeb5..e961b6ffd2d61 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java @@ -5,7 +5,7 @@ import com.linkedin.schema.MapType; import com.linkedin.schema.RecordType; import com.linkedin.schema.SchemaFieldDataType; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.Set; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java index f9b168437643b..438e0a79206bd 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java @@ -6,7 +6,7 @@ import com.google.protobuf.DescriptorProtos.OneofDescriptorProto; import com.linkedin.schema.SchemaFieldDataType; import com.linkedin.schema.UnionType; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.List; import java.util.Set; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java index 9645c6b66ef5f..ceebefb3a207e 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java @@ -5,7 +5,7 @@ import datahub.protobuf.model.ProtobufElement; import datahub.protobuf.model.ProtobufGraph; import org.jgrapht.GraphPath; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java index 165823d8e4925..fb51f42a6c759 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.common.urn.DatasetUrn; import com.linkedin.data.template.RecordTemplate; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.net.URISyntaxException; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java index c5c20f8928ec3..4edc65b29d663 100644 --- 
a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java @@ -1,7 +1,7 @@ package datahub.protobuf.visitors.dataset; import datahub.protobuf.model.ProtobufGraph; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java index 0420953a647cb..b3fa2c8fd081b 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.common.urn.Urn; import datahub.protobuf.model.ProtobufGraph; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java index a313681c5a5a0..09fc0a3765436 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.common.InstitutionalMemoryMetadata; import com.linkedin.common.url.Url; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java index 84e7eb19f893b..971500b5f43a2 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.data.template.StringMap; import com.linkedin.dataset.DatasetProperties; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java index cf2649e86dc43..b087c683f9ffe 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java @@ -6,7 +6,7 @@ import com.linkedin.common.OwnershipType; import com.linkedin.common.urn.Urn; import datahub.protobuf.model.ProtobufGraph; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import 
java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java index 2316416729bef..dc3647cdf34c8 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.data.template.StringMap; import com.linkedin.dataset.DatasetProperties; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java index 04fd52cf82e84..c140a798ef6e6 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java @@ -2,7 +2,7 @@ import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.urn.GlossaryTermUrn; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java index 0a1928310bfc2..57a8cf1d63cd2 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java @@ -15,7 +15,7 @@ import com.linkedin.schema.StringType; import com.linkedin.util.Pair; import datahub.protobuf.ProtobufDataset; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.net.URISyntaxException; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java index 6c855e70d7f37..1da29b5320637 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java @@ -7,7 +7,7 @@ import com.linkedin.schema.UnionType; import com.linkedin.util.Pair; import datahub.protobuf.ProtobufDataset; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java index 6fe1098f5e99a..84ab1312a7d8a 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java +++ 
b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java @@ -3,7 +3,7 @@ import com.linkedin.tag.TagProperties; import datahub.protobuf.visitors.tags.TagVisitor; import datahub.event.MetadataChangeProposalWrapper; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.List; diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index a2c643516dce6..ad54cf6524398 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -31,7 +31,7 @@ dependencies { api externalDependency.datastaxOssCore api externalDependency.datastaxOssQueryBuilder api externalDependency.elasticSearchRest - api externalDependency.elasticSearchTransport + api externalDependency.elasticSearchJava implementation externalDependency.javatuples api externalDependency.javaxValidation runtimeOnly externalDependency.jna @@ -64,6 +64,7 @@ dependencies { testImplementation externalDependency.testContainers testImplementation externalDependency.testContainersJunit testImplementation externalDependency.testContainersElasticsearch + testImplementation externalDependency.testContainersOpenSearch testImplementation externalDependency.testContainersCassandra testImplementation externalDependency.lombok testImplementation externalDependency.springBootTest @@ -101,14 +102,20 @@ dependencies { } test { - // https://docs.gradle.org/current/userguide/performance.html - maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + doFirst { + // Override: TestNG controls parallelization here; + // increasing this beyond 1 would merely run every test multiple times. + maxParallelForks = 1 + } + useTestNG() { + suites 'src/test/resources/testng.xml' + } testLogging.showStandardStreams = true testLogging.exceptionFormat = 'full' } tasks.withType(Test) { - enableAssertions = false + enableAssertions = false } project.compileJava { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java new file mode 100644 index 0000000000000..6b5a3d5bfb06e --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -0,0 +1,39 @@ +package com.linkedin.metadata.client; + +import com.datahub.authentication.Authentication; +import com.linkedin.entity.client.EntityClientCache; +import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; +import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.metadata.entity.DeleteEntityService; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.event.EventProducer; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.LineageSearchService; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.client.CachingEntitySearchService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import lombok.Getter; + + +/** + * Java backed SystemEntityClient + */ +@Getter +public class SystemJavaEntityClient extends JavaEntityClient implements SystemEntityClient { + + private final EntityClientCache entityClientCache; + private final Authentication systemAuthentication; + + public SystemJavaEntityClient(EntityService entityService, DeleteEntityService deleteEntityService, + EntitySearchService entitySearchService, CachingEntitySearchService
cachingEntitySearchService, + SearchService searchService, LineageSearchService lineageSearchService, + TimeseriesAspectService timeseriesAspectService, EventProducer eventProducer, + RestliEntityClient restliEntityClient, Authentication systemAuthentication, + EntityClientCacheConfig cacheConfig) { + super(entityService, deleteEntityService, entitySearchService, cachingEntitySearchService, searchService, + lineageSearchService, timeseriesAspectService, eventProducer, restliEntityClient); + this.systemAuthentication = systemAuthentication; + this.entityClientCache = buildEntityClientCache(SystemJavaEntityClient.class, systemAuthentication, cacheConfig); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 8df7a9600ca94..946931a54f4ec 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -45,15 +45,15 @@ import lombok.Value; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.*; @@ -297,12 +297,12 @@ private List<LineageRelationship> getLineageRelationships(@Nonnull List<Urn> ent // Get search query for given list of edges and source urns @VisibleForTesting - static QueryBuilder getQueryForLineage( - @Nonnull List<Urn> urns, - @Nonnull List<EdgeInfo> lineageEdges, - @Nonnull GraphFilters graphFilters, - @Nullable Long startTimeMillis, - @Nullable Long endTimeMillis) { + public static QueryBuilder getQueryForLineage( + @Nonnull List<Urn> urns, + @Nonnull List<EdgeInfo> lineageEdges, + @Nonnull GraphFilters graphFilters, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { BoolQueryBuilder query = QueryBuilders.boolQuery(); if (lineageEdges.isEmpty()) { return query; @@ -361,10 +361,10 @@ static QueryBuilder getQueryForLineage( * physically stored inside the Graph Store. */ @VisibleForTesting - static void addEdgeToPaths( - @Nonnull final Map<Urn, UrnArrayArray> existingPaths, - @Nonnull final Urn parentUrn, - @Nonnull final Urn childUrn) { + public static void addEdgeToPaths( + @Nonnull final Map<Urn, UrnArrayArray> existingPaths, + @Nonnull final Urn parentUrn, + @Nonnull final Urn childUrn) { // Collect all full-paths to this child node. This is what will be returned.
UrnArrayArray pathsToParent = existingPaths.get(parentUrn); if (pathsToParent != null && pathsToParent.size() > 0) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java index 8d2fcaa857541..f8b0e8a291e7a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java @@ -10,11 +10,11 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.reindex.BulkByScrollResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.reindex.BulkByScrollResponse; import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.buildQuery; import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 346befca22559..02e36af343b07 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -45,8 +45,8 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.index.query.QueryBuilders; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.QueryBuilders; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java index 66422c5997d17..1df938f902e0f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java @@ -1,9 +1,9 @@ package com.linkedin.metadata.graph.elastic; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.*; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java index ea1f6cead80a9..6985ceb00afd2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java @@ -28,17 +28,17 @@ import 
javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.builder.SearchSourceBuilder; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java index 402b579b13879..dc30d4c80abc0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java @@ -28,18 +28,18 @@ import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.builder.SearchSourceBuilder; @Slf4j
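Editorial note: in this recommendation-source hunk and the two around it, only the import packages change; the aggregation pattern these sources rely on is untouched. For orientation, that pattern looks roughly like the sketch below. The index and field names here are illustrative placeholders, not DataHub's actual constants.

// Illustrative sketch only: count the most-used entities via a terms
// aggregation, which is essentially what these candidate sources do.
SearchRequest buildMostUsedRequest(String indexName) {
  SearchSourceBuilder source = new SearchSourceBuilder()
      .size(0) // aggregation-only; individual hits are not needed
      .query(QueryBuilders.boolQuery()
          .must(QueryBuilders.termQuery("type", "SearchResultClickEvent")))
      .aggregation(AggregationBuilders.terms("entity")
          .field("entityUrn.keyword")
          .size(10));
  return new SearchRequest(indexName).source(source);
}

Executing it is a plain searchClient.search(request, RequestOptions.DEFAULT) call, and the response is unpacked through ParsedTerms buckets, matching the imports above.

diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java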
b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java index 6ef207dada497..0836c569ed5d1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java @@ -28,18 +28,18 @@ import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.builder.SearchSourceBuilder; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 32adce458770d..bf4dffe9e5fb8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -27,7 +27,7 @@ import com.linkedin.metadata.shared.ElasticSearchIndexed; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchResponse; +import org.opensearch.action.search.SearchResponse; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 14f67ddcbf337..10c2fd725dca9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -30,36 +30,36 @@ import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.http.client.config.RequestConfig; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksRequest; -import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; -import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions; -import 
org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; -import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; -import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.GetAliasesResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.core.CountRequest; -import org.elasticsearch.client.indices.CreateIndexRequest; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.client.indices.GetIndexResponse; -import org.elasticsearch.client.indices.GetMappingsRequest; -import org.elasticsearch.client.indices.PutMappingRequest; -import org.elasticsearch.client.tasks.TaskSubmissionResponse; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.reindex.ReindexRequest; -import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.SortBuilders; -import org.elasticsearch.search.sort.SortOrder; -import org.elasticsearch.tasks.TaskInfo; +import org.opensearch.OpenSearchException; +import org.opensearch.action.admin.cluster.node.tasks.list.ListTasksRequest; +import org.opensearch.action.admin.indices.alias.IndicesAliasesRequest; +import org.opensearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions; +import org.opensearch.action.admin.indices.alias.get.GetAliasesRequest; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.settings.get.GetSettingsRequest; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.GetAliasesResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.core.CountRequest; +import org.opensearch.client.indices.CreateIndexRequest; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.client.indices.GetIndexResponse; +import org.opensearch.client.indices.GetMappingsRequest; +import org.opensearch.client.indices.PutMappingRequest; +import org.opensearch.client.tasks.TaskSubmissionResponse; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.reindex.ReindexRequest; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.tasks.TaskInfo; @Slf4j @@ -117,7 +117,7 @@ public ESIndexBuilder(RestHighLevelClient searchClient, int numShards, int numRe RetryConfig config = RetryConfig.custom() .maxAttempts(Math.max(1, numRetries)) .waitDuration(Duration.ofSeconds(10)) - .retryOnException(e -> e instanceof ElasticsearchException) + .retryOnException(e -> e instanceof OpenSearchException) 
.failAfterMaxAttempts(true) .build(); @@ -153,7 +153,8 @@ public ReindexConfig buildReindexState(String indexName, Map map Settings currentSettings = _searchClient.indices() .getSettings(new GetSettingsRequest().indices(indexName), RequestOptions.DEFAULT) .getIndexToSettings() - .valuesIt() + .values() + .iterator() .next(); builder.currentSettings(currentSettings); @@ -170,6 +171,15 @@ public ReindexConfig buildReindexState(String indexName, Map map return builder.build(); } + /** + * Builds an index with the given name, mappings and settings. + * Deprecated: Use `buildIndex(ReindexConfig indexState)` to enforce conventions via the ReindexConfig class + * earlier in the process. + * @param indexName index name + * @param mappings ES mappings + * @param settings ES settings + * @throws IOException ES error + */ @Deprecated public void buildIndex(String indexName, Map<String, Object> mappings, Map<String, Object> settings) throws IOException { buildIndex(buildReindexState(indexName, mappings, settings)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java index a0c0bd85c04c6..4f5f2926d3da0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java @@ -10,7 +10,7 @@ import lombok.Getter; import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.common.settings.Settings; +import org.opensearch.common.settings.Settings; import java.util.List; import java.util.Map; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 3cb3c441afd68..5fd0a80d23c50 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -38,21 +38,21 @@ import lombok.Value; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.SortOrder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import
org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.terms.IncludeExclude; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.aggregations.bucket.terms.Terms; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortOrder; import static com.linkedin.metadata.utils.SearchUtil.filterSoftDeletedByDefault; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index f3864d99ba5e9..cbaf70ca22617 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -35,20 +35,20 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.Response; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.core.CountRequest; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.search.SearchModule; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.core.CountRequest; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.LoggingDeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.search.SearchModule; +import org.opensearch.search.builder.SearchSourceBuilder; import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.registry.template.util.TemplateUtil.*; @@ -63,7 +63,7 @@ public class ESSearchDAO { private static final NamedXContentRegistry X_CONTENT_REGISTRY; static { - SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); + SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); } @@ -137,7 +137,7 @@ private AggregationMetadata transformAggregationMetadata(@Nonnull AggregationMet } @VisibleForTesting - SearchResult transformIndexIntoEntityName(SearchResult result) { + public SearchResult transformIndexIntoEntityName(SearchResult result) { return result.setMetadata(result.getMetadata().setAggregations(transformIndexIntoEntityName(result.getMetadata().getAggregations()))); } private ScrollResult transformIndexIntoEntityName(ScrollResult result) {
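Editorial note: the ESSearchDAO hunk above carries two real API changes beyond package renames. OpenSearch's SearchModule constructor drops the transportClient boolean, and NamedXContentRegistry/XContentParser move to org.opensearch.core.xcontent. The registry built in the static block exists so raw JSON search bodies can be parsed back into builder objects, along the lines of this hedged sketch (not the DAO's exact parsing code; parser plumbing may differ slightly across OpenSearch 2.x minors):

// Sketch: parse a raw JSON search body into a SearchSourceBuilder using
// the registry built above.
SearchSourceBuilder parseSearchBody(String json) throws IOException {
  try (XContentParser parser = XContentType.JSON.xContent()
      .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, json)) {
    return SearchSourceBuilder.fromXContent(parser);
  }
}

diff --git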
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index d95bbcf893628..e2bdea84eda0e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -11,8 +11,8 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; import static com.linkedin.metadata.utils.SearchUtil.*; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index f4be46e58f3b8..bba3a9fa4232d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -24,15 +24,15 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.MultiMatchQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/PITAwareSearchRequest.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/PITAwareSearchRequest.java index c0b1ac028e9d4..79c00fc7cdd20 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/PITAwareSearchRequest.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/PITAwareSearchRequest.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.support.IndicesOptions; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.support.IndicesOptions; public class PITAwareSearchRequest extends 
SearchRequest { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index b01c736ec23ae..ce88f31449c35 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -35,24 +35,24 @@ import com.linkedin.metadata.search.utils.ESUtils; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.common.lucene.search.function.CombineFunction; -import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; -import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.Operator; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.QueryStringQueryBuilder; -import org.elasticsearch.index.query.SimpleQueryStringBuilder; -import org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBuilder; -import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; -import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; -import org.elasticsearch.search.SearchModule; +import org.opensearch.common.lucene.search.function.CombineFunction; +import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; +import org.opensearch.common.lucene.search.function.FunctionScoreQuery; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.LoggingDeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.Operator; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.SimpleQueryStringBuilder; +import org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; +import org.opensearch.search.SearchModule; import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.*; @@ -69,7 +69,7 @@ public class SearchQueryBuilder { } private static final NamedXContentRegistry X_CONTENT_REGISTRY; static { - SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); + SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); } @@ -135,7 +135,7 @@ private QueryBuilder buildInternalQuery(@Nullable QueryConfiguration customQuery 
* @return A set of SearchFieldConfigs containing the searchable fields from the input entities. */ @VisibleForTesting - Set<SearchFieldConfig> getStandardFields(@Nonnull Collection<EntitySpec> entitySpecs) { + public Set<SearchFieldConfig> getStandardFields(@Nonnull Collection<EntitySpec> entitySpecs) { Set<SearchFieldConfig> fields = new HashSet<>(); // Always present final float urnBoost = Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); @@ -168,7 +168,7 @@ Set getStandardFields(@Nonnull Collection entityS } @VisibleForTesting - Set<SearchFieldConfig> getFieldsFromEntitySpec(EntitySpec entitySpec) { + public Set<SearchFieldConfig> getFieldsFromEntitySpec(EntitySpec entitySpec) { Set<SearchFieldConfig> fields = new HashSet<>(); List<SearchableFieldSpec> searchableFieldSpecs = entitySpec.getSearchableFieldSpecs(); for (SearchableFieldSpec fieldSpec : searchableFieldSpecs) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index dbd933d59d7f3..5fcc10b7af5cf 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -54,23 +54,23 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.common.text.Text; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.aggregations.Aggregation; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.Aggregations; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; -import org.elasticsearch.search.suggest.term.TermSuggestion; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.text.Text; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.aggregations.bucket.terms.Terms; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightField; +import org.opensearch.search.suggest.term.TermSuggestion; import static com.linkedin.metadata.search.utils.ESUtils.NAME_SUGGESTION; import static com.linkedin.metadata.search.utils.ESUtils.toFacetField;
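Editorial note: the SearchQueryBuilder changes are again package renames plus widening test visibility. The function-score imports it keeps (ScoreFunctionBuilders, FieldValueFactorFunction, CombineFunction) behave identically under OpenSearch. For readers unfamiliar with them, a boost of the following shape is what they enable; the field name and factors here are invented for illustration, and the real weights come from the search configuration:

// Sketch: wrap a base query so documents are boosted by a numeric
// feature field. Purely illustrative values.
QueryBuilder withUsageBoost(QueryBuilder baseQuery) {
  FunctionScoreQueryBuilder.FilterFunctionBuilder boost =
      new FunctionScoreQueryBuilder.FilterFunctionBuilder(
          ScoreFunctionBuilders.fieldValueFactorFunction("usageCount")
              .factor(1.2f)
              .modifier(FieldValueFactorFunction.Modifier.SQRT)
              .missing(0d));
  return QueryBuilders.functionScoreQuery(baseQuery,
          new FunctionScoreQueryBuilder.FilterFunctionBuilder[] {boost})
      .boostMode(CombineFunction.MULTIPLY);
}

diff --git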
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java index 297453bdce517..be64df3179a9d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java @@ -2,11 +2,11 @@ import com.linkedin.metadata.utils.metrics.MetricUtils; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.support.WriteRequest; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.support.WriteRequest; import java.util.Arrays; import java.util.HashMap; @@ -76,7 +76,7 @@ private static String buildMetricName(DocWriteRequest.OpType opType, String stat public static String buildBulkRequestSummary(BulkRequest request) { return request.requests().stream().map(req -> String.format( "Failed to perform bulk request: index [%s], optype: [%s], type [%s], id [%s]", - req.index(), req.opType(), req.type(), req.id()) + req.index(), req.opType(), req.opType(), req.id()) ).collect(Collectors.joining(";")); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java index a7ece47a7f5d6..a1e5b363d8a78 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java @@ -7,19 +7,19 @@ import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.bulk.BackoffPolicy; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.tasks.TaskSubmissionResponse; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.reindex.BulkByScrollResponse; -import org.elasticsearch.index.reindex.DeleteByQueryRequest; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BackoffPolicy; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.tasks.TaskSubmissionResponse; +import org.opensearch.common.Nullable; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.index.reindex.DeleteByQueryRequest; import java.io.Closeable; import java.io.IOException;
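Editorial note: one nuance in the BulkListener hunk above is that OpenSearch removed mapping types, so the former req.type() argument becomes req.opType() repeated, and the summary line now prints the operation type twice. For context, a listener of this shape is attached when the underlying BulkProcessor is constructed, roughly as follows; this is a sketch of the stock builder API, not ESBulkProcessor's exact wiring, which adds retry and refresh-policy options on top:

// Sketch: attach a BulkProcessor.Listener (such as BulkListener) when
// constructing a BulkProcessor against the high-level client.
BulkProcessor buildProcessor(RestHighLevelClient client, BulkProcessor.Listener listener) {
  return BulkProcessor.builder(
          (request, bulkResponseListener) ->
              client.bulkAsync(request, RequestOptions.DEFAULT, bulkResponseListener),
          listener)
      .setBulkActions(10000)   // flush after this many queued actions
      .setConcurrentRequests(1)
      .build();
}

diff --git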
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java index 1a63f2d4d0312..edcdf5654028c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java @@ -6,15 +6,15 @@ import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.client.indices.GetIndexResponse; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.script.Script; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.client.indices.GetIndexResponse; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 12c081a5c25a6..9a7d9a1b4c420 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -16,21 +16,21 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.client.RequestOptions; +import org.opensearch.client.RequestOptions; import org.apache.commons.lang.StringUtils; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.builder.PointInTimeBuilder; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.ScoreSortBuilder; -import org.elasticsearch.search.sort.SortOrder; -import org.elasticsearch.search.suggest.SuggestBuilder; -import org.elasticsearch.search.suggest.SuggestBuilders; -import org.elasticsearch.search.suggest.SuggestionBuilder; -import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.builder.PointInTimeBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.ScoreSortBuilder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.search.suggest.SuggestBuilder; +import org.opensearch.search.suggest.SuggestBuilders; +import org.opensearch.search.suggest.SuggestionBuilder; +import org.opensearch.search.suggest.term.TermSuggestionBuilder; 
import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java index c7e8d0940c530..5eb03eb23d01a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java @@ -13,26 +13,26 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.delete.DeleteResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.tasks.GetTaskRequest; -import org.elasticsearch.client.tasks.GetTaskResponse; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.reindex.BulkByScrollResponse; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.PipelineAggregatorBuilders; -import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.BucketSortPipelineAggregationBuilder; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortOrder; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.tasks.GetTaskRequest; +import org.opensearch.client.tasks.GetTaskResponse; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.PipelineAggregatorBuilders; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.pipeline.BucketSortPipelineAggregationBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortOrder; import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.INDEX_NAME; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index 3fcb62424853a..dd8e19861ccd2 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -31,14 +31,14 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.tasks.GetTaskResponse; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.aggregations.bucket.filter.ParsedFilter; -import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.aggregations.metrics.ParsedMax; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.tasks.GetTaskResponse; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHits; +import org.opensearch.search.aggregations.bucket.filter.ParsedFilter; +import org.opensearch.search.aggregations.bucket.terms.ParsedStringTerms; +import org.opensearch.search.aggregations.bucket.terms.Terms; +import org.opensearch.search.aggregations.metrics.ParsedMax; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java index fee9cd9bca56e..a3e5a051a47e3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.timeline.eventgenerator; -import com.datahub.authentication.Authentication; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.dataprocess.DataProcessInstanceRelationships; @@ -8,7 +7,7 @@ import com.linkedin.dataprocess.DataProcessRunStatus; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeEvent; import com.linkedin.metadata.timeline.data.ChangeOperation; @@ -27,9 +26,8 @@ public class DataProcessInstanceRunEventChangeEventGenerator private static final String COMPLETED_STATUS = "COMPLETED"; private static final String STARTED_STATUS = "STARTED"; - public DataProcessInstanceRunEventChangeEventGenerator(@Nonnull final EntityClient entityClient, @Nonnull final - Authentication authentication) { - super(entityClient, authentication); + public DataProcessInstanceRunEventChangeEventGenerator(@Nonnull final SystemEntityClient entityClient) { + super(entityClient); } @Override @@ -108,8 +106,8 @@ private DataProcessInstanceRelationships getRelationships(@Nonnull final String EntityResponse entityResponse; try { entityUrn = Urn.createFromString(entityUrnString); - entityResponse = _entityClient.getV2(DATA_PROCESS_INSTANCE_ENTITY_NAME, entityUrn, - Collections.singleton(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME), _authentication); + entityResponse = _entityClient.getV2(entityUrn, + 
Collections.singleton(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME)); } catch (Exception e) { return null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java index 7f6aa5e53268e..d5539ec3d3822 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java @@ -5,7 +5,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeEvent; @@ -19,16 +19,14 @@ */ public abstract class EntityChangeEventGenerator { // TODO: Add a check for supported aspects - protected EntityClient _entityClient; + protected SystemEntityClient _entityClient; protected Authentication _authentication; public EntityChangeEventGenerator() { } - public EntityChangeEventGenerator(@Nonnull final EntityClient entityClient, - @Nonnull final Authentication authentication) { + public EntityChangeEventGenerator(@Nonnull final SystemEntityClient entityClient) { _entityClient = entityClient; - _authentication = authentication; } @Deprecated diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 01fe41718d7f0..43ba87f474d6a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -49,26 +49,26 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.Response; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.core.CountRequest; -import org.elasticsearch.client.core.CountResponse; -import org.elasticsearch.client.tasks.TaskSubmissionResponse; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.SortBuilders; -import org.elasticsearch.search.sort.SortOrder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import 
org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.core.CountRequest; +import org.opensearch.client.core.CountResponse; +import org.opensearch.client.tasks.TaskSubmissionResponse; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import static com.linkedin.metadata.Constants.*; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java index 6c5dbf2582c05..b0751a9c6f9ea 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java @@ -17,7 +17,7 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilder; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 5389d602ae5c1..316d25d1f37f4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -30,24 +30,24 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.Aggregations; -import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.PipelineAggregatorBuilders; -import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; -import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; -import org.elasticsearch.search.aggregations.metrics.ParsedCardinality; -import org.elasticsearch.search.aggregations.metrics.ParsedSum; -import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.ParsedBucketMetricValue; -import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.index.query.BoolQueryBuilder; +import 
org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.PipelineAggregatorBuilders; +import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.metrics.ParsedCardinality; +import org.opensearch.search.aggregations.metrics.ParsedSum; +import org.opensearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; +import org.opensearch.search.aggregations.pipeline.ParsedBucketMetricValue; +import org.opensearch.search.builder.SearchSourceBuilder; @Slf4j diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java index 46d08bc8887b9..54fb2bc8b1f65 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java @@ -39,7 +39,7 @@ public AspectUtilsTest() throws EntityRegistryException { @Test public void testAdditionalChanges() { - Database server = EbeanTestUtils.createTestServer(); + Database server = EbeanTestUtils.createTestServer(AspectUtilsTest.class.getSimpleName()); EbeanAspectDao aspectDao = new EbeanAspectDao(server); aspectDao.setConnectionValidated(true); EventProducer mockProducer = mock(EventProducer.class); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java deleted file mode 100644 index 327447341badf..0000000000000 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java +++ /dev/null @@ -1,153 +0,0 @@ -package com.linkedin.metadata; - -import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; -import com.linkedin.metadata.config.search.CustomConfiguration; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; -import com.linkedin.metadata.config.search.ExactMatchConfiguration; -import com.linkedin.metadata.config.search.PartialConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.WordGramConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; -import com.linkedin.metadata.models.registry.ConfigEntityRegistry; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.models.registry.EntityRegistryException; -import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; -import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; -import com.linkedin.metadata.version.GitVersion; -import java.util.Optional; -import org.apache.http.HttpHost; -import org.apache.http.impl.nio.reactor.IOReactorConfig; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.test.context.TestConfiguration; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Primary; -import 
org.springframework.context.annotation.Scope; -import org.testcontainers.elasticsearch.ElasticsearchContainer; - -import javax.annotation.Nonnull; - -import java.util.Map; - - -@TestConfiguration -public class ESTestConfiguration { - private static final int HTTP_PORT = 9200; - public static final int REFRESH_INTERVAL_SECONDS = 5; - - public static void syncAfterWrite(ESBulkProcessor bulkProcessor) throws InterruptedException { - bulkProcessor.flush(); - Thread.sleep(1000); - } - - @Bean - public SearchConfiguration searchConfiguration() { - SearchConfiguration searchConfiguration = new SearchConfiguration(); - searchConfiguration.setMaxTermBucketSize(20); - - ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration(); - exactMatchConfiguration.setExclusive(false); - exactMatchConfiguration.setExactFactor(10.0f); - exactMatchConfiguration.setWithPrefix(true); - exactMatchConfiguration.setPrefixFactor(6.0f); - exactMatchConfiguration.setCaseSensitivityFactor(0.7f); - exactMatchConfiguration.setEnableStructured(true); - - WordGramConfiguration wordGramConfiguration = new WordGramConfiguration(); - wordGramConfiguration.setTwoGramFactor(1.2f); - wordGramConfiguration.setThreeGramFactor(1.5f); - wordGramConfiguration.setFourGramFactor(1.8f); - - PartialConfiguration partialConfiguration = new PartialConfiguration(); - partialConfiguration.setFactor(0.4f); - partialConfiguration.setUrnFactor(0.5f); - - searchConfiguration.setExactMatch(exactMatchConfiguration); - searchConfiguration.setWordGram(wordGramConfiguration); - searchConfiguration.setPartial(partialConfiguration); - return searchConfiguration; - } - - @Bean - public CustomSearchConfiguration customSearchConfiguration() throws Exception { - CustomConfiguration customConfiguration = new CustomConfiguration(); - customConfiguration.setEnabled(true); - customConfiguration.setFile("search_config_builder_test.yml"); - return customConfiguration.resolve(new YAMLMapper()); - } - - @Scope("singleton") - @Bean(name = "testElasticsearchContainer") - @Nonnull - public ElasticsearchContainer elasticsearchContainer() { - ESTestUtils.ES_CONTAINER.start(); - return ESTestUtils.ES_CONTAINER; - } -
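The client bean deleted just below is what every replacement test configuration has to recreate. Under OpenSearch only the packages change (org.elasticsearch.client becomes org.opensearch.client); the rest-high-level-client builder API itself is source-compatible. A rough sketch of the equivalent wiring, not code from this PR (OpenSearchTestClientFactory, host, and port are illustrative names):

    import org.apache.http.HttpHost;
    import org.opensearch.client.RestClient;
    import org.opensearch.client.RestClientBuilder;
    import org.opensearch.client.RestHighLevelClient;

    // Hypothetical helper, shown only to illustrate the opensearch-rest-high-level-client API.
    final class OpenSearchTestClientFactory {
        static RestHighLevelClient create(String host, int port) {
            RestClientBuilder builder = RestClient.builder(new HttpHost(host, port, "http"))
                // same 30s connection-request timeout as the deleted Elasticsearch config below
                .setRequestConfigCallback(rc -> rc.setConnectionRequestTimeout(30_000));
            return new RestHighLevelClient(builder);
        }
    }

In this PR the shared wiring appears to live in io.datahubproject.test.search.config.SearchTestContainerConfiguration, which the new per-engine test classes @Import further down.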
- @Primary - @Scope("singleton") - @Bean(name = "elasticSearchRestHighLevelClient") - @Nonnull - public RestHighLevelClient getElasticsearchClient(@Qualifier("testElasticsearchContainer") ElasticsearchContainer esContainer) { - // A helper method to create an ElasticsearchContainer defaulting to the current image and version, with the ability - // within firewalled environments to override with an environment variable to point to the offline repository. - // A helper method to construct a standard rest client for Elasticsearch. - final RestClientBuilder builder = - RestClient.builder(new HttpHost( - "localhost", - esContainer.getMappedPort(HTTP_PORT), "http") - ).setHttpClientConfigCallback(httpAsyncClientBuilder -> - httpAsyncClientBuilder.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(1).build())); - - builder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder. - setConnectionRequestTimeout(30000)); - - return new RestHighLevelClient(builder); - } - - /* - Cannot use the factory class without circular dependencies - */ - @Primary - @Bean(name = "elasticSearchBulkProcessor") - @Nonnull - public ESBulkProcessor getBulkProcessor(@Qualifier("elasticSearchRestHighLevelClient") RestHighLevelClient searchClient) { - return ESBulkProcessor.builder(searchClient) - .async(true) - /* - * Force a refresh as part of this request. This refresh policy does not scale for high indexing or search throughput but is useful - * to present a consistent view to searchers for indices with very low traffic. And it is wonderful for tests! - */ - .writeRequestRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) - .bulkRequestsLimit(10000) - .bulkFlushPeriod(REFRESH_INTERVAL_SECONDS - 1) - .retryInterval(1L) - .numRetries(1) - .build(); - } - - @Primary - @Bean(name = "elasticSearchIndexBuilder") - @Nonnull - protected ESIndexBuilder getIndexBuilder(@Qualifier("elasticSearchRestHighLevelClient") RestHighLevelClient searchClient) { - GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); - return new ESIndexBuilder(searchClient, 1, 1, 3, 1, Map.of(), - false, false, - new ElasticSearchConfiguration(), gitVersion); - } - - @Bean(name = "entityRegistry") - public EntityRegistry entityRegistry() throws EntityRegistryException { - return new ConfigEntityRegistry( - ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); - } - - @Bean(name = "longTailEntityRegistry") - public EntityRegistry longTailEntityRegistry() throws EntityRegistryException { - return new ConfigEntityRegistry( - ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); - } -} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/EbeanTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/EbeanTestUtils.java index 180166e963fca..c6eefede8a860 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/EbeanTestUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/EbeanTestUtils.java @@ -2,7 +2,7 @@ import io.ebean.Database; import io.ebean.DatabaseFactory; -import io.ebean.config.ServerConfig; +import io.ebean.config.DatabaseConfig; import io.ebean.datasource.DataSourceConfig; import javax.annotation.Nonnull; @@ -13,19 +13,19 @@ private EbeanTestUtils() { } @Nonnull - public static Database createTestServer() { - return DatabaseFactory.create(createTestingH2ServerConfig()); + public static Database createTestServer(String instanceId) { + return DatabaseFactory.create(createTestingH2ServerConfig(instanceId)); } @Nonnull - private static ServerConfig createTestingH2ServerConfig() { + private static DatabaseConfig createTestingH2ServerConfig(String instanceId) { DataSourceConfig dataSourceConfig = new DataSourceConfig(); dataSourceConfig.setUsername("tester"); dataSourceConfig.setPassword(""); - dataSourceConfig.setUrl("jdbc:h2:mem:test;IGNORECASE=TRUE;mode=mysql;"); + dataSourceConfig.setUrl(String.format("jdbc:h2:mem:%s;IGNORECASE=TRUE;mode=mysql;", instanceId)); dataSourceConfig.setDriver("org.h2.Driver"); - ServerConfig serverConfig = new ServerConfig(); + DatabaseConfig serverConfig = new DatabaseConfig(); serverConfig.setName("gma"); serverConfig.setDataSourceConfig(dataSourceConfig); serverConfig.setDdlGenerate(true); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java
b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java index 154131ceb6fee..10a73cbe532a2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.elasticsearch.update; import com.linkedin.metadata.search.elasticsearch.update.BulkListener; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.support.WriteRequest; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.support.WriteRequest; import org.mockito.Mockito; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/ESBulkProcessorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/ESBulkProcessorTest.java index 5c882e5158f90..2d84c9f3444de 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/ESBulkProcessorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/ESBulkProcessorTest.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.elasticsearch.update; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.mockito.Mockito; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java index 9e453e6e75677..38b2ed4ed199a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java @@ -22,7 +22,7 @@ public EbeanAspectMigrationsDaoTest() throws EntityRegistryException { @BeforeMethod public void setupTest() { - Database server = EbeanTestUtils.createTestServer(); + Database server = EbeanTestUtils.createTestServer(EbeanAspectMigrationsDaoTest.class.getSimpleName()); _mockProducer = mock(EventProducer.class); EbeanAspectDao dao = new EbeanAspectDao(server); dao.setConnectionValidated(true); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 90f9baa4ca4c2..e8a7d8740d328 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -22,7 +22,7 @@ import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; -import io.datahub.test.DataGenerator; +import io.datahubproject.test.DataGenerator; import io.ebean.Database; import io.ebean.Transaction; import io.ebean.TxScope; @@ -61,7 +61,8 @@ public EbeanEntityServiceTest() throws EntityRegistryException { @BeforeMethod public void setupTest() { - Database server = EbeanTestUtils.createTestServer(); + Database server = EbeanTestUtils.createTestServer(EbeanEntityServiceTest.class.getSimpleName()); + _mockProducer = mock(EventProducer.class); _aspectDao = new EbeanAspectDao(server); @@ -239,6 +240,7 @@ public void testNestedTransactions() throws AssertionError { System.out.println("done"); } + @Test public void 
dataGeneratorThreadingTest() { DataGenerator dataGenerator = new DataGenerator(_entityServiceImpl); @@ -262,7 +264,7 @@ public void dataGeneratorThreadingTest() { * This test is designed to detect multi-threading persistence exceptions like duplicate key, * exceptions that exceed retry limits or unnecessary versions. */ - @Test + @Test // ensure this runs on the same thread as the in-memory H2 database public void multiThreadingTest() { DataGenerator dataGenerator = new DataGenerator(_entityServiceImpl); Database server = ((EbeanAspectDao) _entityServiceImpl._aspectDao).getServer(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java similarity index 98% rename from metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java index 3ba2c858fb1a3..baed3ade0d207 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.graph.elastic; +package com.linkedin.metadata.graph.search; import com.google.common.collect.ImmutableList; import com.google.common.io.Resources; @@ -8,6 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.graph.GraphFilters; +import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.query.filter.RelationshipDirection; import java.net.URL; @@ -16,7 +17,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.elasticsearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; import org.testng.annotations.Test;
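The rename that follows is the template for the whole test refactor in this PR: a concrete, Spring-wired test class becomes an engine-agnostic abstract base, and its @Autowired fields turn into abstract accessors. A minimal sketch of the pattern, with hypothetical class and method names:

    import java.io.IOException;
    import javax.annotation.Nonnull;
    import org.opensearch.client.RequestOptions;
    import org.opensearch.client.RestHighLevelClient;

    abstract class EngineAgnosticTestBase {
        // subclasses decide which container-backed client to hand over
        @Nonnull
        protected abstract RestHighLevelClient getSearchClient();

        protected boolean clusterReachable() throws IOException {
            // shared test logic only touches the accessor, never a concrete bean
            return getSearchClient().ping(RequestOptions.DEFAULT);
        }
    }

Each engine then needs only a thin subclass that @Imports its container suite and forwards autowired beans through these accessors, which is exactly what SearchGraphServiceElasticSearchTest and SearchGraphServiceOpenSearchTest do below.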
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java similarity index 93% rename from metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 1717e466359d3..0ce43c9d31571 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -1,12 +1,11 @@ -package com.linkedin.metadata.graph.elastic; +package com.linkedin.metadata.graph.search; -import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.TagUrn; import com.linkedin.common.urn.Urn; -import com.linkedin.metadata.ESTestConfiguration; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; @@ -14,6 +13,9 @@ import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.graph.RelatedEntity; +import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; +import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; +import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.filter.Filter; @@ -23,18 +25,17 @@ import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; -import java.util.Arrays; -import java.util.Collections; -import org.elasticsearch.client.RestHighLevelClient; +import io.datahubproject.test.search.SearchTestUtils; import org.junit.Assert; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; +import org.opensearch.client.RestHighLevelClient; import org.testng.SkipException; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import javax.annotation.Nonnull; +import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; @@ -43,15 +44,16 @@ import static com.linkedin.metadata.search.utils.QueryUtils.*; import static org.testng.Assert.assertEquals; -@Import(ESTestConfiguration.class) -public class ElasticSearchGraphServiceTest extends GraphServiceTestBase { +abstract public class SearchGraphServiceTestBase extends GraphServiceTestBase { - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - private ESBulkProcessor _bulkProcessor; - @Autowired - private ESIndexBuilder _esIndexBuilder; + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); + + @Nonnull + abstract protected ESBulkProcessor getBulkProcessor(); + + @Nonnull + abstract protected ESIndexBuilder getIndexBuilder(); private final IndexConvention _indexConvention = new IndexConventionImpl(null); private final String _indexName = _indexConvention.getIndexName(INDEX_NAME); @@ -74,10 +76,10 @@ public void wipe() throws Exception { @Nonnull private ElasticSearchGraphService buildService() { LineageRegistry lineageRegistry = new LineageRegistry(SnapshotEntityRegistry.getInstance()); - ESGraphQueryDAO readDAO = new ESGraphQueryDAO(_searchClient, lineageRegistry, _indexConvention, GraphQueryConfiguration.testDefaults); - ESGraphWriteDAO writeDAO = new ESGraphWriteDAO(_indexConvention, _bulkProcessor, 1); - return new ElasticSearchGraphService(lineageRegistry, _bulkProcessor, _indexConvention, writeDAO, readDAO, - _esIndexBuilder); + ESGraphQueryDAO readDAO = new ESGraphQueryDAO(getSearchClient(), lineageRegistry, _indexConvention, GraphQueryConfiguration.testDefaults); + ESGraphWriteDAO writeDAO = new ESGraphWriteDAO(_indexConvention, getBulkProcessor(), 1); + return new ElasticSearchGraphService(lineageRegistry, getBulkProcessor(), _indexConvention, writeDAO, readDAO, + getIndexBuilder()); } @Override @@ -88,7 +90,7 @@ protected GraphService getGraphService() { @Override protected void syncAfterWrite() throws Exception { - ESTestConfiguration.syncAfterWrite(_bulkProcessor); + SearchTestUtils.syncAfterWrite(getBulkProcessor()); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/TimeFilterUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/TimeFilterUtilsTest.java similarity index 82% rename from
metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/TimeFilterUtilsTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/search/TimeFilterUtilsTest.java index 988a7ccc70741..989f9ae197239 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/TimeFilterUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/TimeFilterUtilsTest.java @@ -1,9 +1,11 @@ -package com.linkedin.metadata.graph.elastic; +package com.linkedin.metadata.graph.search; import com.google.common.io.Resources; import java.net.URL; import java.nio.charset.StandardCharsets; -import org.elasticsearch.index.query.QueryBuilder; + +import com.linkedin.metadata.graph.elastic.TimeFilterUtils; +import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java new file mode 100644 index 0000000000000..7b550311bf823 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/elasticsearch/SearchGraphServiceElasticSearchTest.java @@ -0,0 +1,49 @@ +package com.linkedin.metadata.graph.search.elasticsearch; + +import com.linkedin.metadata.graph.search.SearchGraphServiceTestBase; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchSuite; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; + +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) +public class SearchGraphServiceElasticSearchTest extends SearchGraphServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } + +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java new file mode 100644 index 0000000000000..eabfb523fb910 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/opensearch/SearchGraphServiceOpenSearchTest.java @@ -0,0 +1,48 @@ +package com.linkedin.metadata.graph.search.opensearch; + +import com.linkedin.metadata.graph.search.SearchGraphServiceTestBase; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.search.opensearch.OpenSearchSuite; +import 
io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) +public class SearchGraphServiceOpenSearchTest extends SearchGraphServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } + +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java similarity index 94% rename from metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index faff9f780e31c..461a146022446 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -1,8 +1,5 @@ package com.linkedin.metadata.search; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -16,8 +13,10 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.data.template.LongMap; -import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.metadata.TestEntityUtil; +import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; @@ -47,47 +46,60 @@ import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import org.elasticsearch.client.RestHighLevelClient; import org.junit.Assert; import org.mockito.Mockito; -import org.springframework.beans.factory.annotation.Autowired; +import org.opensearch.client.RestHighLevelClient; import 
org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; -import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.ESTestConfiguration.*; -import static org.mockito.ArgumentMatchers.*; -import static org.mockito.Mockito.*; -import static org.testng.Assert.*; - -@Import(ESTestConfiguration.class) -public class LineageSearchServiceTest extends AbstractTestNGSpringContextTests { - - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - private ESBulkProcessor _bulkProcessor; - @Autowired - private ESIndexBuilder _esIndexBuilder; - @Autowired - private SearchConfiguration _searchConfiguration; - @Autowired - private CustomSearchConfiguration _customSearchConfiguration; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; +import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +abstract public class LineageServiceTestBase extends AbstractTestNGSpringContextTests { + + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); + + @Nonnull + abstract protected ESBulkProcessor getBulkProcessor(); + + @Nonnull + abstract protected ESIndexBuilder getIndexBuilder(); + + @Nonnull + abstract protected SearchConfiguration getSearchConfiguration(); + + @Nonnull + abstract protected CustomSearchConfiguration getCustomSearchConfiguration(); private EntityRegistry _entityRegistry; private IndexConvention _indexConvention; @@ -142,18 +154,18 @@ private void resetService(boolean withCache, boolean withLightingCache) { public void wipe() throws Exception { _elasticSearchService.clear(); clearCache(false); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); } @Nonnull private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = - new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, + new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention, _searchConfiguration, _customSearchConfiguration); - 
ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, _bulkProcessor, 1); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); + ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); + ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -198,7 +210,7 @@ public void testSearchService() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt(), eq(null), eq(null))).thenReturn(mockResult(Collections.emptyList())); @@ -232,7 +244,7 @@ public void testSearchService() throws Exception { document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = searchAcrossLineage(null, TEST1); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -306,7 +318,7 @@ public void testSearchService() throws Exception { // Cleanup _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())).thenReturn( @@ -350,7 +362,7 @@ public void testScrollAcrossLineage() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt(), eq(null), eq(null))).thenReturn(mockResult(Collections.emptyList())); @@ -383,7 +395,7 @@ public void testScrollAcrossLineage() throws Exception { // Cleanup _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())).thenReturn( @@ -424,7 +436,7 @@ public void testLightningSearchService() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), 
anyInt(), anyInt(), eq(null), eq(null))).thenReturn(mockResult(Collections.emptyList())); @@ -461,7 +473,7 @@ public void testLightningSearchService() throws Exception { document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = searchAcrossLineage(null, testStar); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -616,7 +628,7 @@ public void testLightningSearchService() throws Exception { // Cleanup _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), anyInt())).thenReturn( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java similarity index 92% rename from metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index ad836664d7f6d..c0144d36843f5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -1,7 +1,5 @@ package com.linkedin.metadata.search; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -9,7 +7,8 @@ import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; -import com.linkedin.metadata.ESTestConfiguration; +import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; @@ -33,11 +32,9 @@ import com.linkedin.metadata.search.ranker.SimpleRanker; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; -import org.elasticsearch.client.RestHighLevelClient; -import org.springframework.beans.factory.annotation.Autowired; +import org.opensearch.client.RestHighLevelClient; import org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; -import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; @@ -45,23 +42,28 @@ import javax.annotation.Nonnull; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.ESTestConfiguration.syncAfterWrite; +import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; +import static 
io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.testng.Assert.assertEquals; -@Import(ESTestConfiguration.class) -public class SearchServiceTest extends AbstractTestNGSpringContextTests { - - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - private ESBulkProcessor _bulkProcessor; - @Autowired - private ESIndexBuilder _esIndexBuilder; - @Autowired - private SearchConfiguration _searchConfiguration; - @Autowired - private CustomSearchConfiguration _customSearchConfiguration; + +abstract public class SearchServiceTestBase extends AbstractTestNGSpringContextTests { + + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); + + @Nonnull + abstract protected ESBulkProcessor getBulkProcessor(); + + @Nonnull + abstract protected ESIndexBuilder getIndexBuilder(); + + @Nonnull + abstract protected SearchConfiguration getSearchConfiguration(); + + @Nonnull + abstract protected CustomSearchConfiguration getCustomSearchConfiguration(); + private EntityRegistry _entityRegistry; private IndexConvention _indexConvention; private SettingsBuilder _settingsBuilder; @@ -100,19 +102,19 @@ private void resetSearchService() { @BeforeMethod public void wipe() throws Exception { _elasticSearchService.clear(); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); } @Nonnull private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = - new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, + new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention, _searchConfiguration, _customSearchConfiguration); - ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, - _bulkProcessor, 1); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); + ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); + ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, + getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -139,7 +141,7 @@ public void testSearchService() throws Exception { document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); @@ -154,7 +156,7 @@ public void testSearchService() throws Exception { document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = 
_searchService.searchAcrossEntities(ImmutableList.of(), "'test2'", null, null, 0, 10, new SearchFlags().setFulltext(true)); @@ -167,7 +169,7 @@ public void testSearchService() throws Exception { _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "'test2'", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); @@ -233,7 +235,7 @@ public void testAdvancedSearchOr() throws Exception { document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, new SearchFlags().setFulltext(true)); @@ -304,7 +306,7 @@ public void testAdvancedSearchSoftDelete() throws Exception { document.set("removed", JsonNodeFactory.instance.booleanNode(false)); _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, new SearchFlags().setFulltext(true)); @@ -369,7 +371,7 @@ public void testAdvancedSearchNegated() throws Exception { document.set("removed", JsonNodeFactory.instance.booleanNode(false)); _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, new SearchFlags().setFulltext(true)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java similarity index 86% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index 9a6d2dc6fc1fa..d358c03c612d0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -1,19 +1,18 @@ -package com.linkedin.metadata.search.elasticsearch; +package com.linkedin.metadata.search; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; -import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.browse.BrowseResult; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.SearchFlags; -import 
com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; @@ -23,10 +22,7 @@ import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; -import java.util.List; -import org.elasticsearch.client.RestHighLevelClient; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; +import org.opensearch.client.RestHighLevelClient; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.annotations.BeforeClass; @@ -34,24 +30,28 @@ import org.testng.annotations.Test; import javax.annotation.Nonnull; +import java.util.List; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.ESTestConfiguration.syncAfterWrite; +import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; +import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.testng.Assert.assertEquals; -@Import(ESTestConfiguration.class) -public class ElasticSearchServiceTest extends AbstractTestNGSpringContextTests { +abstract public class TestEntityTestBase extends AbstractTestNGSpringContextTests { - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - private ESBulkProcessor _bulkProcessor; - @Autowired - private ESIndexBuilder _esIndexBuilder; - @Autowired - private SearchConfiguration _searchConfiguration; - @Autowired - private CustomSearchConfiguration _customSearchConfiguration; + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); + + @Nonnull + abstract protected ESBulkProcessor getBulkProcessor(); + + @Nonnull + abstract protected ESIndexBuilder getIndexBuilder(); + + @Nonnull + abstract protected SearchConfiguration getSearchConfiguration(); + + @Nonnull + abstract protected CustomSearchConfiguration getCustomSearchConfiguration(); private EntityRegistry _entityRegistry; private IndexConvention _indexConvention; @@ -83,12 +83,12 @@ public void wipe() throws Exception { @Nonnull private ElasticSearchService buildService() { EntityIndexBuilders indexBuilders = - new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention, _searchConfiguration, _customSearchConfiguration); + new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); + ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); ESWriteDAO writeDAO = - new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, 
_bulkProcessor, 1); + new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -109,7 +109,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document.set("foreignKey", JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -134,7 +134,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -152,7 +152,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); @@ -174,7 +174,7 @@ public void testElasticSearchServiceFulltext() throws Exception { document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); document.set("foreignKey", JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -191,7 +191,7 @@ public void testElasticSearchServiceFulltext() throws Exception { document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); @@ -203,7 +203,7 @@ public void testElasticSearchServiceFulltext() throws Exception { _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); - syncAfterWrite(_bulkProcessor); + syncAfterWrite(getBulkProcessor()); searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", 
null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchSuite.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchSuite.java new file mode 100644 index 0000000000000..750423a024dcc --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchSuite.java @@ -0,0 +1,32 @@ +package com.linkedin.metadata.search.elasticsearch; + +import io.datahubproject.test.search.ElasticsearchTestContainer; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testcontainers.containers.GenericContainer; +import org.testng.annotations.AfterSuite; + + +@TestConfiguration +public class ElasticSearchSuite extends AbstractTestNGSpringContextTests { + + private static final ElasticsearchTestContainer ELASTICSEARCH_TEST_CONTAINER; + private static GenericContainer container; + static { + ELASTICSEARCH_TEST_CONTAINER = new ElasticsearchTestContainer(); + } + + @AfterSuite + public void after() { + ELASTICSEARCH_TEST_CONTAINER.stopContainer(); + } + + @Bean(name = "testSearchContainer") + public GenericContainer testSearchContainer() { + if (container == null) { + container = ELASTICSEARCH_TEST_CONTAINER.startContainer(); + } + return container; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java new file mode 100644 index 0000000000000..cfacd4c15409a --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/GoldenElasticSearchTest.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.search.fixtures.GoldenTestBase; +import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.SearchService; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + +@Import({ElasticSearchSuite.class, SampleDataFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class GoldenElasticSearchTest extends GoldenTestBase { + + @Autowired + @Qualifier("longTailSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("entityRegistry") + private EntityRegistry entityRegistry; + + + @NotNull + @Override + protected EntityRegistry getEntityRegistry() { + return entityRegistry; + } + + @NotNull + @Override + protected SearchService getSearchService() { + return searchService; + } + + @Test + public void initTest() { + assertNotNull(searchService); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java new file mode 100644 index 
0000000000000..20f4ee52f0e62 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/IndexBuilderElasticSearchTest.java @@ -0,0 +1,30 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.search.indexbuilder.IndexBuilderTestBase; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + + +@Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) +public class IndexBuilderElasticSearchTest extends IndexBuilderTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @Test + public void initTest() { + assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java new file mode 100644 index 0000000000000..0cb49bc555421 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageDataFixtureElasticSearchTest.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.search.fixtures.LineageDataFixtureTestBase; +import io.datahubproject.test.fixtures.search.SearchLineageFixtureConfiguration; +import com.linkedin.metadata.search.LineageSearchService; +import com.linkedin.metadata.search.SearchService; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + + +@Import({ElasticSearchSuite.class, SearchLineageFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class LineageDataFixtureElasticSearchTest extends LineageDataFixtureTestBase { + + @Autowired + @Qualifier("searchLineageSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("searchLineageLineageSearchService") + protected LineageSearchService lineageService; + + @NotNull + @Override + protected LineageSearchService getLineageService() { + return lineageService; + } + + @NotNull + @Override + protected SearchService getSearchService() { + return searchService; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(lineageService); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java new file mode 100644 index 0000000000000..613ec5a26ff66 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/LineageServiceElasticSearchTest.java @@ -0,0 +1,66 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.search.LineageServiceTestBase; +import 
com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + + +@Import({ElasticSearchSuite.class, SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class}) +public class LineageServiceElasticSearchTest extends LineageServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + @Autowired + private SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +}
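The fixture test that follows takes a shortcut to the same accessor pattern: Lombok's @Getter on the class generates a getter per field, and those generated getters are what satisfy the abstract methods declared in SampleDataFixtureTestBase. A sketch of why that compiles (names are hypothetical; Lombok is assumed on the test classpath):

    import lombok.Getter;

    abstract class FixtureTestBase {
        // the contract each concrete fixture test must fulfil
        protected abstract String getSearchEngine();
    }

    // @Getter generates getSearchEngine() from the field at compile time,
    // and that generated method overrides the abstract one in the base class.
    @Getter
    class SampleFixtureTest extends FixtureTestBase {
        protected final String searchEngine = "elasticsearch";
    }

That is why the class below declares plain @Autowired fields and no explicit overrides.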
@Qualifier("sampleDataEntityClient") + protected EntityClient entityClient; + + @Autowired + @Qualifier("entityRegistry") + private EntityRegistry entityRegistry; + + @Test + public void initTest() { + assertNotNull(searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java new file mode 100644 index 0000000000000..1a6a20cd9df9d --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchDAOElasticSearchTest.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.search.query.SearchDAOTestBase; +import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; + +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import lombok.Getter; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; + +import org.springframework.beans.factory.annotation.Qualifier; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + + +@Getter +@Import({ElasticSearchSuite.class, SampleDataFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class SearchDAOElasticSearchTest extends SearchDAOTestBase { + @Autowired + private RestHighLevelClient searchClient; + @Autowired + private SearchConfiguration searchConfiguration; + @Autowired + @Qualifier("sampleDataIndexConvention") + IndexConvention indexConvention; + + @Test + public void initTest() { + assertNotNull(searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java new file mode 100644 index 0000000000000..a9e9feac28007 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchServiceElasticSearchTest.java @@ -0,0 +1,65 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.search.SearchServiceTestBase; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({ElasticSearchSuite.class, SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class}) +public class SearchServiceElasticSearchTest extends SearchServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + @Autowired + private 
SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java new file mode 100644 index 0000000000000..7365887fb9b2e --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SystemMetadataServiceElasticSearchTest.java @@ -0,0 +1,47 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.systemmetadata.SystemMetadataServiceTestBase; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + + +@Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) +public class SystemMetadataServiceElasticSearchTest extends SystemMetadataServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java new file mode 100644 index 0000000000000..bec610b20dca1 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TestEntityElasticSearchTest.java @@ -0,0 +1,65 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.TestEntityTestBase; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import 
io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({ElasticSearchSuite.class, SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class}) +public class TestEntityElasticSearchTest extends TestEntityTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + @Autowired + private SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java new file mode 100644 index 0000000000000..5b85904edc923 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/TimeseriesAspectServiceElasticSearchTest.java @@ -0,0 +1,46 @@ +package com.linkedin.metadata.search.elasticsearch; + +import com.linkedin.metadata.timeseries.search.TimeseriesAspectServiceTestBase; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({ElasticSearchSuite.class, SearchTestContainerConfiguration.class}) +public class TimeseriesAspectServiceElasticSearchTest extends TimeseriesAspectServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java 
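Each of the new test classes above follows the same shape: a thin, engine-specific subclass that pulls in a shared test-container suite plus common configuration via @Import, autowires the engine's client beans, and hands them to an engine-agnostic base class through accessor overrides. A minimal sketch of the presumed OpenSearch twin of IndexBuilderElasticSearchTest follows; the OpenSearchSuite configuration name is an assumption, not taken from this section of the diff.

package com.linkedin.metadata.search.opensearch;

import com.linkedin.metadata.search.indexbuilder.IndexBuilderTestBase;
import io.datahubproject.test.search.config.SearchTestContainerConfiguration;
import org.jetbrains.annotations.NotNull;
import org.opensearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Import;
import org.testng.annotations.Test;

import static org.testng.AssertJUnit.assertNotNull;

// OpenSearchSuite is assumed to be the OpenSearch analogue of ElasticSearchSuite;
// it does not appear in this part of the diff.
@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class})
public class IndexBuilderOpenSearchTest extends IndexBuilderTestBase {

  // OpenSearch ships its own fork of RestHighLevelClient, so the same wiring works
  // against either engine.
  @Autowired
  private RestHighLevelClient _searchClient;

  @NotNull
  @Override
  protected RestHighLevelClient getSearchClient() {
    return _searchClient;
  }

  @Test
  public void initTest() {
    assertNotNull(_searchClient);
  }
}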
b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java deleted file mode 100644 index b506051e9bb5d..0000000000000 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java +++ /dev/null @@ -1,312 +0,0 @@ -package com.linkedin.metadata.search.elasticsearch.query; - -import com.linkedin.metadata.config.search.SearchConfiguration; -import com.datahub.test.Snapshot; -import com.google.common.collect.ImmutableList; -import com.linkedin.data.template.LongMap; -import com.linkedin.data.template.StringArray; -import com.linkedin.metadata.ESSampleDataFixture; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; -import com.linkedin.metadata.query.filter.Condition; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.CriterionArray; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.search.AggregationMetadata; -import com.linkedin.metadata.search.AggregationMetadataArray; -import com.linkedin.metadata.search.FilterValueArray; -import com.linkedin.metadata.search.SearchEntityArray; -import com.linkedin.metadata.search.SearchResult; -import com.linkedin.metadata.search.SearchResultMetadata; -import com.linkedin.metadata.utils.SearchUtil; -import com.linkedin.metadata.utils.elasticsearch.IndexConvention; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.elasticsearch.client.RestHighLevelClient; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; -import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; -import org.testng.annotations.Test; - -import com.linkedin.metadata.query.filter.Criterion; -import org.springframework.beans.factory.annotation.Qualifier; - -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.utils.SearchUtil.*; -import static org.testng.Assert.*; - - -@Import(ESSampleDataFixture.class) -public class ESSearchDAOTest extends AbstractTestNGSpringContextTests { - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - private SearchConfiguration _searchConfiguration; - @Autowired - @Qualifier("sampleDataIndexConvention") - IndexConvention _indexConvention; - EntityRegistry _entityRegistry = new SnapshotEntityRegistry(new Snapshot()); - - - - @Test - public void testTransformFilterForEntitiesNoChange() { - Criterion c = new Criterion().setValue("urn:li:tag:abc").setValues( - new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("tags.keyword"); - - Filter f = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c)))); - - Filter transformedFilter = SearchUtil.transformFilterForEntities(f, _indexConvention); - assertEquals(f, transformedFilter); - } - - @Test - public void testTransformFilterForEntitiesNullFilter() { - Filter transformedFilter = SearchUtil.transformFilterForEntities(null, _indexConvention); - assertNotNull(_indexConvention); - assertEquals(null, transformedFilter); - } - - @Test - public void testTransformFilterForEntitiesWithChanges() { - - Criterion c = new Criterion().setValue("dataset").setValues( - new 
StringArray(ImmutableList.of("dataset")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType"); - - Filter f = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c)))); - Filter originalF = null; - try { - originalF = f.copy(); - } catch (CloneNotSupportedException e) { - fail(e.getMessage()); - } - assertEquals(f, originalF); - - Filter transformedFilter = SearchUtil.transformFilterForEntities(f, _indexConvention); - assertNotEquals(originalF, transformedFilter); - - Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datasetindex_v2").setValues( - new StringArray(ImmutableList.of("smpldat_datasetindex_v2")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_index"); - - Filter expectedNewFilter = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion)))); - - assertEquals(expectedNewFilter, transformedFilter); - } - - @Test - public void testTransformFilterForEntitiesWithUnderscore() { - - Criterion c = new Criterion().setValue("data_job").setValues( - new StringArray(ImmutableList.of("data_job")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType"); - - Filter f = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c)))); - Filter originalF = null; - try { - originalF = f.copy(); - } catch (CloneNotSupportedException e) { - fail(e.getMessage()); - } - assertEquals(f, originalF); - - Filter transformedFilter = SearchUtil.transformFilterForEntities(f, _indexConvention); - assertNotEquals(originalF, transformedFilter); - - Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datajobindex_v2").setValues( - new StringArray(ImmutableList.of("smpldat_datajobindex_v2")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_index"); - - Filter expectedNewFilter = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion)))); - - assertEquals(transformedFilter, expectedNewFilter); - } - - @Test - public void testTransformFilterForEntitiesWithSomeChanges() { - - Criterion criterionChanged = new Criterion().setValue("dataset").setValues( - new StringArray(ImmutableList.of("dataset")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType"); - Criterion criterionUnchanged = new Criterion().setValue("urn:li:tag:abc").setValues( - new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("tags.keyword"); - - Filter f = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(criterionChanged, criterionUnchanged)))); - Filter originalF = null; - try { - originalF = f.copy(); - } catch (CloneNotSupportedException e) { - fail(e.getMessage()); - } - assertEquals(f, originalF); - - Filter transformedFilter = SearchUtil.transformFilterForEntities(f, _indexConvention); - assertNotEquals(originalF, transformedFilter); - - Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datasetindex_v2").setValues( - new StringArray(ImmutableList.of("smpldat_datasetindex_v2")) - ).setNegated(false).setCondition(Condition.EQUAL).setField("_index"); - - Filter expectedNewFilter = new Filter().setOr( - new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion, criterionUnchanged)))); - - 
assertEquals(expectedNewFilter, transformedFilter); - } - - @Test - public void testTransformIndexIntoEntityNameSingle() { - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); - // Empty aggregations - final SearchResultMetadata searchResultMetadata = - new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); - SearchResult result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>())) - .setMetadata(searchResultMetadata) - .setFrom(0) - .setPageSize(100) - .setNumEntities(30); - SearchResult expectedResult = null; - try { - expectedResult = result.copy(); - } catch (CloneNotSupportedException e) { - fail(e.getMessage()); - } - assertEquals(expectedResult, searchDAO.transformIndexIntoEntityName(result)); - - // one facet, do not transform - Map<String, Long> aggMap = Map.of("urn:li:corpuser:datahub", Long.valueOf(3)); - - List<AggregationMetadata> aggregationMetadataList = new ArrayList<>(); - aggregationMetadataList.add(new AggregationMetadata().setName("owners") - .setDisplayName("Owned by") - .setAggregations(new LongMap(aggMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(aggMap, Collections.emptySet()))) - ); - searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); - result.setMetadata(searchResultMetadata); - - try { - expectedResult = result.copy(); - } catch (CloneNotSupportedException e) { - fail(e.getMessage()); - } - assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult); - - // one facet, transform - Map<String, Long> entityTypeMap = Map.of("smpldat_datasetindex_v2", Long.valueOf(3)); - - aggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType") - .setDisplayName("Type") - .setAggregations(new LongMap(entityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, Collections.emptySet()))) - ); - searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); - result.setMetadata(searchResultMetadata); - - Map<String, Long> expectedEntityTypeMap = Map.of("dataset", Long.valueOf(3)); - - List<AggregationMetadata> expectedAggregationMetadataList = List.of( - new AggregationMetadata().setName("_entityType") - .setDisplayName("Type") - .setAggregations(new LongMap(expectedEntityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet()))) - ); - expectedResult.setMetadata(new SearchResultMetadata().setAggregations(new AggregationMetadataArray(expectedAggregationMetadataList))); - assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult); - } - - @Test - public void testTransformIndexIntoEntityNameNested() { - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); - // One nested facet - Map<String, Long> entityTypeMap = Map.of( - String.format("smpldat_datasetindex_v2%surn:li:corpuser:datahub", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3), - String.format("smpldat_datasetindex_v2%surn:li:corpuser:bfoo", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7), - "smpldat_datasetindex_v2", Long.valueOf(20) - ); - List<AggregationMetadata> aggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType␞owners") - .setDisplayName("Type␞Owned By") - .setAggregations(new LongMap(entityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, 
Collections.emptySet()))) - ); - SearchResult result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>())) - .setMetadata(new SearchResultMetadata().setAggregations( - new AggregationMetadataArray(aggregationMetadataList) - )) - .setFrom(0) - .setPageSize(100) - .setNumEntities(50); - - Map<String, Long> expectedEntityTypeMap = Map.of( - String.format("dataset%surn:li:corpuser:datahub", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3), - String.format("dataset%surn:li:corpuser:bfoo", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7), - "dataset", Long.valueOf(20) - ); - - List<AggregationMetadata> expectedAggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType␞owners") - .setDisplayName("Type␞Owned By") - .setAggregations(new LongMap(expectedEntityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet()))) - ); - SearchResult expectedResult = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>())) - .setMetadata(new SearchResultMetadata().setAggregations( - new AggregationMetadataArray(expectedAggregationMetadataList))) - .setFrom(0) - .setPageSize(100) - .setNumEntities(50); - assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult); - - // One nested facet, opposite order - entityTypeMap = Map.of( - String.format("urn:li:corpuser:datahub%ssmpldat_datasetindex_v2", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3), - String.format("urn:li:corpuser:datahub%ssmpldat_chartindex_v2", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7), - "urn:li:corpuser:datahub", Long.valueOf(20) - ); - aggregationMetadataList = List.of(new AggregationMetadata().setName("owners␞_entityType") - .setDisplayName("Owned By␞Type") - .setAggregations(new LongMap(entityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, Collections.emptySet()))) - ); - result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>())) - .setMetadata(new SearchResultMetadata().setAggregations( - new AggregationMetadataArray(aggregationMetadataList) - )) - .setFrom(0) - .setPageSize(100) - .setNumEntities(50); - - expectedEntityTypeMap = Map.of( - String.format("urn:li:corpuser:datahub%sdataset", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3), - String.format("urn:li:corpuser:datahub%schart", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7), - "urn:li:corpuser:datahub", Long.valueOf(20) - ); - - expectedAggregationMetadataList = List.of(new AggregationMetadata().setName("owners␞_entityType") - .setDisplayName("Owned By␞Type") - .setAggregations(new LongMap(expectedEntityTypeMap)) - .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet()))) - ); - expectedResult = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>())) - .setMetadata(new SearchResultMetadata().setAggregations( - new AggregationMetadataArray(expectedAggregationMetadataList))) - .setFrom(0) - .setPageSize(100) - .setNumEntities(50); - assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult); - } - - -} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java similarity index 74% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java 
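The 312 lines deleted above do not disappear outright: the ESSearchDAO coverage is reorganized under the engine-agnostic SearchDAOTestBase that SearchDAOElasticSearchTest (added earlier in this diff) extends. The core behavior those assertions pin down is the rewrite of `_entityType` facet criteria into concrete `_index` criteria. A condensed sketch of that behavior, using only the APIs exercised by the deleted tests (the wrapper class and method names are illustrative, not part of this change):

package com.linkedin.metadata.search.example;

import com.google.common.collect.ImmutableList;
import com.linkedin.data.template.StringArray;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
import com.linkedin.metadata.query.filter.Criterion;
import com.linkedin.metadata.query.filter.CriterionArray;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;

public final class FilterRewriteExample {
  private FilterRewriteExample() { }

  // Build the filter the deleted testTransformFilterForEntitiesWithChanges started from:
  // a single criterion on the virtual "_entityType" field.
  public static Filter entityTypeFilter(String entityType) {
    Criterion criterion = new Criterion()
        .setField("_entityType")
        .setCondition(Condition.EQUAL)
        .setValue(entityType)
        .setValues(new StringArray(ImmutableList.of(entityType)))
        .setNegated(false);
    return new Filter().setOr(new ConjunctiveCriterionArray(
        new ConjunctiveCriterion().setAnd(new CriterionArray(criterion))));
  }

  // After the transform the criterion targets "_index" instead; per the deleted
  // assertions, "dataset" becomes "smpldat_datasetindex_v2" under the sample-data
  // IndexConvention, while criteria on ordinary fields pass through unchanged.
  public static Filter rewriteForSearch(String entityType, IndexConvention indexConvention) {
    return SearchUtil.transformFilterForEntities(entityTypeFilter(entityType), indexConvention);
  }
}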
index d720c95fef84d..ed81f3cebd027 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java @@ -1,60 +1,47 @@ -package com.linkedin.metadata.search.elasticsearch.fixtures; +package com.linkedin.metadata.search.fixtures; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.ESSampleDataFixture; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.MatchedFieldArray; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; -import org.elasticsearch.client.RestHighLevelClient; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; +import javax.annotation.Nonnull; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; -import static com.linkedin.metadata.ESTestUtils.*; +import static io.datahubproject.test.search.SearchTestUtils.searchAcrossCustomEntities; +import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; import static org.testng.Assert.assertTrue; -import static org.testng.AssertJUnit.*; +import static org.testng.AssertJUnit.assertNotNull; -@Import(ESSampleDataFixture.class) -public class ElasticSearchGoldenTest extends AbstractTestNGSpringContextTests { +abstract public class GoldenTestBase extends AbstractTestNGSpringContextTests { private static final List<String> SEARCHABLE_LONGTAIL_ENTITIES = Stream.of(EntityType.CHART, EntityType.CONTAINER, EntityType.DASHBOARD, EntityType.DATASET, EntityType.DOMAIN, EntityType.TAG ).map(EntityTypeMapper::getName) .collect(Collectors.toList()); - @Autowired - private RestHighLevelClient _searchClient; - @Autowired - @Qualifier("longTailSearchService") - protected SearchService searchService; + @Nonnull + abstract protected EntityRegistry getEntityRegistry(); - @Autowired - @Qualifier("longTailEntityClient") - protected EntityClient entityClient; - - @Autowired - @Qualifier("longTailEntityRegistry") - private EntityRegistry entityRegistry; + @Nonnull + abstract protected SearchService getSearchService(); @Test public void testNameMatchPetProfiles() { /* Searching for "pet profiles" should return "pet_profiles" as the first 2 search results */ - assertNotNull(searchService); - assertNotNull(entityRegistry); - SearchResult searchResult = searchAcrossCustomEntities(searchService, "pet profiles", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + assertNotNull(getEntityRegistry()); + SearchResult searchResult = searchAcrossCustomEntities(getSearchService(), "pet profiles", SEARCHABLE_LONGTAIL_ENTITIES); assertTrue(searchResult.getEntities().size() >= 2); Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); @@ -68,8 +55,8 @@ public void testNameMatchPetProfile() { /* Searching for "pet profile" should return "pet_profiles" as the first 2 search results */ - assertNotNull(searchService); - 
SearchResult searchResult = searchAcrossEntities(searchService, "pet profile", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + SearchResult searchResult = searchAcrossEntities(getSearchService(), "pet profile", SEARCHABLE_LONGTAIL_ENTITIES); assertTrue(searchResult.getEntities().size() >= 2); Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); @@ -84,8 +71,8 @@ public void testGlossaryTerms() { Searching for "ReturnRate" should return all tables that have the glossary term applied before anything else */ - assertNotNull(searchService); - SearchResult searchResult = searchAcrossEntities(searchService, "ReturnRate", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + SearchResult searchResult = searchAcrossEntities(getSearchService(), "ReturnRate", SEARCHABLE_LONGTAIL_ENTITIES); SearchEntityArray entities = searchResult.getEntities(); assertTrue(searchResult.getEntities().size() >= 4); MatchedFieldArray firstResultMatchedFields = entities.get(0).getMatchedFields(); @@ -105,8 +92,8 @@ public void testNameMatchPartiallyQualified() { Searching for "analytics.pet_details" (partially qualified) should return the fully qualified table name as the first search results before any others */ - assertNotNull(searchService); - SearchResult searchResult = searchAcrossEntities(searchService, "analytics.pet_details", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + SearchResult searchResult = searchAcrossEntities(getSearchService(), "analytics.pet_details", SEARCHABLE_LONGTAIL_ENTITIES); assertTrue(searchResult.getEntities().size() >= 2); Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); @@ -121,8 +108,8 @@ public void testNameMatchCollaborativeActionitems() { Searching for "collaborative actionitems" should return "collaborative_actionitems" as the first search result, followed by "collaborative_actionitems_old" */ - assertNotNull(searchService); - SearchResult searchResult = searchAcrossEntities(searchService, "collaborative actionitems", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + SearchResult searchResult = searchAcrossEntities(getSearchService(), "collaborative actionitems", SEARCHABLE_LONGTAIL_ENTITIES); assertTrue(searchResult.getEntities().size() >= 2); Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); @@ -144,13 +131,17 @@ public void testNameMatchCustomerOrders() { Searching for "customer orders" should return "customer_orders" as the first search result, not suffixed by anything */ - assertNotNull(searchService); - SearchResult searchResult = searchAcrossEntities(searchService, "customer orders", SEARCHABLE_LONGTAIL_ENTITIES); + assertNotNull(getSearchService()); + SearchResult searchResult = searchAcrossEntities(getSearchService(), "customer orders", SEARCHABLE_LONGTAIL_ENTITIES); assertTrue(searchResult.getEntities().size() >= 2); Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); // Checks that the table name is not suffixed with anything - assertTrue(firstResultUrn.toString().contains("customer_orders,")); + assertTrue(firstResultUrn.toString().contains("customer_orders,"), + "Expected firstResultUrn to contain `customer_orders,` but results are " + + searchResult.getEntities().stream() + .map(e -> String.format("(Score: %s Urn: %s)", 
e.getScore(), e.getEntity().getId())) + .collect(Collectors.joining(", "))); Double firstResultScore = searchResult.getEntities().get(0).getScore(); Double secondResultScore = searchResult.getEntities().get(1).getScore(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/LineageDataFixtureTestBase.java similarity index 52% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/LineageDataFixtureTestBase.java index 55f7d4618f479..eaf8feedeb6ed 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/LineageDataFixtureTestBase.java @@ -1,50 +1,43 @@ -package com.linkedin.metadata.search.elasticsearch.fixtures; +package com.linkedin.metadata.search.fixtures; import com.linkedin.common.urn.Urn; -import com.linkedin.metadata.ESSearchLineageFixture; -import com.linkedin.metadata.ESTestUtils; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; +import javax.annotation.Nonnull; import java.net.URISyntaxException; -import static com.linkedin.metadata.ESTestUtils.lineage; +import static io.datahubproject.test.search.SearchTestUtils.lineage; +import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +abstract public class LineageDataFixtureTestBase extends AbstractTestNGSpringContextTests { -@Import(ESSearchLineageFixture.class) -public class SearchLineageDataFixtureTests extends AbstractTestNGSpringContextTests { + @Nonnull + abstract protected LineageSearchService getLineageService(); - @Autowired - @Qualifier("searchLineageSearchService") - protected SearchService searchService; - - @Autowired - @Qualifier("searchLineageLineageSearchService") - protected LineageSearchService lineageService; + @Nonnull + abstract protected SearchService getSearchService(); @Test public void testFixtureInitialization() { - assertNotNull(searchService); - SearchResult noResult = ESTestUtils.searchAcrossEntities(searchService, "no results"); + assertNotNull(getSearchService()); + SearchResult noResult = searchAcrossEntities(getSearchService(), "no results"); assertEquals(noResult.getEntities().size(), 0); - SearchResult result = ESTestUtils.searchAcrossEntities(searchService, "e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8"); + SearchResult result = searchAcrossEntities(getSearchService(), "e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8"); assertEquals(result.getEntities().size(), 1); assertEquals(result.getEntities().get(0).getEntity().toString(), "urn:li:dataset:(urn:li:dataPlatform:9cf8c96,e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8,PROD)"); - LineageSearchResult lineageResult = lineage(lineageService, 
result.getEntities().get(0).getEntity(), 1); + LineageSearchResult lineageResult = lineage(getLineageService(), result.getEntities().get(0).getEntity(), 1); assertEquals(lineageResult.getEntities().size(), 10); } @@ -54,15 +47,15 @@ public void testDatasetLineage() throws URISyntaxException { "urn:li:dataset:(urn:li:dataPlatform:9cf8c96,e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8,PROD)"); // 1 hops - LineageSearchResult lineageResult = lineage(lineageService, testUrn, 1); + LineageSearchResult lineageResult = lineage(getLineageService(), testUrn, 1); assertEquals(lineageResult.getEntities().size(), 10); // 2 hops - lineageResult = lineage(lineageService, testUrn, 2); + lineageResult = lineage(getLineageService(), testUrn, 2); assertEquals(lineageResult.getEntities().size(), 5); // 3 hops - lineageResult = lineage(lineageService, testUrn, 3); + lineageResult = lineage(getLineageService(), testUrn, 3); assertEquals(lineageResult.getEntities().size(), 12); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java similarity index 81% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 450378b247cea..1660504810296 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.search.elasticsearch.fixtures; +package com.linkedin.metadata.search.fixtures; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; @@ -12,7 +12,6 @@ import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.ESSampleDataFixture; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -28,22 +27,19 @@ import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; - import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; import com.linkedin.r2.RemoteInvocationException; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.AnalyzeRequest; -import org.elasticsearch.client.indices.AnalyzeResponse; -import org.elasticsearch.client.indices.GetMappingsRequest; -import org.elasticsearch.client.indices.GetMappingsResponse; import org.junit.Assert; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Import; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.AnalyzeRequest; +import org.opensearch.client.indices.AnalyzeResponse; +import org.opensearch.client.indices.GetMappingsRequest; +import org.opensearch.client.indices.GetMappingsResponse; import 
org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; +import javax.annotation.Nonnull; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -54,36 +50,36 @@ import java.util.stream.IntStream; import java.util.stream.Stream; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.ESTestUtils.*; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; -import static com.linkedin.metadata.utils.SearchUtil.*; +import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; +import static io.datahubproject.test.search.SearchTestUtils.autocomplete; +import static io.datahubproject.test.search.SearchTestUtils.scroll; +import static io.datahubproject.test.search.SearchTestUtils.search; +import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; +import static io.datahubproject.test.search.SearchTestUtils.searchStructured; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; -import static org.testng.Assert.assertFalse; - -@Import(ESSampleDataFixture.class) -public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { - private static final Authentication AUTHENTICATION = +abstract public class SampleDataFixtureTestBase extends AbstractTestNGSpringContextTests { + protected static final Authentication AUTHENTICATION = new Authentication(new Actor(ActorType.USER, "test"), ""); - @Autowired - private RestHighLevelClient _searchClient; + @Nonnull + abstract protected EntityRegistry getEntityRegistry(); - @Autowired - @Qualifier("sampleDataSearchService") - protected SearchService searchService; + @Nonnull + abstract protected SearchService getSearchService(); - @Autowired - @Qualifier("sampleDataEntityClient") - protected EntityClient entityClient; + @Nonnull + abstract protected EntityClient getEntityClient(); - @Autowired - @Qualifier("entityRegistry") - private EntityRegistry entityRegistry; + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); @Test public void testSearchFieldConfig() throws IOException { @@ -91,29 +87,29 @@ public void testSearchFieldConfig() throws IOException { For every field in every entity fixture, ensure proper detection of field types and analyzers */ Map<EntitySpec, String> fixtureEntities = new HashMap<>(); - fixtureEntities.put(entityRegistry.getEntitySpec("dataset"), "smpldat_datasetindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("chart"), "smpldat_chartindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("container"), "smpldat_containerindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("corpgroup"), "smpldat_corpgroupindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("corpuser"), "smpldat_corpuserindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("dashboard"), "smpldat_dashboardindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("dataflow"), "smpldat_dataflowindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("datajob"), "smpldat_datajobindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("domain"), "smpldat_domainindex_v2"); - 
fixtureEntities.put(entityRegistry.getEntitySpec("glossarynode"), "smpldat_glossarynodeindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("glossaryterm"), "smpldat_glossarytermindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("mlfeature"), "smpldat_mlfeatureindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("mlfeaturetable"), "smpldat_mlfeaturetableindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("mlmodelgroup"), "smpldat_mlmodelgroupindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("mlmodel"), "smpldat_mlmodelindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("mlprimarykey"), "smpldat_mlprimarykeyindex_v2"); - fixtureEntities.put(entityRegistry.getEntitySpec("tag"), "smpldat_tagindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("dataset"), "smpldat_datasetindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("chart"), "smpldat_chartindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("container"), "smpldat_containerindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("corpgroup"), "smpldat_corpgroupindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("corpuser"), "smpldat_corpuserindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("dashboard"), "smpldat_dashboardindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("dataflow"), "smpldat_dataflowindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("datajob"), "smpldat_datajobindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("domain"), "smpldat_domainindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("glossarynode"), "smpldat_glossarynodeindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("glossaryterm"), "smpldat_glossarytermindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("mlfeature"), "smpldat_mlfeatureindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("mlfeaturetable"), "smpldat_mlfeaturetableindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("mlmodelgroup"), "smpldat_mlmodelgroupindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("mlmodel"), "smpldat_mlmodelindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("mlprimarykey"), "smpldat_mlprimarykeyindex_v2"); + fixtureEntities.put(getEntityRegistry().getEntitySpec("tag"), "smpldat_tagindex_v2"); for (Map.Entry<EntitySpec, String> entry : fixtureEntities.entrySet()) { EntitySpec entitySpec = entry.getKey(); GetMappingsRequest req = new GetMappingsRequest().indices(entry.getValue()); - GetMappingsResponse resp = _searchClient.indices().getMapping(req, RequestOptions.DEFAULT); + GetMappingsResponse resp = getSearchClient().indices().getMapping(req, RequestOptions.DEFAULT); Map<String, Map<String, Object>> mappings = (Map<String, Map<String, Object>>) resp.mappings() .get(entry.getValue()).sourceAsMap().get("properties"); @@ -182,7 +178,7 @@ public void testSearchFieldConfig() throws IOException { public void testDatasetHasTags() throws IOException { GetMappingsRequest req = new GetMappingsRequest() .indices("smpldat_datasetindex_v2"); - GetMappingsResponse resp = _searchClient.indices().getMapping(req, RequestOptions.DEFAULT); + GetMappingsResponse resp = getSearchClient().indices().getMapping(req, RequestOptions.DEFAULT); Map<String, Map<String, Object>> mappings = (Map<String, Map<String, Object>>) resp.mappings() .get("smpldat_datasetindex_v2").sourceAsMap().get("properties"); assertTrue(mappings.containsKey("hasTags")); @@ -191,11 +187,11 @@ public void testDatasetHasTags() throws 
IOException { @Test public void testFixtureInitialization() { - assertNotNull(searchService); - SearchResult noResult = searchAcrossEntities(searchService, "no results"); + assertNotNull(getSearchService()); + SearchResult noResult = searchAcrossEntities(getSearchService(), "no results"); assertEquals(0, noResult.getEntities().size()); - final SearchResult result = searchAcrossEntities(searchService, "test"); + final SearchResult result = searchAcrossEntities(getSearchService(), "test"); Map<String, Integer> expectedTypes = Map.of( "dataset", 13, @@ -209,7 +205,7 @@ public void testFixtureInitialization() { Map<String, List<Urn>> actualTypes = new HashMap<>(); for (String key : expectedTypes.keySet()) { actualTypes.put(key, result.getEntities().stream() - .map(SearchEntity::getEntity).filter(entity -> key.equals(entity.getEntityType())).collect(Collectors.toList())); + .map(SearchEntity::getEntity).filter(entity -> key.equals(entity.getEntityType())).collect(Collectors.toList())); } expectedTypes.forEach((key, value) -> @@ -241,7 +237,7 @@ public void testDataPlatform() { .build(); expected.forEach((key, value) -> { - SearchResult result = searchAcrossEntities(searchService, key); + SearchResult result = searchAcrossEntities(getSearchService(), key); assertEquals(result.getEntities().size(), value.intValue(), String.format("Unexpected data platform `%s` hits.", key)); // max is 100 without pagination }); @@ -257,14 +253,14 @@ public void testUrn() { "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)" ).forEach(query -> - assertTrue(searchAcrossEntities(searchService, query).getEntities().size() >= 1, - String.format("Unexpected >1 urn result for `%s`", query)) + assertTrue(searchAcrossEntities(getSearchService(), query).getEntities().size() >= 1, + String.format("Unexpected >1 urn result for `%s`", query)) ); } @Test public void testExactTable() { - SearchResult results = searchAcrossEntities(searchService, "stg_customers"); + SearchResult results = searchAcrossEntities(getSearchService(), "stg_customers"); assertEquals(results.getEntities().size(), 1, "Unexpected single urn result for `stg_customers`"); assertEquals(results.getEntities().get(0).getEntity().toString(), "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)"); @@ -281,7 +277,7 @@ public void testStemming() { testSets.forEach(testSet -> { Integer expectedResults = null; for (String testQuery : testSet) { - SearchResult results = searchAcrossEntities(searchService, testQuery); + SearchResult results = searchAcrossEntities(getSearchService(), testQuery); assertTrue(results.hasEntities() && !results.getEntities().isEmpty(), String.format("Expected search results for `%s`", testQuery)); @@ -299,7 +295,7 @@ public void testStemmingOverride() throws IOException { Set<String> testSet = Set.of("customer", "customers"); Set<SearchResult> results = testSet.stream() - .map(test -> searchAcrossEntities(searchService, test)) + .map(test -> searchAcrossEntities(getSearchService(), test)) .collect(Collectors.toSet()); results.forEach(r -> assertTrue(r.hasEntities() && !r.getEntities().isEmpty(), "Expected search results")); @@ -352,7 +348,7 @@ public void testDelimitedSynonym() throws IOException { "customer acquisition cost" ); List<Integer> resultCounts = testSet.stream().map(q -> { - SearchResult result = searchAcrossEntities(searchService, q); + SearchResult result = searchAcrossEntities(getSearchService(), q); assertTrue(result.hasEntities() && 
!result.getEntities().isEmpty(), "Expected search results for: " + q); return result.getEntities().size(); @@ -363,26 +359,26 @@ public void testDelimitedSynonym() throws IOException { public void testNegateAnalysis() throws IOException { String queryWithMinus = "logging_events -bckp"; AnalyzeRequest request = AnalyzeRequest.withIndexAnalyzer( - "smpldat_datasetindex_v2", - "query_word_delimited", queryWithMinus + "smpldat_datasetindex_v2", + "query_word_delimited", queryWithMinus ); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), - List.of("logging_events -bckp", "logging_ev", "-bckp", "log", "event", "bckp")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), + List.of("logging_events -bckp", "logging_ev", "-bckp", "log", "event", "bckp")); request = AnalyzeRequest.withIndexAnalyzer( - "smpldat_datasetindex_v2", - "word_gram_3", queryWithMinus + "smpldat_datasetindex_v2", + "word_gram_3", queryWithMinus ); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("logging events -bckp")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("logging events -bckp")); request = AnalyzeRequest.withIndexAnalyzer( - "smpldat_datasetindex_v2", - "word_gram_4", queryWithMinus + "smpldat_datasetindex_v2", + "word_gram_4", queryWithMinus ); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of()); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of()); } @@ -391,49 +387,49 @@ public void testWordGram() throws IOException { String text = "hello.cat_cool_customer"; AnalyzeRequest request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_2", text); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat", "cat cool", "cool customer")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat", "cat cool", "cool customer")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_3", text); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat cool", "cat cool customer")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat cool", "cat cool customer")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_4", text); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat cool customer")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hello cat cool customer")); String testMoreSeparators = "quick.brown:fox jumped-LAZY_Dog"; request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_2", testMoreSeparators); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), - List.of("quick brown", "brown fox", "fox jumped", "jumped lazy", "lazy dog")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), + List.of("quick brown", "brown fox", "fox jumped", "jumped lazy", "lazy dog")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_3", testMoreSeparators); assertEquals(getTokens(request) - 
.map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), - List.of("quick brown fox", "brown fox jumped", "fox jumped lazy", "jumped lazy dog")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), + List.of("quick brown fox", "brown fox jumped", "fox jumped lazy", "jumped lazy dog")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_4", testMoreSeparators); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), - List.of("quick brown fox jumped", "brown fox jumped lazy", "fox jumped lazy dog")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), + List.of("quick brown fox jumped", "brown fox jumped lazy", "fox jumped lazy dog")); String textWithQuotesAndDuplicateWord = "\"my_db.my_exact_table\""; request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_2", textWithQuotesAndDuplicateWord); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db", "db my", "my exact", "exact table")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db", "db my", "my exact", "exact table")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_3", textWithQuotesAndDuplicateWord); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db my", "db my exact", "my exact table")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db my", "db my exact", "my exact table")); request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_4", textWithQuotesAndDuplicateWord); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db my exact", "db my exact table")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("my db my exact", "db my exact table")); String textWithParens = "(hi) there"; request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", "word_gram_2", textWithParens); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hi there")); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of("hi there")); String oneWordText = "hello"; for (String analyzer : List.of("word_gram_2", "word_gram_3", "word_gram_4")) { request = AnalyzeRequest.withIndexAnalyzer("smpldat_datasetindex_v2", analyzer, oneWordText); assertEquals(getTokens(request) - .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of()); + .map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()), List.of()); } } @@ -463,7 +459,7 @@ public void testUrnSynonym() throws IOException { "big query" ); List<SearchResult> results = testSet.stream().map(query -> { - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), "Expected search results for: " + query); return result; }).collect(Collectors.toList()); @@ -504,9 +500,9 @@ public void testTokenizationWithNumber() throws IOException { ); List<String> tokens = getTokens(request).map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList()); assertEquals(tokens, List.of( - "harshal-playground-306419", 
"harshal", "playground", "306419", - "test_schema", "test", "schema", - "austin311_deriv", "austin311", "deriv"), + "harshal-playground-306419", "harshal", "playground", "306419", + "test_schema", "test", "schema", + "austin311_deriv", "austin311", "deriv"), String.format("Unexpected tokens. Found %s", tokens)); request = AnalyzeRequest.withIndexAnalyzer( @@ -622,7 +618,7 @@ public void testChartAutoComplete() throws InterruptedException, IOException { List.of("B", "Ba", "Baz", "Baz ", "Baz C", "Baz Ch", "Baz Cha", "Baz Char", "Baz Chart", "Baz Chart ") .forEach(query -> { try { - AutoCompleteResults result = autocomplete(new ChartType(entityClient), query); + AutoCompleteResults result = autocomplete(new ChartType(getEntityClient()), query); assertTrue(result.getEntities().size() == 2, String.format("Expected 2 results for `%s` found %s", query, result.getEntities().size())); } catch (Exception e) { @@ -637,7 +633,7 @@ public void testDatasetAutoComplete() { "excess_deaths_de", "excess_deaths_der", "excess_deaths_derived") .forEach(query -> { try { - AutoCompleteResults result = autocomplete(new DatasetType(entityClient), query); + AutoCompleteResults result = autocomplete(new DatasetType(getEntityClient()), query); assertTrue(result.getEntities().size() >= 1, String.format("Expected >= 1 results for `%s` found %s", query, result.getEntities().size())); } catch (Exception e) { @@ -652,7 +648,7 @@ public void testContainerAutoComplete() { "container-autocomp-test") .forEach(query -> { try { - AutoCompleteResults result = autocomplete(new ContainerType(entityClient), query); + AutoCompleteResults result = autocomplete(new ContainerType(getEntityClient()), query); assertTrue(result.getEntities().size() >= 1, String.format("Expected >= 1 results for `%s` found %s", query, result.getEntities().size())); } catch (Exception e) { @@ -666,7 +662,7 @@ public void testGroupAutoComplete() { List.of("T", "Te", "Tes", "Test ", "Test G", "Test Gro", "Test Group ") .forEach(query -> { try { - AutoCompleteResults result = autocomplete(new CorpGroupType(entityClient), query); + AutoCompleteResults result = autocomplete(new CorpGroupType(getEntityClient()), query); assertTrue(result.getEntities().size() == 1, String.format("Expected 1 results for `%s` found %s", query, result.getEntities().size())); } catch (Exception e) { @@ -680,7 +676,7 @@ public void testUserAutoComplete() { List.of("D", "Da", "Dat", "Data ", "Data H", "Data Hu", "Data Hub", "Data Hub ") .forEach(query -> { try { - AutoCompleteResults result = autocomplete(new CorpUserType(entityClient, null), query); + AutoCompleteResults result = autocomplete(new CorpUserType(getEntityClient(), null), query); assertTrue(result.getEntities().size() >= 1, String.format("Expected at least 1 results for `%s` found %s", query, result.getEntities().size())); } catch (Exception e) { @@ -702,7 +698,7 @@ public void testSmokeTestQueries() { ); Map<String, SearchResult> results = expectedFulltextMinimums.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(searchService, entry.getKey()))); + .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(getSearchService(), entry.getKey()))); results.forEach((key, value) -> { Integer actualCount = value.getEntities().size(); @@ -719,7 +715,7 @@ public void testSmokeTestQueries() { ); results = expectedStructuredMinimums.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchStructured(searchService, entry.getKey()))); + 
.collect(Collectors.toMap(Map.Entry::getKey, entry -> searchStructured(getSearchService(), entry.getKey()))); results.forEach((key, value) -> { Integer actualCount = value.getEntities().size(); @@ -772,7 +768,7 @@ public void testUnderscore() throws IOException { @Test public void testFacets() { Set<String> expectedFacets = Set.of("entity", "typeNames", "platform", "origin", "tags"); - SearchResult testResult = searchAcrossEntities(searchService, "cypress"); + SearchResult testResult = searchAcrossEntities(getSearchService(), "cypress"); expectedFacets.forEach(facet -> { assertTrue(testResult.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), String.format("Failed to find facet `%s` in %s", facet, @@ -780,7 +776,7 @@ public void testFacets() { .map(AggregationMetadata::getName).collect(Collectors.toList()))); }); AggregationMetadata entityAggMeta = testResult.getMetadata().getAggregations().stream().filter( - aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); + aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); Map<String, Long> expectedEntityTypeCounts = new HashMap<>(); expectedEntityTypeCounts.put("container", 0L); expectedEntityTypeCounts.put("corpuser", 0L); @@ -805,28 +801,28 @@ public void testFacets() { @Test public void testNestedAggregation() { Set<String> expectedFacets = Set.of("platform"); - SearchResult testResult = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResult = searchAcrossEntities(getSearchService(), "cypress", List.copyOf(expectedFacets)); assertEquals(testResult.getMetadata().getAggregations().size(), 1); expectedFacets.forEach(facet -> { assertTrue(testResult.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), - String.format("Failed to find facet `%s` in %s", facet, - testResult.getMetadata().getAggregations().stream() - .map(AggregationMetadata::getName).collect(Collectors.toList()))); + String.format("Failed to find facet `%s` in %s", facet, + testResult.getMetadata().getAggregations().stream() + .map(AggregationMetadata::getName).collect(Collectors.toList()))); }); expectedFacets = Set.of("platform", "typeNames", "_entityType", "entity"); - SearchResult testResult2 = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResult2 = searchAcrossEntities(getSearchService(), "cypress", List.copyOf(expectedFacets)); assertEquals(testResult2.getMetadata().getAggregations().size(), 4); expectedFacets.forEach(facet -> { assertTrue(testResult2.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), - String.format("Failed to find facet `%s` in %s", facet, - testResult2.getMetadata().getAggregations().stream() - .map(AggregationMetadata::getName).collect(Collectors.toList()))); + String.format("Failed to find facet `%s` in %s", facet, + testResult2.getMetadata().getAggregations().stream() + .map(AggregationMetadata::getName).collect(Collectors.toList()))); }); AggregationMetadata entityTypeAggMeta = testResult2.getMetadata().getAggregations().stream().filter( - aggMeta -> aggMeta.getName().equals("_entityType")).findFirst().get(); + aggMeta -> aggMeta.getName().equals("_entityType")).findFirst().get(); AggregationMetadata entityAggMeta = testResult2.getMetadata().getAggregations().stream().filter( - aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); + aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); assertEquals(entityTypeAggMeta.getAggregations(), 
entityAggMeta.getAggregations()); Map expectedEntityTypeCounts = new HashMap<>(); expectedEntityTypeCounts.put("container", 0L); @@ -849,24 +845,24 @@ public void testNestedAggregation() { assertEquals(entityTypeAggMeta.getAggregations(), expectedEntityTypeCounts); expectedFacets = Set.of("platform", "typeNames", "entity"); - SearchResult testResult3 = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResult3 = searchAcrossEntities(getSearchService(), "cypress", List.copyOf(expectedFacets)); assertEquals(testResult3.getMetadata().getAggregations().size(), 4); expectedFacets.forEach(facet -> { assertTrue(testResult3.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), - String.format("Failed to find facet `%s` in %s", facet, - testResult3.getMetadata().getAggregations().stream() - .map(AggregationMetadata::getName).collect(Collectors.toList()))); + String.format("Failed to find facet `%s` in %s", facet, + testResult3.getMetadata().getAggregations().stream() + .map(AggregationMetadata::getName).collect(Collectors.toList()))); }); AggregationMetadata entityTypeAggMeta3 = testResult3.getMetadata().getAggregations().stream().filter( - aggMeta -> aggMeta.getName().equals("_entityType")).findFirst().get(); + aggMeta -> aggMeta.getName().equals("_entityType")).findFirst().get(); AggregationMetadata entityAggMeta3 = testResult3.getMetadata().getAggregations().stream().filter( - aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); + aggMeta -> aggMeta.getName().equals("entity")).findFirst().get(); assertEquals(entityTypeAggMeta3.getAggregations(), entityAggMeta3.getAggregations()); assertEquals(entityTypeAggMeta3.getAggregations(), expectedEntityTypeCounts); String singleNestedFacet = String.format("_entityType%sowners", AGGREGATION_SEPARATOR_CHAR); expectedFacets = Set.of(singleNestedFacet); - SearchResult testResultSingleNested = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResultSingleNested = searchAcrossEntities(getSearchService(), "cypress", List.copyOf(expectedFacets)); assertEquals(testResultSingleNested.getMetadata().getAggregations().size(), 1); Map expectedNestedFacetCounts = new HashMap<>(); expectedNestedFacetCounts.put("datajob␞urn:li:corpuser:datahub", 2L); @@ -885,17 +881,17 @@ public void testNestedAggregation() { assertEquals(testResultSingleNested.getMetadata().getAggregations().get(0).getAggregations(), expectedNestedFacetCounts); expectedFacets = Set.of("platform", singleNestedFacet, "typeNames", "origin"); - SearchResult testResultNested = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResultNested = searchAcrossEntities(getSearchService(), "cypress", List.copyOf(expectedFacets)); assertEquals(testResultNested.getMetadata().getAggregations().size(), 4); expectedFacets.forEach(facet -> { assertTrue(testResultNested.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), - String.format("Failed to find facet `%s` in %s", facet, - testResultNested.getMetadata().getAggregations().stream() - .map(AggregationMetadata::getName).collect(Collectors.toList()))); + String.format("Failed to find facet `%s` in %s", facet, + testResultNested.getMetadata().getAggregations().stream() + .map(AggregationMetadata::getName).collect(Collectors.toList()))); }); List expectedNestedAgg = testResultNested.getMetadata().getAggregations().stream().filter( - agg -> 
agg.getName().equals(singleNestedFacet)).collect(Collectors.toList()); + agg -> agg.getName().equals(singleNestedFacet)).collect(Collectors.toList()); assertEquals(expectedNestedAgg.size(), 1); AggregationMetadata nestedAgg = expectedNestedAgg.get(0); assertEquals(nestedAgg.getDisplayName(), String.format("Type%sOwned By", AGGREGATION_SEPARATOR_CHAR)); @@ -959,7 +955,7 @@ public void testScrollAcrossEntities() throws IOException { int totalResults = 0; String scrollId = null; do { - ScrollResult result = scroll(searchService, query, batchSize, scrollId); + ScrollResult result = scroll(getSearchService(), query, batchSize, scrollId); int numResults = result.hasEntities() ? result.getEntities().size() : 0; assertTrue(numResults <= batchSize); totalResults += numResults; @@ -972,13 +968,13 @@ public void testScrollAcrossEntities() throws IOException { @Test public void testSearchAcrossMultipleEntities() { String query = "logging_events"; - SearchResult result = search(searchService, query); + SearchResult result = search(getSearchService(), query); assertEquals((int) result.getNumEntities(), 8); - result = search(searchService, List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME), query); + result = search(getSearchService(), List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME), query); assertEquals((int) result.getNumEntities(), 8); - result = search(searchService, List.of(DATASET_ENTITY_NAME), query); + result = search(getSearchService(), List.of(DATASET_ENTITY_NAME), query); assertEquals((int) result.getNumEntities(), 4); - result = search(searchService, List.of(DATA_JOB_ENTITY_NAME), query); + result = search(getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query); assertEquals((int) result.getNumEntities(), 4); } @@ -1046,7 +1042,7 @@ public void testFragmentUrns() { ); testSet.forEach(query -> { - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected partial urn search results", query)); @@ -1064,7 +1060,7 @@ public void testPlatformTest() { List results = testFields.stream() .map(fieldName -> { final String query = String.format("%s:%s", fieldName, testPlatform.replaceAll(":", "\\\\:")); - SearchResult result = searchStructured(searchService, query); + SearchResult result = searchStructured(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1095,7 +1091,7 @@ public void testPlatformTest() { // Test field variations with/without .keyword List entityClientResults = testFilters.stream().map(filter -> { try { - return entityClient.search("dataset", "*", filter, null, 0, 100, + return getEntityClient().search("dataset", "*", filter, null, 0, 100, AUTHENTICATION, new SearchFlags().setFulltext(fulltextFlag)); } catch (RemoteInvocationException e) { throw new RuntimeException(e); @@ -1112,7 +1108,7 @@ public void testPlatformTest() { @Test public void testStructQueryFieldMatch() { String query = STRUCTURED_QUERY_PREFIX + "name: customers"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1125,7 +1121,7 @@ public 
void testStructQueryFieldMatch() { @Test public void testStructQueryFieldPrefixMatch() { String query = STRUCTURED_QUERY_PREFIX + "name: customers*"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1138,7 +1134,7 @@ public void testStructQueryFieldPrefixMatch() { @Test public void testStructQueryCustomPropertiesKeyPrefix() { String query = STRUCTURED_QUERY_PREFIX + "customProperties: node_type=*"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1151,7 +1147,7 @@ public void testStructQueryCustomPropertiesKeyPrefix() { @Test public void testStructQueryCustomPropertiesMatch() { String query = STRUCTURED_QUERY_PREFIX + "customProperties: node_type=model"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1169,7 +1165,7 @@ public void testCustomPropertiesQuoted() { ); Map results = expectedResults.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(searchService, entry.getKey()))); + .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(getSearchService(), entry.getKey()))); results.forEach((key, value) -> { Integer actualCount = value.getEntities().size(); @@ -1183,7 +1179,7 @@ public void testCustomPropertiesQuoted() { @Test public void testStructQueryFieldPaths() { String query = STRUCTURED_QUERY_PREFIX + "fieldPaths: customer_id"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1196,7 +1192,7 @@ public void testStructQueryFieldPaths() { @Test public void testStructQueryBoolean() { String query = STRUCTURED_QUERY_PREFIX + "editedFieldTags:urn\\:li\\:tag\\:Legacy OR tags:urn\\:li\\:tag\\:testTag"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1206,7 +1202,7 @@ public void testStructQueryBoolean() { assertEquals(result.getEntities().size(), 2); query = STRUCTURED_QUERY_PREFIX + "editedFieldTags:urn\\:li\\:tag\\:Legacy"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1216,7 +1212,7 @@ public void testStructQueryBoolean() { assertEquals(result.getEntities().size(), 1); query = STRUCTURED_QUERY_PREFIX + "tags:urn\\:li\\:tag\\:testTag"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", 
query)); @@ -1229,7 +1225,7 @@ public void testStructQueryBoolean() { @Test public void testStructQueryBrowsePaths() { String query = STRUCTURED_QUERY_PREFIX + "browsePaths:*/dbt/*"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1242,7 +1238,7 @@ public void testStructQueryBrowsePaths() { @Test public void testOr() { String query = "stg_customers | logging_events"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1250,7 +1246,7 @@ public void testOr() { assertEquals(result.getEntities().size(), 9); query = "stg_customers"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1258,7 +1254,7 @@ public void testOr() { assertEquals(result.getEntities().size(), 1); query = "logging_events"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1269,7 +1265,7 @@ public void testOr() { @Test public void testNegate() { String query = "logging_events -bckp"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1277,7 +1273,7 @@ public void testNegate() { assertEquals(result.getEntities().size(), 7); query = "logging_events"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1288,7 +1284,7 @@ public void testNegate() { @Test public void testPrefix() { String query = "bigquery"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1296,7 +1292,7 @@ public void testPrefix() { assertEquals(result.getEntities().size(), 8); query = "big*"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", 
query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1307,7 +1303,7 @@ public void testPrefix() { @Test public void testParens() { String query = "dbt | (bigquery + covid19)"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1315,7 +1311,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 11); query = "dbt"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1323,7 +1319,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 9); query = "bigquery + covid19"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1331,7 +1327,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 2); query = "bigquery"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1339,7 +1335,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 8); query = "covid19"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1349,55 +1345,55 @@ public void testParens() { @Test public void testGram() { String query = "jaffle shop customers"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", - "Expected exact match in 1st position"); + "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", + "Expected exact match in 1st position"); query = "shop customers source"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - 
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", - "Expected ngram match in 1st position"); + "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", + "Expected ngram match in 1st position"); query = "jaffle shop stg customers"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", - "Expected ngram match in 1st position"); + "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", + "Expected ngram match in 1st position"); query = "jaffle shop transformers customers"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", - "Expected ngram match in 1st position"); + "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", + "Expected ngram match in 1st position"); query = "shop raw customers"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "Expected ngram match in 1st position"); + "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "Expected ngram match in 1st position"); } @Test public void testPrefixVsExact() { String query = "\"customers\""; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1415,7 +1411,7 @@ public void testPrefixVsExact() { public void testPrefixVsExactCaseSensitivity() { List insensitiveExactMatches = List.of("testExactMatchCase", "testexactmatchcase", "TESTEXACTMATCHCASE"); for (String query : insensitiveExactMatches) { - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1432,33 +1428,33 @@ public void testPrefixVsExactCaseSensitivity() { @Test public void testColumnExactMatch() { String query = "unit_data"; - SearchResult result = searchAcrossEntities(searchService, query); + SearchResult result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected 
search results", query)); + String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), - String.format("%s - Expected search results to include matched fields", query)); + String.format("%s - Expected search results to include matched fields", query)); assertTrue(result.getEntities().size() > 2, - String.format("%s - Expected search results to have at least two results", query)); + String.format("%s - Expected search results to have at least two results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:testOnly," + query + ",PROD)", - "Expected table name exact match first"); + "urn:li:dataset:(urn:li:dataPlatform:testOnly," + query + ",PROD)", + "Expected table name exact match first"); query = "special_column_only_present_here_info"; - result = searchAcrossEntities(searchService, query); + result = searchAcrossEntities(getSearchService(), query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), - String.format("%s - Expected search results", query)); + String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), - String.format("%s - Expected search results to include matched fields", query)); + String.format("%s - Expected search results to include matched fields", query)); assertTrue(result.getEntities().size() > 2, - String.format("%s - Expected search results to have at least two results", query)); + String.format("%s - Expected search results to have at least two results", query)); assertEquals(result.getEntities().get(0).getEntity().toString(), - "urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)", - "Expected table with column name exact match first"); + "urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)", + "Expected table with column name exact match first"); } private Stream getTokens(AnalyzeRequest request) throws IOException { - return _searchClient.indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); + return getSearchClient().indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java similarity index 85% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java index 2416280cb8f93..4472af339c074 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java @@ -1,43 +1,40 @@ -package com.linkedin.metadata.search.elasticsearch.indexbuilder; +package com.linkedin.metadata.search.indexbuilder; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.google.common.collect.ImmutableMap; -import com.linkedin.metadata.ESTestConfiguration; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.systemmetadata.SystemMetadataMappingsBuilder; import 
com.linkedin.metadata.version.GitVersion; -import java.util.Optional; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; -import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.GetIndexRequest; -import org.elasticsearch.client.IndicesClient; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.indices.GetIndexResponse; -import org.elasticsearch.cluster.metadata.AliasMetadata; -import org.elasticsearch.rest.RestStatus; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; +import org.opensearch.OpenSearchException; +import org.opensearch.action.admin.indices.alias.get.GetAliasesRequest; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.client.IndicesClient; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.client.indices.GetIndexResponse; +import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.rest.RestStatus; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import javax.annotation.Nonnull; import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.*; + +abstract public class IndexBuilderTestBase extends AbstractTestNGSpringContextTests { -@Import(ESTestConfiguration.class) -public class ESIndexBuilderTest extends AbstractTestNGSpringContextTests { + @Nonnull + abstract protected RestHighLevelClient getSearchClient(); - @Autowired - private RestHighLevelClient _searchClient; private static IndicesClient _indexClient; private static final String TEST_INDEX_NAME = "esindex_builder_test"; private static ESIndexBuilder testDefaultBuilder; @@ -45,9 +42,9 @@ public class ESIndexBuilderTest extends AbstractTestNGSpringContextTests { @BeforeClass public void setup() { - _indexClient = _searchClient.indices(); + _indexClient = getSearchClient().indices(); GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); - testDefaultBuilder = new ESIndexBuilder(_searchClient, 1, 0, 0, + testDefaultBuilder = new ESIndexBuilder(getSearchClient(), 1, 0, 0, 0, Map.of(), false, false, new ElasticSearchConfiguration(), gitVersion); } @@ -65,7 +62,7 @@ public static void wipe() throws Exception { }); _indexClient.delete(new DeleteIndexRequest(TEST_INDEX_NAME), RequestOptions.DEFAULT); - } catch (ElasticsearchException exception) { + } catch (OpenSearchException exception) { if (exception.status() != RestStatus.NOT_FOUND) { throw exception; } @@ -79,7 +76,7 @@ public static GetIndexResponse getTestIndex() throws IOException { @Test public void testESIndexBuilderCreation() throws Exception { GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); - ESIndexBuilder customIndexBuilder = new ESIndexBuilder(_searchClient, 2, 0, 1, + ESIndexBuilder customIndexBuilder = new 
ESIndexBuilder(getSearchClient(), 2, 0, 1, 0, Map.of(), false, false, new ElasticSearchConfiguration(), gitVersion); customIndexBuilder.buildIndex(TEST_INDEX_NAME, Map.of(), Map.of()); @@ -93,7 +90,7 @@ public void testESIndexBuilderCreation() throws Exception { @Test public void testMappingReindex() throws Exception { GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); - ESIndexBuilder enabledMappingReindex = new ESIndexBuilder(_searchClient, 1, 0, 0, + ESIndexBuilder enabledMappingReindex = new ESIndexBuilder(getSearchClient(), 1, 0, 0, 0, Map.of(), false, true, new ElasticSearchConfiguration(), gitVersion); @@ -111,7 +108,7 @@ public void testMappingReindex() throws Exception { Map newProps = ((Map) SystemMetadataMappingsBuilder.getMappings().get("properties")) .entrySet().stream() .map(m -> !m.getKey().equals("urn") ? m - : Map.entry("urn", ImmutableMap.builder().put("type", "wildcard").build())) + : Map.entry("urn", ImmutableMap.builder().put("type", "text").build())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); enabledMappingReindex.buildIndex(TEST_INDEX_NAME, Map.of("properties", newProps), Map.of()); @@ -134,7 +131,7 @@ public void testSettingsNumberOfShardsReindex() throws Exception { String expectedShards = "5"; GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); - ESIndexBuilder changedShardBuilder = new ESIndexBuilder(_searchClient, + ESIndexBuilder changedShardBuilder = new ESIndexBuilder(getSearchClient(), Integer.parseInt(expectedShards), testDefaultBuilder.getNumReplicas(), testDefaultBuilder.getNumRetries(), @@ -162,7 +159,7 @@ public void testSettingsNumberOfShardsReindex() throws Exception { public void testSettingsNoReindex() throws Exception { GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); List noReindexBuilders = List.of( - new ESIndexBuilder(_searchClient, + new ESIndexBuilder(getSearchClient(), testDefaultBuilder.getNumShards(), testDefaultBuilder.getNumReplicas() + 1, testDefaultBuilder.getNumRetries(), @@ -170,7 +167,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, new ElasticSearchConfiguration(), gitVersion), - new ESIndexBuilder(_searchClient, + new ESIndexBuilder(getSearchClient(), testDefaultBuilder.getNumShards(), testDefaultBuilder.getNumReplicas(), testDefaultBuilder.getNumRetries(), @@ -178,7 +175,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, new ElasticSearchConfiguration(), gitVersion), - new ESIndexBuilder(_searchClient, + new ESIndexBuilder(getSearchClient(), testDefaultBuilder.getNumShards() + 1, testDefaultBuilder.getNumReplicas(), testDefaultBuilder.getNumRetries(), @@ -186,7 +183,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), false, false, new ElasticSearchConfiguration(), gitVersion), - new ESIndexBuilder(_searchClient, + new ESIndexBuilder(getSearchClient(), testDefaultBuilder.getNumShards(), testDefaultBuilder.getNumReplicas() + 1, testDefaultBuilder.getNumRetries(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java similarity index 98% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 
0b33185549299..0d2ce236d9f54 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -1,8 +1,10 @@ -package com.linkedin.metadata.search.elasticsearch.indexbuilder; +package com.linkedin.metadata.search.indexbuilder; import com.google.common.collect.ImmutableMap; import com.linkedin.metadata.TestEntitySpecBuilder; import java.util.Map; + +import com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java new file mode 100644 index 0000000000000..3896ba749e85e --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/GoldenOpenSearchTest.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.fixtures.GoldenTestBase; +import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + +@Import({OpenSearchSuite.class, SampleDataFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class GoldenOpenSearchTest extends GoldenTestBase { + + @Autowired + @Qualifier("longTailSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("entityRegistry") + private EntityRegistry entityRegistry; + + + @NotNull + @Override + protected EntityRegistry getEntityRegistry() { + return entityRegistry; + } + + @NotNull + @Override + protected SearchService getSearchService() { + return searchService; + } + + @Test + public void initTest() { + assertNotNull(searchService); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java new file mode 100644 index 0000000000000..312b56364bd91 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/IndexBuilderOpenSearchTest.java @@ -0,0 +1,30 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.search.indexbuilder.IndexBuilderTestBase; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + + +@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) +public class IndexBuilderOpenSearchTest extends IndexBuilderTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + + @NotNull + @Override + protected 
RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @Test + public void initTest() { + assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java new file mode 100644 index 0000000000000..6fc0677ad6e39 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageDataFixtureOpenSearchTest.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.search.LineageSearchService; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.fixtures.LineageDataFixtureTestBase; +import io.datahubproject.test.fixtures.search.SearchLineageFixtureConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + + +@Import({OpenSearchSuite.class, SearchLineageFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class LineageDataFixtureOpenSearchTest extends LineageDataFixtureTestBase { + + @Autowired + @Qualifier("searchLineageSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("searchLineageLineageSearchService") + protected LineageSearchService lineageService; + + @NotNull + @Override + protected LineageSearchService getLineageService() { + return lineageService; + } + + @NotNull + @Override + protected SearchService getSearchService() { + return searchService; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(lineageService); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java new file mode 100644 index 0000000000000..1a6242c2211fd --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/LineageServiceOpenSearchTest.java @@ -0,0 +1,65 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.LineageServiceTestBase; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({OpenSearchSuite.class, SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class}) +public class LineageServiceOpenSearchTest extends LineageServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private 
ESIndexBuilder _esIndexBuilder; + @Autowired + private SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/OpenSearchSuite.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/OpenSearchSuite.java new file mode 100644 index 0000000000000..559c623c97d5a --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/OpenSearchSuite.java @@ -0,0 +1,31 @@ +package com.linkedin.metadata.search.opensearch; + +import io.datahubproject.test.search.OpenSearchTestContainer; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testcontainers.containers.GenericContainer; +import org.testng.annotations.AfterSuite; + +@TestConfiguration +public class OpenSearchSuite extends AbstractTestNGSpringContextTests { + + private static final OpenSearchTestContainer OPENSEARCH_TEST_CONTAINER; + private static GenericContainer container; + static { + OPENSEARCH_TEST_CONTAINER = new OpenSearchTestContainer(); + } + + @AfterSuite + public void after() { + OPENSEARCH_TEST_CONTAINER.stopContainer(); + } + + @Bean(name = "testSearchContainer") + public GenericContainer testSearchContainer() { + if (container == null) { + container = OPENSEARCH_TEST_CONTAINER.startContainer(); + } + return container; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java new file mode 100644 index 0000000000000..081eb5f70fc85 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SampleDataFixtureOpenSearchTest.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.fixtures.SampleDataFixtureTestBase; +import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import lombok.Getter; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + + +/** + * Runs sample data fixture tests for Opensearch test container + */ +@Getter +@Import({OpenSearchSuite.class, 
SampleDataFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class SampleDataFixtureOpenSearchTest extends SampleDataFixtureTestBase { + @Autowired + private RestHighLevelClient searchClient; + + @Autowired + @Qualifier("sampleDataSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("sampleDataEntityClient") + protected EntityClient entityClient; + + @Autowired + @Qualifier("entityRegistry") + private EntityRegistry entityRegistry; + + @Test + public void initTest() { + assertNotNull(searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java new file mode 100644 index 0000000000000..0b166975da0d1 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchDAOOpenSearchTest.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.search.query.SearchDAOTestBase; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.test.fixtures.search.SampleDataFixtureConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import lombok.Getter; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.testng.annotations.Test; + +import static org.testng.AssertJUnit.assertNotNull; + + +@Getter +@Import({OpenSearchSuite.class, SampleDataFixtureConfiguration.class, SearchTestContainerConfiguration.class}) +public class SearchDAOOpenSearchTest extends SearchDAOTestBase { + @Autowired + private RestHighLevelClient searchClient; + @Autowired + private SearchConfiguration searchConfiguration; + @Autowired + @Qualifier("sampleDataIndexConvention") + IndexConvention indexConvention; + + @Test + public void initTest() { + assertNotNull(searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java new file mode 100644 index 0000000000000..8a55ba7b37ef9 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SearchServiceOpenSearchTest.java @@ -0,0 +1,65 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.SearchServiceTestBase; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({OpenSearchSuite.class, SearchCommonTestConfiguration.class, 
SearchTestContainerConfiguration.class}) +public class SearchServiceOpenSearchTest extends SearchServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + @Autowired + private SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java new file mode 100644 index 0000000000000..f0bb8e1c12479 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/SystemMetadataServiceOpenSearchTest.java @@ -0,0 +1,47 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.systemmetadata.SystemMetadataServiceTestBase; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + + +@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) +public class SystemMetadataServiceOpenSearchTest extends SystemMetadataServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java new file mode 100644 index 0000000000000..467f7fb43be1b --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TestEntityOpenSearchTest.java @@ -0,0 +1,65 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.TestEntityTestBase; +import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({OpenSearchSuite.class, SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class}) +public class TestEntityOpenSearchTest extends TestEntityTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + @Autowired + private SearchConfiguration _searchConfiguration; + @Autowired + private CustomSearchConfiguration _customSearchConfiguration; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @NotNull + @Override + protected SearchConfiguration getSearchConfiguration() { + return _searchConfiguration; + } + + @NotNull + @Override + protected CustomSearchConfiguration getCustomSearchConfiguration() { + return _customSearchConfiguration; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java new file mode 100644 index 0000000000000..3333b9f0942f5 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/opensearch/TimeseriesAspectServiceOpenSearchTest.java @@ -0,0 +1,46 @@ +package com.linkedin.metadata.search.opensearch; + +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.timeseries.search.TimeseriesAspectServiceTestBase; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; +import org.jetbrains.annotations.NotNull; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class}) +public class TimeseriesAspectServiceOpenSearchTest extends TimeseriesAspectServiceTestBase { + + @Autowired + private RestHighLevelClient _searchClient; + @Autowired + private ESBulkProcessor _bulkProcessor; + @Autowired + private ESIndexBuilder _esIndexBuilder; + + @NotNull + @Override + protected RestHighLevelClient getSearchClient() { + return _searchClient; + } + + @NotNull + @Override + protected ESBulkProcessor getBulkProcessor() { + return _bulkProcessor; + } + + @NotNull + @Override + protected ESIndexBuilder getIndexBuilder() { + return _esIndexBuilder; + } + + @Test + public void initTest() { + AssertJUnit.assertNotNull(_searchClient); + } +} 
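Note: every OpenSearch test class above follows the same two-layer wiring: the engine-agnostic assertions live in an abstract `*TestBase` class that exposes its dependencies through abstract getters, and a thin concrete class binds those getters to Spring beans pulled in via `@Import({OpenSearchSuite.class, ...})`. The following is a minimal, self-contained sketch of that pattern under the same Spring/TestNG setup as the classes above; the `PingTestBase` and `PingOpenSearchTest` names are illustrative only and are not part of this change:

```java
package com.linkedin.metadata.search.opensearch;

import io.datahubproject.test.search.config.SearchTestContainerConfiguration;
import java.io.IOException;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;

import static org.testng.Assert.assertTrue;

// Engine-agnostic half: the test logic sees only the abstract accessor, so the
// same assertions can run unchanged against an Elasticsearch or OpenSearch backend.
abstract class PingTestBase extends AbstractTestNGSpringContextTests {

  protected abstract RestHighLevelClient getSearchClient();

  @Test
  public void testClusterReachable() throws IOException {
    // ping() returns true when the cluster behind the injected client responds.
    assertTrue(getSearchClient().ping(RequestOptions.DEFAULT));
  }
}

// Engine-specific half: @Import pulls in the OpenSearch container suite, Spring
// injects the client bean, and the overridden getter hands it to the base class.
@Import({OpenSearchSuite.class, SearchTestContainerConfiguration.class})
class PingOpenSearchTest extends PingTestBase {

  @Autowired
  private RestHighLevelClient searchClient;

  @Override
  protected RestHighLevelClient getSearchClient() {
    return searchClient;
  }
}
```

Because the concrete class carries no test logic beyond an `initTest`-style smoke check, supporting another search engine amounts to one more subclass with a different `@Import` list.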
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java similarity index 86% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAOTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index 0a5f71345751b..91e7747afb4a1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -1,7 +1,8 @@ -package com.linkedin.metadata.search.elasticsearch.query; +package com.linkedin.metadata.search.query; import com.linkedin.common.urn.Urn; -import com.linkedin.metadata.ESTestConfiguration; +import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.entity.TestEntityRegistry; @@ -11,11 +12,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -28,8 +29,8 @@ import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; -@Import(ESTestConfiguration.class) -public class ESBrowseDAOTest extends AbstractTestNGSpringContextTests { +@Import(SearchCommonTestConfiguration.class) +public class BrowseDAOTest extends AbstractTestNGSpringContextTests { private RestHighLevelClient _mockClient; private ESBrowseDAO _browseDAO; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java new file mode 100644 index 0000000000000..2dbc142d45071 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -0,0 +1,307 @@ +package com.linkedin.metadata.search.query; + +import com.datahub.test.Snapshot; +import com.google.common.collect.ImmutableList; +import com.linkedin.data.template.LongMap; +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.AggregationMetadata; 
+import com.linkedin.metadata.search.AggregationMetadataArray;
+import com.linkedin.metadata.search.FilterValueArray;
+import com.linkedin.metadata.search.SearchEntityArray;
+import com.linkedin.metadata.search.SearchResult;
+import com.linkedin.metadata.search.SearchResultMetadata;
+import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO;
+import com.linkedin.metadata.utils.SearchUtil;
+import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+import org.opensearch.client.RestHighLevelClient;
+import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH;
+import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.fail;
+
+abstract public class SearchDAOTestBase extends AbstractTestNGSpringContextTests {
+
+  abstract protected RestHighLevelClient getSearchClient();
+
+  abstract protected SearchConfiguration getSearchConfiguration();
+
+  abstract protected IndexConvention getIndexConvention();
+
+  EntityRegistry _entityRegistry = new SnapshotEntityRegistry(new Snapshot());
+
+
+  @Test
+  public void testTransformFilterForEntitiesNoChange() {
+    Criterion c = new Criterion().setValue("urn:li:tag:abc").setValues(
+        new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("tags.keyword");
+
+    Filter f = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c))));
+
+    Filter transformedFilter = SearchUtil.transformFilterForEntities(f, getIndexConvention());
+    assertEquals(f, transformedFilter);
+  }
+
+  @Test
+  public void testTransformFilterForEntitiesNullFilter() {
+    Filter transformedFilter = SearchUtil.transformFilterForEntities(null, getIndexConvention());
+    assertNotNull(getIndexConvention());
+    assertEquals(null, transformedFilter);
+  }
+
+  @Test
+  public void testTransformFilterForEntitiesWithChanges() {
+
+    Criterion c = new Criterion().setValue("dataset").setValues(
+        new StringArray(ImmutableList.of("dataset"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType");
+
+    Filter f = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c))));
+    Filter originalF = null;
+    try {
+      originalF = f.copy();
+    } catch (CloneNotSupportedException e) {
+      fail(e.getMessage());
+    }
+    assertEquals(f, originalF);
+
+    Filter transformedFilter = SearchUtil.transformFilterForEntities(f, getIndexConvention());
+    assertNotEquals(originalF, transformedFilter);
+
+    Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datasetindex_v2").setValues(
+        new StringArray(ImmutableList.of("smpldat_datasetindex_v2"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_index");
+
+    Filter expectedNewFilter = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion))));
+
+    assertEquals(expectedNewFilter, transformedFilter);
+  }
+
+  @Test
+  public void testTransformFilterForEntitiesWithUnderscore() {
+
+    Criterion c = new Criterion().setValue("data_job").setValues(
+        new StringArray(ImmutableList.of("data_job"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType");
+
+    Filter f = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(c))));
+    Filter originalF = null;
+    try {
+      originalF = f.copy();
+    } catch (CloneNotSupportedException e) {
+      fail(e.getMessage());
+    }
+    assertEquals(f, originalF);
+
+    Filter transformedFilter = SearchUtil.transformFilterForEntities(f, getIndexConvention());
+    assertNotEquals(originalF, transformedFilter);
+
+    Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datajobindex_v2").setValues(
+        new StringArray(ImmutableList.of("smpldat_datajobindex_v2"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_index");
+
+    Filter expectedNewFilter = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion))));
+
+    assertEquals(transformedFilter, expectedNewFilter);
+  }
+
+  @Test
+  public void testTransformFilterForEntitiesWithSomeChanges() {
+
+    Criterion criterionChanged = new Criterion().setValue("dataset").setValues(
+        new StringArray(ImmutableList.of("dataset"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_entityType");
+    Criterion criterionUnchanged = new Criterion().setValue("urn:li:tag:abc").setValues(
+        new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("tags.keyword");
+
+    Filter f = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(criterionChanged, criterionUnchanged))));
+    Filter originalF = null;
+    try {
+      originalF = f.copy();
+    } catch (CloneNotSupportedException e) {
+      fail(e.getMessage());
+    }
+    assertEquals(f, originalF);
+
+    Filter transformedFilter = SearchUtil.transformFilterForEntities(f, getIndexConvention());
+    assertNotEquals(originalF, transformedFilter);
+
+    Criterion expectedNewCriterion = new Criterion().setValue("smpldat_datasetindex_v2").setValues(
+        new StringArray(ImmutableList.of("smpldat_datasetindex_v2"))
+    ).setNegated(false).setCondition(Condition.EQUAL).setField("_index");
+
+    Filter expectedNewFilter = new Filter().setOr(
+        new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(expectedNewCriterion, criterionUnchanged))));
+
+    assertEquals(expectedNewFilter, transformedFilter);
+  }
+
+  @Test
+  public void testTransformIndexIntoEntityNameSingle() {
+    ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), getIndexConvention(), false,
+        ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null);
+    // Empty aggregations
+    final SearchResultMetadata searchResultMetadata =
+        new SearchResultMetadata().setAggregations(new AggregationMetadataArray());
+    SearchResult result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>()))
+        .setMetadata(searchResultMetadata)
+        .setFrom(0)
+        .setPageSize(100)
+        .setNumEntities(30);
+    SearchResult expectedResult = null;
+    try {
+      expectedResult = result.copy();
+    } catch (CloneNotSupportedException e) {
+      fail(e.getMessage());
+    }
+    assertEquals(expectedResult, searchDAO.transformIndexIntoEntityName(result));
+
+    // one facet, do not transform
+    Map<String, Long> aggMap = Map.of("urn:li:corpuser:datahub", Long.valueOf(3));
+
+    List<AggregationMetadata> aggregationMetadataList = new ArrayList<>();
+    aggregationMetadataList.add(new AggregationMetadata().setName("owners")
+        .setDisplayName("Owned by")
+        .setAggregations(new LongMap(aggMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(aggMap, Collections.emptySet())))
+    );
+    searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList));
+    result.setMetadata(searchResultMetadata);
+
+    try {
+      expectedResult = result.copy();
+    } catch (CloneNotSupportedException e) {
+      fail(e.getMessage());
+    }
+    assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult);
+
+    // one facet, transform
+    Map<String, Long> entityTypeMap = Map.of("smpldat_datasetindex_v2", Long.valueOf(3));
+
+    aggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType")
+        .setDisplayName("Type")
+        .setAggregations(new LongMap(entityTypeMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, Collections.emptySet())))
+    );
+    searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList));
+    result.setMetadata(searchResultMetadata);
+
+    Map<String, Long> expectedEntityTypeMap = Map.of("dataset", Long.valueOf(3));
+
+    List<AggregationMetadata> expectedAggregationMetadataList = List.of(
+        new AggregationMetadata().setName("_entityType")
+            .setDisplayName("Type")
+            .setAggregations(new LongMap(expectedEntityTypeMap))
+            .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet())))
+    );
+    expectedResult.setMetadata(new SearchResultMetadata().setAggregations(new AggregationMetadataArray(expectedAggregationMetadataList)));
+    assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult);
+  }
+
+  @Test
+  public void testTransformIndexIntoEntityNameNested() {
+    ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), getIndexConvention(), false,
+        ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null);
+    // One nested facet
+    Map<String, Long> entityTypeMap = Map.of(
+        String.format("smpldat_datasetindex_v2%surn:li:corpuser:datahub", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3),
+        String.format("smpldat_datasetindex_v2%surn:li:corpuser:bfoo", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7),
+        "smpldat_datasetindex_v2", Long.valueOf(20)
+    );
+    List<AggregationMetadata> aggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType␞owners")
+        .setDisplayName("Type␞Owned By")
+        .setAggregations(new LongMap(entityTypeMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, Collections.emptySet())))
+    );
+    SearchResult result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>()))
+        .setMetadata(new SearchResultMetadata().setAggregations(
+            new AggregationMetadataArray(aggregationMetadataList)
+        ))
+        .setFrom(0)
+        .setPageSize(100)
+        .setNumEntities(50);
+
+    Map<String, Long> expectedEntityTypeMap = Map.of(
+        String.format("dataset%surn:li:corpuser:datahub", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3),
+        String.format("dataset%surn:li:corpuser:bfoo", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7),
+        "dataset", Long.valueOf(20)
+    );
+
+    List<AggregationMetadata> expectedAggregationMetadataList = List.of(new AggregationMetadata().setName("_entityType␞owners")
+        .setDisplayName("Type␞Owned By")
+        .setAggregations(new LongMap(expectedEntityTypeMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet())))
+    );
+    SearchResult expectedResult = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>()))
+        .setMetadata(new SearchResultMetadata().setAggregations(
+            new AggregationMetadataArray(expectedAggregationMetadataList)))
+        .setFrom(0)
+        .setPageSize(100)
+        .setNumEntities(50);
+    assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult);
+
+    // One nested facet, opposite order
+    entityTypeMap = Map.of(
+        String.format("urn:li:corpuser:datahub%ssmpldat_datasetindex_v2", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3),
+        String.format("urn:li:corpuser:datahub%ssmpldat_chartindex_v2", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7),
+        "urn:li:corpuser:datahub", Long.valueOf(20)
+    );
+    aggregationMetadataList = List.of(new AggregationMetadata().setName("owners␞_entityType")
+        .setDisplayName("Owned By␞Type")
+        .setAggregations(new LongMap(entityTypeMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(entityTypeMap, Collections.emptySet())))
+    );
+    result = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>()))
+        .setMetadata(new SearchResultMetadata().setAggregations(
+            new AggregationMetadataArray(aggregationMetadataList)
+        ))
+        .setFrom(0)
+        .setPageSize(100)
+        .setNumEntities(50);
+
+    expectedEntityTypeMap = Map.of(
+        String.format("urn:li:corpuser:datahub%sdataset", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(3),
+        String.format("urn:li:corpuser:datahub%schart", AGGREGATION_SEPARATOR_CHAR), Long.valueOf(7),
+        "urn:li:corpuser:datahub", Long.valueOf(20)
+    );
+
+    expectedAggregationMetadataList = List.of(new AggregationMetadata().setName("owners␞_entityType")
+        .setDisplayName("Owned By␞Type")
+        .setAggregations(new LongMap(expectedEntityTypeMap))
+        .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(expectedEntityTypeMap, Collections.emptySet())))
+    );
+    expectedResult = new SearchResult().setEntities(new SearchEntityArray(new ArrayList<>()))
+        .setMetadata(new SearchResultMetadata().setAggregations(
+            new AggregationMetadataArray(expectedAggregationMetadataList)))
+        .setFrom(0)
+        .setPageSize(100)
+        .setNumEntities(50);
+    assertEquals(searchDAO.transformIndexIntoEntityName(result), expectedResult);
+  }
+}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
similarity index 94%
rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
index 36c8bb8f9a676..66e7b62741f4c 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
@@ -1,4 +1,4 @@
-package com.linkedin.metadata.search.elasticsearch.query.request;
+package com.linkedin.metadata.search.query.request;
 
 import com.google.common.collect.ImmutableSet;
 import com.linkedin.metadata.config.search.SearchConfiguration;
@@ -9,7 +9,9 @@
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
-import org.elasticsearch.search.aggregations.AggregationBuilder;
+
+import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder;
+import org.opensearch.search.aggregations.AggregationBuilder;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
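[Illustrative sketch, not part of the patch: the new abstract bases such as SearchDAOTestBase above are presumably bound to a concrete search backend by a small subclass that supplies the abstract getters from Spring test configuration. The subclass name below is hypothetical, and the assumption that the imported test configurations expose RestHighLevelClient, SearchConfiguration, and IndexConvention beans is inferred from how the fixtures in this patch are wired.]

package com.linkedin.metadata.search.query;

import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
import io.datahubproject.test.search.config.SearchTestContainerConfiguration;
import org.opensearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Import;

// Hypothetical concrete binding of the shared SearchDAO test suite to one backend.
// The same three overrides, pointed at an OpenSearch or Elasticsearch container,
// would run the identical assertions defined in SearchDAOTestBase.
@Import({SearchCommonTestConfiguration.class, SearchTestContainerConfiguration.class})
public class SearchDAOOpenSearchTest extends SearchDAOTestBase {
  @Autowired
  private RestHighLevelClient _searchClient;       // assumed bean from the container configuration
  @Autowired
  private SearchConfiguration _searchConfiguration; // assumed bean from the common configuration
  @Autowired
  private IndexConvention _indexConvention;         // assumed bean; e.g. an IndexConventionImpl

  @Override
  protected RestHighLevelClient getSearchClient() {
    return _searchClient;
  }

  @Override
  protected SearchConfiguration getSearchConfiguration() {
    return _searchConfiguration;
  }

  @Override
  protected IndexConvention getIndexConvention() {
    return _indexConvention;
  }
}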
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
similarity index 88%
rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandlerTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
index be91cb0288950..34b98f38254cd 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
@@ -1,15 +1,17 @@
-package com.linkedin.metadata.search.elasticsearch.query.request;
+package com.linkedin.metadata.search.query.request;
 
 import com.linkedin.metadata.TestEntitySpecBuilder;
 import java.util.List;
 import java.util.Map;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder;
-import org.elasticsearch.index.query.MatchQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+
+import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder;
+import org.opensearch.index.query.MatchQueryBuilder;
+import org.opensearch.index.query.MultiMatchQueryBuilder;
+import org.opensearch.search.builder.SearchSourceBuilder;
+import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java
similarity index 93%
rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java
index 3dad9c59c6b53..6b6664ffdf30e 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java
@@ -1,4 +1,4 @@
-package com.linkedin.metadata.search.elasticsearch.query.request;
+package com.linkedin.metadata.search.query.request;
 
 import com.linkedin.metadata.config.search.CustomConfiguration;
 import com.linkedin.metadata.config.search.SearchConfiguration;
@@ -7,12 +7,14 @@
 import com.linkedin.metadata.config.search.custom.QueryConfiguration;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
-import org.elasticsearch.common.lucene.search.function.CombineFunction;
-import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
-import org.elasticsearch.index.query.MatchAllQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
-import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
+import com.linkedin.metadata.search.elasticsearch.query.request.CustomizedQueryHandler;
+import com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder;
+import org.opensearch.common.lucene.search.function.CombineFunction;
+import org.opensearch.common.lucene.search.function.FunctionScoreQuery;
+import org.opensearch.index.query.MatchAllQueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder;
+import org.opensearch.index.query.functionscore.ScoreFunctionBuilders;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java
similarity index 95%
rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java
index 8e73b0ceeae8d..9c0815efdc8b4 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java
@@ -1,8 +1,10 @@
-package com.linkedin.metadata.search.elasticsearch.query.request;
+package com.linkedin.metadata.search.query.request;
 
 import com.linkedin.data.schema.DataSchema;
 import com.linkedin.data.schema.PathSpec;
-import com.linkedin.metadata.ESTestConfiguration;
+import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig;
+import com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder;
+import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
 import com.linkedin.metadata.config.search.CustomConfiguration;
 import com.linkedin.metadata.config.search.ExactMatchConfiguration;
 import com.linkedin.metadata.config.search.PartialConfiguration;
@@ -26,15 +28,15 @@
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.util.Pair;
 
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MatchAllQueryBuilder;
-import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder;
-import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryStringQueryBuilder;
-import org.elasticsearch.index.query.SimpleQueryStringBuilder;
-import org.elasticsearch.index.query.TermQueryBuilder;
-import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.MatchAllQueryBuilder;
+import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder;
+import org.opensearch.index.query.MatchPhraseQueryBuilder;
+import org.opensearch.index.query.QueryBuilder;
+import org.opensearch.index.query.QueryStringQueryBuilder;
+import org.opensearch.index.query.SimpleQueryStringBuilder;
+import org.opensearch.index.query.TermQueryBuilder;
+import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder;
 import org.mockito.Mockito;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.annotation.Import;
@@ -50,7 +52,7 @@
 import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
 
-@Import(ESTestConfiguration.class)
+@Import(SearchCommonTestConfiguration.class)
 public class SearchQueryBuilderTest extends AbstractTestNGSpringContextTests {
 
   @Autowired
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
similarity index 95%
rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
index db56e2d34881b..90c6c523c588f 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
@@ -1,11 +1,12 @@
-package com.linkedin.metadata.search.elasticsearch.query.request;
+package com.linkedin.metadata.search.query.request;
 
 import com.linkedin.metadata.config.search.ExactMatchConfiguration;
 import com.linkedin.metadata.config.search.PartialConfiguration;
 import com.linkedin.metadata.config.search.SearchConfiguration;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.data.template.StringArray;
-import com.linkedin.metadata.ESTestConfiguration;
+import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler;
+import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
 import com.linkedin.metadata.TestEntitySpecBuilder;
 import com.linkedin.metadata.config.search.WordGramConfiguration;
 import java.util.ArrayList;
@@ -28,17 +29,17 @@
 import com.linkedin.metadata.query.filter.Criterion;
 import com.linkedin.metadata.query.filter.CriterionArray;
 import com.linkedin.metadata.query.filter.Filter;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.ExistsQueryBuilder;
-import org.elasticsearch.index.query.MatchQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.elasticsearch.index.query.TermsQueryBuilder;
-import org.elasticsearch.search.aggregations.AggregationBuilder;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.ExistsQueryBuilder;
+import org.opensearch.index.query.MatchQueryBuilder;
+import org.opensearch.index.query.MultiMatchQueryBuilder;
+import org.opensearch.index.query.TermsQueryBuilder;
+import org.opensearch.search.aggregations.AggregationBuilder;
+import org.opensearch.search.aggregations.AggregationBuilders;
+import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+import org.opensearch.search.builder.SearchSourceBuilder;
+import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.annotation.Import;
 import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
@@ -48,7 +49,7 @@
 import static org.testng.Assert.*;
 
-@Import(ESTestConfiguration.class)
+@Import(SearchCommonTestConfiguration.class)
 public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
   @Autowired
   private EntityRegistry entityRegistry;
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java
index 4f364c246818f..ddd75a152c333 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java
@@ -4,7 +4,7 @@
 import com.linkedin.data.template.StringArray;
 import com.linkedin.metadata.query.filter.Condition;
 import com.linkedin.metadata.query.filter.Criterion;
-import org.elasticsearch.index.query.QueryBuilder;
+import org.opensearch.index.query.QueryBuilder;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java
similarity index 84%
rename from metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java
index 6e116df5b2906..e6a9bd7d198f7 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java
@@ -1,6 +1,5 @@
 package com.linkedin.metadata.systemmetadata;
 
-import com.linkedin.metadata.ESTestConfiguration;
 import com.linkedin.metadata.run.AspectRowSummary;
 import com.linkedin.metadata.run.IngestionRunSummary;
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
@@ -9,9 +8,7 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
 import com.linkedin.mxe.SystemMetadata;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.context.annotation.Import;
+import org.opensearch.client.RestHighLevelClient;
 import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
@@ -20,18 +17,20 @@
 import javax.annotation.Nonnull;
 import java.util.List;
 
-import static com.linkedin.metadata.ESTestConfiguration.syncAfterWrite;
+import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite;
 import static org.testng.Assert.assertEquals;
 
-@Import(ESTestConfiguration.class)
-public class ElasticSearchSystemMetadataServiceTest extends AbstractTestNGSpringContextTests {
+abstract public class SystemMetadataServiceTestBase extends AbstractTestNGSpringContextTests {
+
+  @Nonnull
+  abstract protected RestHighLevelClient getSearchClient();
+
+  @Nonnull
+  abstract protected ESBulkProcessor getBulkProcessor();
+
+  @Nonnull
+  abstract protected ESIndexBuilder getIndexBuilder();
 
-  @Autowired
-  private RestHighLevelClient _searchClient;
-  @Autowired
-  private ESBulkProcessor _bulkProcessor;
-  @Autowired
-  private ESIndexBuilder _esIndexBuilder;
   private final IndexConvention _indexConvention = new IndexConventionImpl("es_system_metadata_service_test");
 
   private ElasticSearchSystemMetadataService _client;
@@ -49,8 +48,8 @@ public void wipe() throws Exception {
 
   @Nonnull
   private ElasticSearchSystemMetadataService buildService() {
-    ESSystemMetadataDAO dao = new ESSystemMetadataDAO(_searchClient, _indexConvention, _bulkProcessor, 1);
-    return new ElasticSearchSystemMetadataService(_bulkProcessor, _indexConvention, dao, _esIndexBuilder);
+    ESSystemMetadataDAO dao = new ESSystemMetadataDAO(getSearchClient(), _indexConvention, getBulkProcessor(), 1);
+    return new ElasticSearchSystemMetadataService(getBulkProcessor(), _indexConvention, dao, getIndexBuilder());
   }
 
   @Test
@@ -70,7 +69,7 @@ public void testListRuns() throws Exception {
     _client.insert(metadata2, "urn:li:chart:2", "chartKey");
     _client.insert(metadata2, "urn:li:chart:2", "Ownership");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<IngestionRunSummary> runs = _client.listRuns(0, 20, false);
 
@@ -99,7 +98,7 @@ public void testOverwriteRuns() throws Exception {
     _client.insert(metadata2, "urn:li:chart:2", "chartKey");
     _client.insert(metadata2, "urn:li:chart:2", "Ownership");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<IngestionRunSummary> runs = _client.listRuns(0, 20, false);
 
@@ -128,7 +127,7 @@ public void testFindByRunId() throws Exception {
     _client.insert(metadata2, "urn:li:chart:2", "chartKey");
     _client.insert(metadata2, "urn:li:chart:2", "Ownership");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<AspectRowSummary> rows = _client.findByRunId("abc-456", false, 0, ESUtils.MAX_RESULT_SIZE);
 
@@ -156,11 +155,11 @@ public void testDelete() throws Exception {
     _client.insert(metadata2, "urn:li:chart:2", "chartKey");
     _client.insert(metadata2, "urn:li:chart:2", "Ownership");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     _client.deleteUrn("urn:li:chart:1");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<AspectRowSummary> rows = _client.findByRunId("abc-456", false, 0, ESUtils.MAX_RESULT_SIZE);
 
@@ -172,7 +171,7 @@ public void testInsertNullData() throws Exception {
     _client.insert(null, "urn:li:chart:1", "chartKey");
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<IngestionRunSummary> runs = _client.listRuns(0, 20, false);
 
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java
index 2703dd7fe6cbe..9e89328715510 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java
@@ -27,7 +27,7 @@ public EbeanTimelineServiceTest() throws EntityRegistryException {
 
   @BeforeMethod
   public void setupTest() {
-    Database server = EbeanTestUtils.createTestServer();
+    Database server = EbeanTestUtils.createTestServer(EbeanTimelineServiceTest.class.getSimpleName());
     _aspectDao = new EbeanAspectDao(server);
     _aspectDao.setConnectionValidated(true);
     _entityTimelineService = new TimelineServiceImpl(_aspectDao, _testEntityRegistry);
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
similarity index 97%
rename from metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
rename to metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
index d65234bf89d49..cc60ba8679e1f 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
@@ -1,4 +1,4 @@
-package com.linkedin.metadata.timeseries.elastic;
+package com.linkedin.metadata.timeseries.search;
 
 import com.datahub.test.BatchType;
 import com.datahub.test.ComplexNestedRecord;
@@ -16,7 +16,6 @@
 import com.linkedin.data.template.StringArrayArray;
 import com.linkedin.data.template.StringMap;
 import com.linkedin.data.template.StringMapArray;
-import com.linkedin.metadata.ESTestConfiguration;
 import com.linkedin.metadata.aspect.EnvelopedAspect;
 import com.linkedin.metadata.models.AspectSpec;
 import com.linkedin.metadata.models.DataSchemaFactory;
@@ -32,6 +31,7 @@
 import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
 import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor;
 import com.linkedin.metadata.search.utils.QueryUtils;
+import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService;
 import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders;
 import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer;
 import com.linkedin.metadata.utils.GenericRecordUtils;
@@ -45,9 +45,7 @@
 import com.linkedin.timeseries.GroupingBucket;
 import com.linkedin.timeseries.GroupingBucketType;
 import com.linkedin.timeseries.TimeWindowSize;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.context.annotation.Import;
+import org.opensearch.client.RestHighLevelClient;
 import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -60,15 +58,15 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-import static com.linkedin.metadata.Constants.*;
-import static com.linkedin.metadata.ESTestConfiguration.syncAfterWrite;
+import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH;
+import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE;
+import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertTrue;
 import static org.testng.Assert.fail;
 
-@Import(ESTestConfiguration.class)
-public class ElasticSearchTimeseriesAspectServiceTest extends AbstractTestNGSpringContextTests {
+abstract public class TimeseriesAspectServiceTestBase extends AbstractTestNGSpringContextTests {
   private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
   static {
     int maxSize = Integer.parseInt(System.getenv().getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE));
@@ -85,12 +83,15 @@ public class ElasticSearchTimeseriesAspectServiceTest extends AbstractTestNGSpri
   private static final String ES_FIELD_TIMESTAMP = "timestampMillis";
   private static final String ES_FIELD_STAT = "stat";
 
-  @Autowired
-  private RestHighLevelClient _searchClient;
-  @Autowired
-  private ESBulkProcessor _bulkProcessor;
-  @Autowired
-  private ESIndexBuilder _esIndexBuilder;
+  @Nonnull
+  abstract protected RestHighLevelClient getSearchClient();
+
+  @Nonnull
+  abstract protected ESBulkProcessor getBulkProcessor();
+
+  @Nonnull
+  abstract protected ESIndexBuilder getIndexBuilder();
+
   private EntityRegistry _entityRegistry;
   private IndexConvention _indexConvention;
   private ElasticSearchTimeseriesAspectService _elasticSearchTimeseriesAspectService;
@@ -116,9 +117,9 @@ public void setup() {
 
   @Nonnull
   private ElasticSearchTimeseriesAspectService buildService() {
-    return new ElasticSearchTimeseriesAspectService(_searchClient, _indexConvention,
-        new TimeseriesAspectIndexBuilders(_esIndexBuilder, _entityRegistry,
-            _indexConvention), _entityRegistry, _bulkProcessor, 1);
+    return new ElasticSearchTimeseriesAspectService(getSearchClient(), _indexConvention,
+        new TimeseriesAspectIndexBuilders(getIndexBuilder(), _entityRegistry,
+            _indexConvention), _entityRegistry, getBulkProcessor(), 1);
   }
 
   /*
@@ -190,7 +191,7 @@ public void testUpsertProfiles() throws Exception {
       }
     });
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
   }
 
   @Test(groups = "upsertUniqueMessageId")
@@ -216,7 +217,7 @@ public void testUpsertProfilesWithUniqueMessageIds() throws Exception {
       }
     });
 
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     List<EnvelopedAspect> resultAspects =
         _elasticSearchTimeseriesAspectService.getAspectValues(urn, ENTITY_NAME, ASPECT_NAME, null, null,
@@ -860,7 +861,7 @@ public void testCountByFilter() {
 
   @Test(groups = {"testCountAfterDelete"}, dependsOnGroups = {"deleteAspectValues1"})
   public void testCountByFilterAfterDelete() throws InterruptedException {
-    syncAfterWrite(_bulkProcessor);
+    syncAfterWrite(getBulkProcessor());
 
     // Test with filter
     Criterion hasUrnCriterion = new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString());
diff --git a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/Utils.java b/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/Utils.java
deleted file mode 100644
index f96a6c50af33d..0000000000000
--- a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/Utils.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package io.datahub.test.fixtures.elasticsearch;
-
-import com.fasterxml.jackson.core.StreamReadConstraints;
-import com.fasterxml.jackson.databind.DeserializationFeature;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import static com.linkedin.metadata.Constants.*;
-
-
-public class Utils {
-  private Utils() {
-
-  }
-  final public static String FIXTURE_BASE = "src/test/resources/elasticsearch";
-
-  final public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
-      .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
-  static {
-    int maxSize = Integer.parseInt(System.getenv().getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE));
-    OBJECT_MAPPER.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build());
-  }
-}
diff --git a/metadata-io/src/test/java/io/datahub/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
similarity index 99%
rename from metadata-io/src/test/java/io/datahub/test/DataGenerator.java
rename to metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
index 3b374993cde16..cfa9c1258583d 100644
--- a/metadata-io/src/test/java/io/datahub/test/DataGenerator.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
@@ -1,4 +1,4 @@
-package io.datahub.test;
+package io.datahubproject.test;
 
 import com.linkedin.common.AuditStamp;
 import com.linkedin.common.GlossaryTermAssociation;
@@ -111,7 +111,8 @@ public Stream<List<MetadataChangeProposal>> generateMCPs(String entityName, long
     }).map(mcp -> {
       // Expand with default aspects per normal
      return Stream.concat(Stream.of(mcp),
-          AspectUtils.getAdditionalChanges(mcp, entityService, true).stream()).collect(Collectors.toList());
+          AspectUtils.getAdditionalChanges(mcp, entityService, true).stream())
+          .collect(Collectors.toList());
     });
   }
 
diff --git a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/EntityExporter.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/EntityExporter.java
similarity index 81%
rename from metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/EntityExporter.java
rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/EntityExporter.java
index 5c34b9f549d9f..18fbf86f8668d 100644
--- a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/EntityExporter.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/EntityExporter.java
@@ -1,15 +1,15 @@
-package io.datahub.test.fixtures.elasticsearch;
+package io.datahubproject.test.fixtures.search;
 
 import lombok.Builder;
 import lombok.NonNull;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.client.indices.GetMappingsRequest;
-import org.elasticsearch.client.indices.GetMappingsResponse;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.sort.SortBuilders;
-import org.elasticsearch.search.sort.SortOrder;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.client.indices.GetMappingsRequest;
+import org.opensearch.client.indices.GetMappingsResponse;
+import org.opensearch.search.builder.SearchSourceBuilder;
+import org.opensearch.search.sort.SortBuilders;
+import org.opensearch.search.sort.SortOrder;
 
 import java.io.IOException;
 import java.util.Set;
diff --git a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureReader.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureReader.java
similarity index 93%
rename from metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureReader.java
rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureReader.java
index a0c551b28b507..1b804a2346883 100644
--- a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureReader.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureReader.java
@@ -1,12 +1,12 @@
-package io.datahub.test.fixtures.elasticsearch;
+package io.datahubproject.test.fixtures.search;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor;
 import lombok.Builder;
 import lombok.NonNull;
 import org.apache.commons.io.FilenameUtils;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.common.xcontent.XContentType;
+import org.opensearch.action.index.IndexRequest;
+import org.opensearch.common.xcontent.XContentType;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
@@ -23,12 +23,12 @@
 import java.util.stream.Stream;
 import java.util.zip.GZIPInputStream;
 
-import static io.datahub.test.fixtures.elasticsearch.Utils.OBJECT_MAPPER;
+import static io.datahubproject.test.fixtures.search.SearchFixtureUtils.OBJECT_MAPPER;
 
 @Builder
 public class FixtureReader {
     @Builder.Default
-    private String inputBase = Utils.FIXTURE_BASE;
+    private String inputBase = SearchFixtureUtils.FIXTURE_BASE;
     @NonNull
     private ESBulkProcessor bulkProcessor;
     @NonNull
diff --git a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureWriter.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureWriter.java
similarity index 75%
rename from metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureWriter.java
rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureWriter.java
index 36b057bc22a37..0aefa006421fc 100644
--- a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/FixtureWriter.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/FixtureWriter.java
@@ -1,13 +1,13 @@
-package io.datahub.test.fixtures.elasticsearch;
+package io.datahubproject.test.fixtures.search;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import lombok.Builder;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.SearchHits;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.search.SearchHit;
+import org.opensearch.search.SearchHits;
 
 import javax.annotation.Nullable;
 import java.io.BufferedWriter;
@@ -15,8 +15,6 @@
 import java.io.IOException;
 import java.util.function.BiConsumer;
 
-import static io.datahub.test.fixtures.elasticsearch.Utils.OBJECT_MAPPER;
-
 /**
  *
  */
@@ -26,7 +24,7 @@ public class FixtureWriter {
     private RestHighLevelClient client;
 
     @Builder.Default
-    private String outputBase = Utils.FIXTURE_BASE;
+    private String outputBase = SearchFixtureUtils.FIXTURE_BASE;
 
     public void write(SearchRequest searchRequest, String relativeOutput, boolean append) {
         write(searchRequest, relativeOutput, append, null, null, null);
@@ -53,14 +51,14 @@ public void write(SearchRequest searchRequest, String relativeOutput, boo
                     if (outputType == null) {
                         bw.write(hit.getSourceAsString());
                     } else {
-                        O doc = OBJECT_MAPPER.readValue(hit.getSourceAsString(), outputType);
-                        bw.write(OBJECT_MAPPER.writeValueAsString(doc));
+                        O doc = SearchFixtureUtils.OBJECT_MAPPER.readValue(hit.getSourceAsString(), outputType);
+                        bw.write(SearchFixtureUtils.OBJECT_MAPPER.writeValueAsString(doc));
                     }
                     bw.newLine();
 
                     // Fire callback
                     if (callback != null) {
-                        callback.accept(hit, OBJECT_MAPPER.readValue(hit.getSourceAsString(), callbackType));
+                        callback.accept(hit, SearchFixtureUtils.OBJECT_MAPPER.readValue(hit.getSourceAsString(), callbackType));
                     }
                 } catch (JsonProcessingException e) {
                     throw new RuntimeException(e);
diff --git a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/LineageExporter.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java
similarity index 95%
rename from metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/LineageExporter.java
rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java
index 3b236b36cdce1..5db07ee6fb8bc 100644
--- a/metadata-io/src/test/java/io/datahub/test/fixtures/elasticsearch/LineageExporter.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/LineageExporter.java
@@ -1,14 +1,14 @@
-package io.datahub.test.fixtures.elasticsearch;
+package io.datahubproject.test.fixtures.search;
 
 import com.google.common.collect.Lists;
 import lombok.Builder;
 import lombok.NonNull;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.sort.SortBuilders;
-import org.elasticsearch.search.sort.SortOrder;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.search.builder.SearchSourceBuilder;
+import org.opensearch.search.sort.SortBuilders;
+import org.opensearch.search.sort.SortOrder;
 
 import java.net.URLDecoder;
 import java.net.URLEncoder;
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java
similarity index 94%
rename from metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java
index ef9992db1fb25..45bbd912bc794 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java
@@ -1,5 +1,6 @@
-package com.linkedin.metadata;
+package io.datahubproject.test.fixtures.search;
 
+import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
 import com.linkedin.metadata.config.PreProcessHooks;
 import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration;
 import com.linkedin.metadata.config.search.CustomConfiguration;
@@ -30,9 +31,9 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
 import com.linkedin.metadata.version.GitVersion;
-import io.datahub.test.fixtures.elasticsearch.FixtureReader;
+
 import java.util.Optional;
-import org.elasticsearch.client.RestHighLevelClient;
+import org.opensearch.client.RestHighLevelClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.boot.test.context.TestConfiguration;
@@ -46,15 +47,15 @@
 import java.util.Map;
 
 import static com.linkedin.metadata.Constants.*;
-import static com.linkedin.metadata.ESTestConfiguration.REFRESH_INTERVAL_SECONDS;
+import static io.datahubproject.test.search.config.SearchTestContainerConfiguration.REFRESH_INTERVAL_SECONDS;
 import static org.mockito.ArgumentMatchers.anySet;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
 
 @TestConfiguration
-@Import(ESTestConfiguration.class)
-public class ESSampleDataFixture {
+@Import(SearchCommonTestConfiguration.class)
+public class SampleDataFixtureConfiguration {
   /**
    * Interested in adding more fixtures? Here's what you will need to update?
Both are needed or else all fixtures will load on top of each other, @@ -118,7 +119,7 @@ protected EntityIndexBuilders entityIndexBuilders( @Bean(name = "longTailEntityIndexBuilders") protected EntityIndexBuilders longTailEntityIndexBuilders( - @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("entityRegistry") EntityRegistry longTailEntityRegistry, @Qualifier("longTailIndexConvention") IndexConvention indexConvention ) { return entityIndexBuildersHelper(longTailEntityRegistry, indexConvention); @@ -147,7 +148,7 @@ protected ElasticSearchService entitySearchService( @Bean(name = "longTailEntitySearchService") protected ElasticSearchService longTailEntitySearchService( - @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("entityRegistry") EntityRegistry longTailEntityRegistry, @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailEndexBuilders, @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention ) throws IOException { @@ -186,7 +187,7 @@ protected SearchService searchService( @Bean(name = "longTailSearchService") @Nonnull protected SearchService longTailSearchService( - @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("entityRegistry") EntityRegistry longTailEntityRegistry, @Qualifier("longTailEntitySearchService") ElasticSearchService longTailEntitySearchService, @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailIndexBuilders, @Qualifier("longTailPrefix") String longTailPrefix, @@ -248,7 +249,7 @@ protected EntityClient entityClient( protected EntityClient longTailEntityClient( @Qualifier("sampleDataSearchService") SearchService searchService, @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService, - @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry + @Qualifier("entityRegistry") EntityRegistry longTailEntityRegistry ) { return entityClientHelper(searchService, entitySearchService, longTailEntityRegistry); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestFixtureUtils.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchFixtureUtils.java similarity index 67% rename from metadata-io/src/test/java/com/linkedin/metadata/ESTestFixtureUtils.java rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchFixtureUtils.java index 914c5be9f5b09..d74dd041f082e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestFixtureUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchFixtureUtils.java @@ -1,26 +1,45 @@ -package com.linkedin.metadata; +package io.datahubproject.test.fixtures.search; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.datahubproject.test.search.ElasticsearchTestContainer; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; -import io.datahub.test.fixtures.elasticsearch.EntityExporter; -import io.datahub.test.fixtures.elasticsearch.FixtureReader; -import io.datahub.test.fixtures.elasticsearch.FixtureWriter; -import io.datahub.test.fixtures.elasticsearch.LineageExporter; -import io.datahub.test.models.DatasetAnonymized; -import org.elasticsearch.client.RestHighLevelClient; +import 
io.datahubproject.test.models.DatasetAnonymized; +import io.datahubproject.test.search.SearchTestUtils; +import org.opensearch.client.RestHighLevelClient; import org.springframework.boot.test.context.TestConfiguration; -import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Bean; +import org.testcontainers.containers.GenericContainer; import org.testng.annotations.Ignore; import org.testng.annotations.Test; import java.io.IOException; import java.util.Set; -import static com.linkedin.metadata.ESTestConfiguration.REFRESH_INTERVAL_SECONDS; -import static com.linkedin.metadata.ESTestUtils.environmentRestClientBuilder; +import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; +import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; +/** + * This class is used for extracting and moving search fixture data. + */ @TestConfiguration -@Import(ESTestConfiguration.class) -public class ESTestFixtureUtils { +public class SearchFixtureUtils { + + final public static String FIXTURE_BASE = "src/test/resources/elasticsearch"; + + final public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + static { + int maxSize = Integer.parseInt(System.getenv().getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); + OBJECT_MAPPER.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + } + + @Bean(name = "testSearchContainer") + public GenericContainer testSearchContainer() { + return new ElasticsearchTestContainer().startContainer(); + } @Test @Ignore("Fixture capture lineage") @@ -37,7 +56,7 @@ private void extractSearchLineageTestFixture() throws IOException { String rootUrn = "urn:li:dataset:(urn:li:dataPlatform:teradata,teradata.simba.pp_bi_tables.tmis_daily_metrics_final_agg,PROD)"; // Set.of("system_metadata_service_v1", "datasetindex_v2", "graph_service_v1") - try (RestHighLevelClient client = new RestHighLevelClient(environmentRestClientBuilder())) { + try (RestHighLevelClient client = new RestHighLevelClient(SearchTestUtils.environmentRestClientBuilder())) { FixtureWriter fixtureWriter = FixtureWriter.builder() .client(client) .build(); @@ -76,7 +95,7 @@ private void extractEntityTestFixture() throws IOException { String prefix = ""; String commonSuffix = "index_v2"; - try (RestHighLevelClient client = new RestHighLevelClient(environmentRestClientBuilder())) { + try (RestHighLevelClient client = new RestHighLevelClient(SearchTestUtils.environmentRestClientBuilder())) { FixtureWriter fixtureWriter = FixtureWriter.builder() .client(client) .build(); @@ -102,7 +121,7 @@ private void extractEntityTestFixture() throws IOException { * 3. 
Uncomment and run test */ private void reindexTestFixtureData() throws IOException { - ESBulkProcessor bulkProcessor = ESBulkProcessor.builder(new RestHighLevelClient(environmentRestClientBuilder())) + ESBulkProcessor bulkProcessor = ESBulkProcessor.builder(new RestHighLevelClient(SearchTestUtils.environmentRestClientBuilder())) .async(true) .bulkRequestsLimit(1000) .retryInterval(1L) @@ -112,7 +131,7 @@ private void reindexTestFixtureData() throws IOException { FixtureReader reader = FixtureReader.builder() .bulkProcessor(bulkProcessor) .fixtureName("long_tail") - .refreshIntervalSeconds(REFRESH_INTERVAL_SECONDS) + .refreshIntervalSeconds(SearchTestContainerConfiguration.REFRESH_INTERVAL_SECONDS) .build(); reader.read(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java similarity index 95% rename from metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java rename to metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index ade7435bf6652..93d3f108d9e47 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -1,5 +1,7 @@ -package com.linkedin.metadata; +package io.datahubproject.test.fixtures.search; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; +import io.datahubproject.test.search.config.SearchTestContainerConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; @@ -32,9 +34,10 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import com.linkedin.metadata.version.GitVersion; -import io.datahub.test.fixtures.elasticsearch.FixtureReader; + import java.util.Optional; -import org.elasticsearch.client.RestHighLevelClient; + +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.context.TestConfiguration; @@ -48,12 +51,11 @@ import java.util.Map; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.ESTestConfiguration.REFRESH_INTERVAL_SECONDS; @TestConfiguration -@Import(ESTestConfiguration.class) -public class ESSearchLineageFixture { +@Import(SearchCommonTestConfiguration.class) +public class SearchLineageFixtureConfiguration { @Autowired private ESBulkProcessor _bulkProcessor; @@ -155,7 +157,7 @@ protected LineageSearchService lineageSearchService( .bulkProcessor(_bulkProcessor) .fixtureName(fixtureName) .targetIndexPrefix(prefix) - .refreshIntervalSeconds(REFRESH_INTERVAL_SECONDS) + .refreshIntervalSeconds(SearchTestContainerConfiguration.REFRESH_INTERVAL_SECONDS) .build() .read(); diff --git a/metadata-io/src/test/java/io/datahub/test/models/Anonymized.java b/metadata-io/src/test/java/io/datahubproject/test/models/Anonymized.java similarity index 97% rename from metadata-io/src/test/java/io/datahub/test/models/Anonymized.java rename to metadata-io/src/test/java/io/datahubproject/test/models/Anonymized.java index 1108097dff86a..6036473063059 100644 --- 
a/metadata-io/src/test/java/io/datahub/test/models/Anonymized.java +++ b/metadata-io/src/test/java/io/datahubproject/test/models/Anonymized.java @@ -1,4 +1,4 @@ -package io.datahub.test.models; +package io.datahubproject.test.models; import com.fasterxml.jackson.annotation.JsonSetter; diff --git a/metadata-io/src/test/java/io/datahub/test/models/DatasetAnonymized.java b/metadata-io/src/test/java/io/datahubproject/test/models/DatasetAnonymized.java similarity index 97% rename from metadata-io/src/test/java/io/datahub/test/models/DatasetAnonymized.java rename to metadata-io/src/test/java/io/datahubproject/test/models/DatasetAnonymized.java index 225f52d993931..35813d22067a6 100644 --- a/metadata-io/src/test/java/io/datahub/test/models/DatasetAnonymized.java +++ b/metadata-io/src/test/java/io/datahubproject/test/models/DatasetAnonymized.java @@ -1,4 +1,4 @@ -package io.datahub.test.models; +package io.datahubproject.test.models; import com.fasterxml.jackson.annotation.JsonGetter; diff --git a/metadata-io/src/test/java/io/datahub/test/models/GraphAnonymized.java b/metadata-io/src/test/java/io/datahubproject/test/models/GraphAnonymized.java similarity index 82% rename from metadata-io/src/test/java/io/datahub/test/models/GraphAnonymized.java rename to metadata-io/src/test/java/io/datahubproject/test/models/GraphAnonymized.java index 5e6c5d57e050e..3d2360ae04228 100644 --- a/metadata-io/src/test/java/io/datahub/test/models/GraphAnonymized.java +++ b/metadata-io/src/test/java/io/datahubproject/test/models/GraphAnonymized.java @@ -1,4 +1,4 @@ -package io.datahub.test.models; +package io.datahubproject.test.models; import com.fasterxml.jackson.annotation.JsonSetter; @@ -13,7 +13,7 @@ public static class GraphNode extends Anonymized { @JsonSetter("urn") public void setUrn(String urn) { - this.urn = Anonymized.anonymizeUrn(urn); + this.urn = anonymizeUrn(urn); } } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/ElasticsearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/ElasticsearchTestContainer.java new file mode 100644 index 0000000000000..233a667d078dd --- /dev/null +++ b/metadata-io/src/test/java/io/datahubproject/test/search/ElasticsearchTestContainer.java @@ -0,0 +1,42 @@ +package io.datahubproject.test.search; + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + + +import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine; + +public class ElasticsearchTestContainer implements SearchTestContainer { + private static final String ELASTIC_VERSION = "7.10.1"; + private static final String ELASTIC_IMAGE_NAME = "docker.elastic.co/elasticsearch/elasticsearch"; + private static final String ENV_ELASTIC_IMAGE_FULL_NAME = System.getenv("ELASTIC_IMAGE_FULL_NAME"); + private static final String ELASTIC_IMAGE_FULL_NAME = ENV_ELASTIC_IMAGE_FULL_NAME != null + ? ENV_ELASTIC_IMAGE_FULL_NAME : ELASTIC_IMAGE_NAME + ":" + ELASTIC_VERSION; + private static final DockerImageName DOCKER_IMAGE_NAME = DockerImageName.parse(ELASTIC_IMAGE_FULL_NAME) + .asCompatibleSubstituteFor(ELASTIC_IMAGE_NAME); + + protected static final GenericContainer ES_CONTAINER; + private boolean isStarted = false; + + // A static initializer creating an ElasticsearchContainer that defaults to the current image and version, with the ability + // within firewalled environments to override with an environment variable to point to the offline repository.
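+ // A minimal usage sketch, assuming only the Testcontainers and OpenSearch client
+ // APIs already used in this module: a test can start the container and point a
+ // client at its mapped port.
+ //   GenericContainer es = new ElasticsearchTestContainer().startContainer();
+ //   RestHighLevelClient client = new RestHighLevelClient(
+ //       RestClient.builder(new HttpHost("localhost", es.getMappedPort(9200), "http")));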
+ static { + ES_CONTAINER = new org.testcontainers.elasticsearch.ElasticsearchContainer(DOCKER_IMAGE_NAME); + checkContainerEngine(ES_CONTAINER.getDockerClient()); + ES_CONTAINER.withEnv("ES_JAVA_OPTS", SEARCH_JAVA_OPTS).withStartupTimeout(STARTUP_TIMEOUT); + } + + @Override + public GenericContainer startContainer() { + if (!isStarted) { + ElasticsearchTestContainer.ES_CONTAINER.start(); + isStarted = true; + } + return ES_CONTAINER; + } + + @Override + public void stopContainer() { + ES_CONTAINER.stop(); + } +} diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/OpenSearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/OpenSearchTestContainer.java new file mode 100644 index 0000000000000..d94b88b466f89 --- /dev/null +++ b/metadata-io/src/test/java/io/datahubproject/test/search/OpenSearchTestContainer.java @@ -0,0 +1,43 @@ +package io.datahubproject.test.search; + +import org.opensearch.testcontainers.OpensearchContainer; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + + +import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine; + +public class OpenSearchTestContainer implements SearchTestContainer { + private static final String OPENSEARCH_VERSION = "2.9.0"; + private static final String OPENSEARCH_IMAGE_NAME = "opensearchproject/opensearch"; + private static final String ENV_OPENSEARCH_IMAGE_FULL_NAME = System.getenv("OPENSEARCH_IMAGE_FULL_NAME"); + private static final String OPENSEARCH_IMAGE_FULL_NAME = ENV_OPENSEARCH_IMAGE_FULL_NAME != null + ? ENV_OPENSEARCH_IMAGE_FULL_NAME : OPENSEARCH_IMAGE_NAME + ":" + OPENSEARCH_VERSION; + private static final DockerImageName DOCKER_IMAGE_NAME = DockerImageName.parse(OPENSEARCH_IMAGE_FULL_NAME) + .asCompatibleSubstituteFor(OPENSEARCH_IMAGE_NAME); + + protected static final GenericContainer OS_CONTAINER; + private boolean isStarted = false; + + // A static initializer creating an OpensearchContainer that defaults to the current image and version, with the ability + // within firewalled environments to override with an environment variable to point to the offline repository.
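+ // A sketch of the image override for firewalled environments (the registry host
+ // below is hypothetical, for illustration only):
+ //   OPENSEARCH_IMAGE_FULL_NAME=registry.example.com/opensearchproject/opensearch:2.9.0 \
+ //     ./gradlew :metadata-io:test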
+ static { + OS_CONTAINER = new OpensearchContainer(DOCKER_IMAGE_NAME); + checkContainerEngine(OS_CONTAINER.getDockerClient()); + OS_CONTAINER.withEnv("OPENSEARCH_JAVA_OPTS", SEARCH_JAVA_OPTS).withStartupTimeout(STARTUP_TIMEOUT); + } + + @Override + public GenericContainer startContainer() { + if (!isStarted) { + OS_CONTAINER.start(); + isStarted = true; + } + return OS_CONTAINER; + } + + @Override + public void stopContainer() { + OS_CONTAINER.stop(); + } +} diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java new file mode 100644 index 0000000000000..67e1ee368f513 --- /dev/null +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -0,0 +1,14 @@ +package io.datahubproject.test.search; + +import org.testcontainers.containers.GenericContainer; + +import java.time.Duration; + +public interface SearchTestContainer { + String SEARCH_JAVA_OPTS = "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456"; + Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min + + GenericContainer startContainer(); + + void stopContainer(); +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java similarity index 74% rename from metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java rename to metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index 7e9605cbe3db0..414b9f927fada 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata; +package io.datahubproject.test.search; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; @@ -17,48 +17,32 @@ import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; -import java.time.Duration; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import javax.annotation.Nullable; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.testcontainers.elasticsearch.ElasticsearchContainer; -import org.testcontainers.utility.DockerImageName; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.AUTO_COMPLETE_ENTITY_TYPES; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; -import static com.linkedin.metadata.DockerTestUtils.checkContainerEngine; -public class ESTestUtils { - private ESTestUtils() { +public class SearchTestUtils { + private SearchTestUtils() { } - private static final 
String ELASTIC_VERSION = "7.10.1"; - private static final String ELASTIC_IMAGE_NAME = "docker.elastic.co/elasticsearch/elasticsearch"; - private static final String ENV_ELASTIC_IMAGE_FULL_NAME = System.getenv("ELASTIC_IMAGE_FULL_NAME"); - private static final String ELASTIC_IMAGE_FULL_NAME = ENV_ELASTIC_IMAGE_FULL_NAME != null - ? ENV_ELASTIC_IMAGE_FULL_NAME : ELASTIC_IMAGE_NAME + ":" + ELASTIC_VERSION; - private static final DockerImageName DOCKER_IMAGE_NAME = DockerImageName.parse(ELASTIC_IMAGE_FULL_NAME) - .asCompatibleSubstituteFor(ELASTIC_IMAGE_NAME); - - public static final ElasticsearchContainer ES_CONTAINER; - - // A helper method to create an ElasticseachContainer defaulting to the current image and version, with the ability - // within firewalled environments to override with an environment variable to point to the offline repository. - static { - ES_CONTAINER = new ElasticsearchContainer(DOCKER_IMAGE_NAME); - checkContainerEngine(ES_CONTAINER.getDockerClient()); - ES_CONTAINER.withEnv("ES_JAVA_OPTS", "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456") - .withStartupTimeout(Duration.ofMinutes(5)); // usually < 1min + public static void syncAfterWrite(ESBulkProcessor bulkProcessor) throws InterruptedException { + bulkProcessor.flush(); + Thread.sleep(1000); } public final static List SEARCHABLE_ENTITIES; @@ -75,7 +59,7 @@ public static SearchResult searchAcrossEntities(SearchService searchService, Str public static SearchResult searchAcrossEntities(SearchService searchService, String query, @Nullable List facets) { return searchService.searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, 0, - 100, new SearchFlags().setFulltext(true).setSkipCache(true), facets); + 100, new SearchFlags().setFulltext(true).setSkipCache(true), facets); } public static SearchResult searchAcrossCustomEntities(SearchService searchService, String query, List searchableEntities) { @@ -89,12 +73,12 @@ public static SearchResult search(SearchService searchService, String query) { public static SearchResult search(SearchService searchService, List entities, String query) { return searchService.search(entities, query, null, null, 0, 100, - new SearchFlags().setFulltext(true).setSkipCache(true)); + new SearchFlags().setFulltext(true).setSkipCache(true)); } public static ScrollResult scroll(SearchService searchService, String query, int batchSize, @Nullable String scrollId) { return searchService.scrollAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, - scrollId, "3m", batchSize, new SearchFlags().setFulltext(true).setSkipCache(true)); + scrollId, "3m", batchSize, new SearchFlags().setFulltext(true).setSkipCache(true)); } public static SearchResult searchStructured(SearchService searchService, String query) { @@ -112,9 +96,9 @@ public static LineageSearchResult lineage(LineageSearchService lineageSearchServ .build()); return lineageSearchService.searchAcrossLineage(root, LineageDirection.DOWNSTREAM, - SEARCHABLE_ENTITY_TYPES.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()), - "*", hops, ResolverUtils.buildFilter(filters, List.of()), null, 0, 100, null, - null, new SearchFlags().setSkipCache(true)); + SEARCHABLE_ENTITY_TYPES.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()), + "*", hops, ResolverUtils.buildFilter(filters, List.of()), null, 0, 100, null, + null, new SearchFlags().setSkipCache(true)); } public static AutoCompleteResults autocomplete(SearchableEntityType searchableEntityType, String query) throws Exception { @@ -160,4 +144,4 @@ public 
HttpAsyncClientBuilder customizeHttpClient( } }); } -} \ No newline at end of file +} diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java new file mode 100644 index 0000000000000..530d3f4d53625 --- /dev/null +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java @@ -0,0 +1,63 @@ +package io.datahubproject.test.search.config; + +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.linkedin.metadata.config.search.CustomConfiguration; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.WordGramConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; + +/** + * This is common configuration for search, regardless of which + * test container implementation is used. + */ +@TestConfiguration +public class SearchCommonTestConfiguration { + @Bean + public SearchConfiguration searchConfiguration() { + SearchConfiguration searchConfiguration = new SearchConfiguration(); + searchConfiguration.setMaxTermBucketSize(20); + + ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration(); + exactMatchConfiguration.setExclusive(false); + exactMatchConfiguration.setExactFactor(10.0f); + exactMatchConfiguration.setWithPrefix(true); + exactMatchConfiguration.setPrefixFactor(6.0f); + exactMatchConfiguration.setCaseSensitivityFactor(0.7f); + exactMatchConfiguration.setEnableStructured(true); + + WordGramConfiguration wordGramConfiguration = new WordGramConfiguration(); + wordGramConfiguration.setTwoGramFactor(1.2f); + wordGramConfiguration.setThreeGramFactor(1.5f); + wordGramConfiguration.setFourGramFactor(1.8f); + + PartialConfiguration partialConfiguration = new PartialConfiguration(); + partialConfiguration.setFactor(0.4f); + partialConfiguration.setUrnFactor(0.5f); + + searchConfiguration.setExactMatch(exactMatchConfiguration); + searchConfiguration.setWordGram(wordGramConfiguration); + searchConfiguration.setPartial(partialConfiguration); + return searchConfiguration; + } + + @Bean + public CustomSearchConfiguration customSearchConfiguration() throws Exception { + CustomConfiguration customConfiguration = new CustomConfiguration(); + customConfiguration.setEnabled(true); + customConfiguration.setFile("search_config_builder_test.yml"); + return customConfiguration.resolve(new YAMLMapper()); + } + + @Bean(name = "entityRegistry") + public EntityRegistry entityRegistry() throws EntityRegistryException { + return new ConfigEntityRegistry( + SearchCommonTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + } +} diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java new file mode 100644 index 0000000000000..2cfa9f9187825 --- /dev/null +++ 
b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java @@ -0,0 +1,88 @@ +package io.datahubproject.test.search.config; + +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.version.GitVersion; +import java.util.Optional; + +import org.apache.http.HttpHost; +import org.apache.http.impl.nio.reactor.IOReactorConfig; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; +import org.testcontainers.containers.GenericContainer; + +import javax.annotation.Nonnull; + +import java.util.Map; + + +/** + * This configuration is for `test containers`; it builds these objects tied to + * the test container instantiated for tests. Could be ES or OpenSearch, etc. + * + * Does your test require a running instance? If not, use {@link io.datahubproject.test.search.config.SearchCommonTestConfiguration} instead. + */ +@TestConfiguration +public class SearchTestContainerConfiguration { + // This port is overridden by the specific test container instance + private static final int HTTP_PORT = 9200; + public static final int REFRESH_INTERVAL_SECONDS = 5; + + @Primary + @Bean(name = "searchRestHighLevelClient") + @Nonnull + public RestHighLevelClient getElasticsearchClient(@Qualifier("testSearchContainer") GenericContainer searchContainer) { + // Construct a standard rest client for search, pointed at the test container's mapped HTTP port. + final RestClientBuilder builder = + RestClient.builder(new HttpHost( + "localhost", + searchContainer.getMappedPort(HTTP_PORT), "http") + ).setHttpClientConfigCallback(httpAsyncClientBuilder -> + httpAsyncClientBuilder.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(1).build())); + + builder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder. + setConnectionRequestTimeout(30000)); + + return new RestHighLevelClient(builder); + } + + /* + Cannot use the factory class without creating circular dependencies + */ + @Primary + @Bean(name = "searchBulkProcessor") + @Nonnull + public ESBulkProcessor getBulkProcessor(@Qualifier("searchRestHighLevelClient") RestHighLevelClient searchClient) { + return ESBulkProcessor.builder(searchClient) + .async(true) + /* + * Force a refresh as part of this request. This refresh policy does not scale for high indexing or search throughput but is useful + * to present a consistent view for indices with very low traffic. And it is wonderful for tests!
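+ * With RefreshPolicy.IMMEDIATE each bulk request also forces an index refresh,
+ * so documents written by a test become searchable as soon as the bulk call
+ * returns, at the cost of producing many small segments.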
+ */ + .writeRequestRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .bulkRequestsLimit(10000) + .bulkFlushPeriod(REFRESH_INTERVAL_SECONDS - 1) + .retryInterval(1L) + .numRetries(1) + .build(); + } + + @Primary + @Bean(name = "searchIndexBuilder") + @Nonnull + protected ESIndexBuilder getIndexBuilder(@Qualifier("searchRestHighLevelClient") RestHighLevelClient searchClient) { + GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); + return new ESIndexBuilder(searchClient, 1, 1, 3, 1, Map.of(), + false, false, + new ElasticSearchConfiguration(), gitVersion); + } +} diff --git a/metadata-io/src/test/resources/testng-other.xml b/metadata-io/src/test/resources/testng-other.xml new file mode 100644 index 0000000000000..e214fdb8c1f61 --- /dev/null +++ b/metadata-io/src/test/resources/testng-other.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/metadata-io/src/test/resources/testng-search.xml b/metadata-io/src/test/resources/testng-search.xml new file mode 100644 index 0000000000000..3b32ae34c1f5a --- /dev/null +++ b/metadata-io/src/test/resources/testng-search.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/metadata-io/src/test/resources/testng.xml b/metadata-io/src/test/resources/testng.xml new file mode 100644 index 0000000000000..fdd1c1a6c8921 --- /dev/null +++ b/metadata-io/src/test/resources/testng.xml @@ -0,0 +1,14 @@ + + + + + + + + + \ No newline at end of file diff --git a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java index 3b44ede0f1d43..a214117f4e1bc 100644 --- a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.kafka; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -24,7 +24,7 @@ public class MaeConsumerApplicationTestConfiguration { private EntityServiceImpl _entityServiceImpl; @MockBean - private RestliEntityClient restliEntityClient; + private SystemRestliEntityClient restliEntityClient; @MockBean private Database ebeanServer; diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index 69fe2255a6916..d36fd0de40d03 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -44,6 +44,7 @@ dependencies { testImplementation externalDependency.mockito implementation externalDependency.awsMskIamAuth + testImplementation externalDependency.testng testImplementation externalDependency.springBootTest testRuntimeOnly externalDependency.logbackClassic } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java index 2d8c52566e2ae..a9e54e5354b42 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java +++ 
b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java @@ -1,10 +1,10 @@ package com.linkedin.metadata.kafka.config; -import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.google.common.collect.ImmutableSet; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.metadata.kafka.hydrator.EntityHydrator; +import com.linkedin.metadata.models.registry.EntityRegistry; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -13,19 +13,25 @@ @Configuration -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class}) +@Import({RestliEntityClientFactory.class}) public class EntityHydratorConfig { @Autowired - @Qualifier("systemAuthentication") - private Authentication _systemAuthentication; + @Qualifier("systemRestliEntityClient") + private SystemRestliEntityClient _entityClient; @Autowired - @Qualifier("restliEntityClient") - private RestliEntityClient _entityClient; + private EntityRegistry _entityRegistry; + + public final static ImmutableSet EXCLUDED_ASPECTS = ImmutableSet.builder() + .add("datasetUpstreamLineage", "upstreamLineage") + .add("dataJobInputOutput") + .add("dataProcessInstanceRelationships", "dataProcessInstanceInput", "dataProcessInstanceOutput") + .add("inputFields") + .build(); @Bean public EntityHydrator getEntityHydrator() { - return new EntityHydrator(_systemAuthentication, _entityClient); + return new EntityHydrator(_entityRegistry, _entityClient); } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticEvent.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticEvent.java index 7ba04ecd2389e..b0fade24e26ad 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticEvent.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticEvent.java @@ -2,7 +2,7 @@ import com.linkedin.events.metadata.ChangeType; import lombok.Data; -import org.elasticsearch.common.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentBuilder; @Data public abstract class ElasticEvent { diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticsearchConnector.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticsearchConnector.java index afa69c9f1750e..bea75f7b282ee 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticsearchConnector.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/ElasticsearchConnector.java @@ -5,11 +5,11 @@ import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.common.xcontent.XContentType; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.delete.DeleteRequest; +import 
org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.common.xcontent.XContentType; @Slf4j diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/JsonElasticEvent.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/JsonElasticEvent.java index d42464051d7ec..230cd8433e6ff 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/JsonElasticEvent.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/JsonElasticEvent.java @@ -1,11 +1,11 @@ package com.linkedin.metadata.kafka.elasticsearch; -import org.elasticsearch.common.xcontent.DeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.common.xcontent.XContentType; import java.io.IOException; import javax.annotation.Nullable; diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/MCEElasticEvent.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/MCEElasticEvent.java index 184efa1573b35..a3d6dca75068b 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/MCEElasticEvent.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/elasticsearch/MCEElasticEvent.java @@ -2,12 +2,12 @@ import com.linkedin.data.template.RecordTemplate; import com.datahub.util.RecordUtils; -import org.elasticsearch.common.xcontent.DeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.common.xcontent.XContentType; import java.io.IOException; import javax.annotation.Nullable; diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java index 55077c46a1526..3b65ecccad336 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java @@ -1,15 +1,12 @@ package com.linkedin.metadata.kafka.hook.event; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import 
com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.SetMode; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.metadata.Constants; @@ -46,8 +43,7 @@ */ @Slf4j @Component -@Import({EntityChangeEventGeneratorRegistry.class, EntityRegistryFactory.class, RestliEntityClientFactory.class, - SystemAuthenticationFactory.class}) +@Import({EntityChangeEventGeneratorRegistry.class, EntityRegistryFactory.class, RestliEntityClientFactory.class}) public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { /** @@ -83,20 +79,18 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { */ private static final Set SUPPORTED_OPERATIONS = ImmutableSet.of("CREATE", "UPSERT", "DELETE"); private final EntityChangeEventGeneratorRegistry _entityChangeEventGeneratorRegistry; - private final EntityClient _entityClient; - private final Authentication _systemAuthentication; + private final SystemRestliEntityClient _entityClient; private final EntityRegistry _entityRegistry; private final Boolean _isEnabled; @Autowired public EntityChangeEventGeneratorHook( @Nonnull final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, - @Nonnull final RestliEntityClient entityClient, @Nonnull final Authentication systemAuthentication, + @Nonnull final SystemRestliEntityClient entityClient, @Nonnull final EntityRegistry entityRegistry, @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled) { _entityChangeEventGeneratorRegistry = Objects.requireNonNull(entityChangeEventGeneratorRegistry); _entityClient = Objects.requireNonNull(entityClient); - _systemAuthentication = Objects.requireNonNull(systemAuthentication); _entityRegistry = Objects.requireNonNull(entityRegistry); _isEnabled = isEnabled; } @@ -189,8 +183,7 @@ private void emitPlatformEvent(@Nonnull final PlatformEvent event, @Nonnull fina _entityClient.producePlatformEvent( Constants.CHANGE_EVENT_PLATFORM_EVENT_NAME, partitioningKey, - event, - _systemAuthentication + event ); } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index 06545ef3525dd..7cbe53dee9fe4 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.siblings; -import com.datahub.authentication.Authentication; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -13,9 +12,8 @@ import com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; import com.linkedin.entity.EntityResponse; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; -import 
com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; @@ -60,7 +58,7 @@ @Slf4j @Component @Singleton -@Import({EntityRegistryFactory.class, RestliEntityClientFactory.class, EntitySearchServiceFactory.class, SystemAuthenticationFactory.class}) +@Import({EntityRegistryFactory.class, RestliEntityClientFactory.class, EntitySearchServiceFactory.class}) public class SiblingAssociationHook implements MetadataChangeLogHook { public static final String SIBLING_ASSOCIATION_SYSTEM_ACTOR = "urn:li:corpuser:__datahub_system_sibling_hook"; @@ -73,23 +71,20 @@ public class SiblingAssociationHook implements MetadataChangeLogHook { public static final String SOURCE_SUBTYPE_V2 = "Source"; private final EntityRegistry _entityRegistry; - private final RestliEntityClient _entityClient; + private final SystemRestliEntityClient _entityClient; private final EntitySearchService _searchService; - private final Authentication _systemAuthentication; private final boolean _isEnabled; @Autowired public SiblingAssociationHook( @Nonnull final EntityRegistry entityRegistry, - @Nonnull final RestliEntityClient entityClient, + @Nonnull final SystemRestliEntityClient entityClient, @Nonnull final EntitySearchService searchService, - @Nonnull final Authentication systemAuthentication, @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled ) { _entityRegistry = entityRegistry; _entityClient = entityClient; _searchService = searchService; - _systemAuthentication = systemAuthentication; _isEnabled = isEnabled; } @@ -251,9 +246,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { dbtSiblingProposal.setEntityUrn(dbtUrn); try { - _entityClient.ingestProposal(dbtSiblingProposal, _systemAuthentication); + _entityClient.ingestProposal(dbtSiblingProposal, true); } catch (RemoteInvocationException e) { - log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + log.error("Error while associating {} with {}: {}", dbtUrn, sourceUrn, e.toString()); throw new RuntimeException("Error ingesting sibling proposal. Skipping processing.", e); } @@ -274,9 +269,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { List filteredNewSiblingsArray = newSiblingsUrnArray.stream().filter(urn -> { try { - return _entityClient.exists(urn, _systemAuthentication); + return _entityClient.exists(urn); } catch (RemoteInvocationException e) { - log.error("Error while checking existence of {}: {}", urn.toString(), e.toString()); + log.error("Error while checking existence of {}: {}", urn, e.toString()); throw new RuntimeException("Error checking existence. Skipping processing.", e); } }).collect(Collectors.toList()); @@ -294,9 +289,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { sourceSiblingProposal.setEntityUrn(sourceUrn); try { - _entityClient.ingestProposal(sourceSiblingProposal, _systemAuthentication); + _entityClient.ingestProposal(sourceSiblingProposal, true); } catch (RemoteInvocationException e) { - log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + log.error("Error while associating {} with {}: {}", dbtUrn, sourceUrn, e.toString()); throw new RuntimeException("Error ingesting sibling proposal. 
Skipping processing.", e); } } @@ -406,11 +401,8 @@ private SubTypes getSubtypesFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _systemAuthentication - ); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME)); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SUB_TYPES_ASPECT_NAME)) { return new SubTypes(entityResponse.getAspects().get(Constants.SUB_TYPES_ASPECT_NAME).getValue().data()); @@ -427,10 +419,8 @@ private UpstreamLineage getUpstreamLineageFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(UPSTREAM_LINEAGE_ASPECT_NAME), - _systemAuthentication + ImmutableSet.of(UPSTREAM_LINEAGE_ASPECT_NAME) ); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { @@ -448,10 +438,8 @@ private Siblings getSiblingsFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(SIBLINGS_ASPECT_NAME), - _systemAuthentication + ImmutableSet.of(SIBLINGS_ASPECT_NAME) ); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SIBLINGS_ASPECT_NAME)) { diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java index d768ada1765fa..0a3b38517eaad 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java @@ -1,28 +1,32 @@ package com.linkedin.metadata.kafka.hydrator; -import com.datahub.authentication.Authentication; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; import java.util.Collections; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.kafka.config.EntityHydratorConfig.EXCLUDED_ASPECTS; @Slf4j @RequiredArgsConstructor public class EntityHydrator { - private final Authentication _systemAuthentication; - private final EntityClient _entityClient; - + private final EntityRegistry _entityRegistry; + private final SystemRestliEntityClient _entityClient; private final ChartHydrator _chartHydrator = new ChartHydrator(); private final CorpUserHydrator _corpUserHydrator = new CorpUserHydrator(); private final DashboardHydrator _dashboardHydrator = new DashboardHydrator(); @@ -43,8 +47,12 @@ public Optional getHydratedEntity(String entityTypeName, String urn) // Hydrate fields from snapshot EntityResponse entityResponse; try { - entityResponse = _entityClient.batchGetV2(entityTypeName, Collections.singleton(urnObj), null, - 
this._systemAuthentication).get(urnObj); + Set aspectNames = Optional.ofNullable(_entityRegistry.getEntitySpecs().get(urnObj.getEntityType())) + .map(spec -> spec.getAspectSpecs().stream().map(AspectSpec::getName) + .filter(aspectName -> !EXCLUDED_ASPECTS.contains(aspectName)) + .collect(Collectors.toSet())) + .orElse(Set.of()); + entityResponse = _entityClient.batchGetV2(Collections.singleton(urnObj), aspectNames).get(urnObj); } catch (RemoteInvocationException | URISyntaxException e) { log.error("Error while calling GMS to hydrate entity for urn {}", urn); return Optional.empty(); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java index d8759da0fe1dd..7d9619f3e2d1c 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.event; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.assertion.AssertionResult; @@ -38,8 +37,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DatasetKey; @@ -66,6 +64,7 @@ import com.linkedin.platform.event.v1.Parameters; import java.net.URISyntaxException; import java.util.Map; + import org.mockito.Mockito; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -92,22 +91,19 @@ public class EntityChangeEventGeneratorHookTest { private static final String TEST_DATA_FLOW_URN = "urn:li:dataFlow:flow"; private static final String TEST_DATA_JOB_URN = "urn:li:dataJob:job"; private Urn actorUrn; - private Authentication _mockAuthentication; - private RestliEntityClient _mockClient; + private SystemRestliEntityClient _mockClient; private EntityService _mockEntityService; private EntityChangeEventGeneratorHook _entityChangeEventHook; @BeforeMethod public void setupTest() throws URISyntaxException { actorUrn = Urn.createFromString(TEST_ACTOR_URN); - _mockAuthentication = Mockito.mock(Authentication.class); - _mockClient = Mockito.mock(RestliEntityClient.class); + _mockClient = Mockito.mock(SystemRestliEntityClient.class); _mockEntityService = Mockito.mock(EntityService.class); EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry = createEntityChangeEventGeneratorRegistry(); _entityChangeEventHook = - new EntityChangeEventGeneratorHook(entityChangeEventGeneratorRegistry, _mockClient, _mockAuthentication, - createMockEntityRegistry(), true); + new EntityChangeEventGeneratorHook(entityChangeEventGeneratorRegistry, _mockClient, createMockEntityRegistry(), true); } @Test @@ -498,8 +494,7 @@ public void testInvokeDataProcessInstanceRunEventStart() throws Exception { final EntityResponse entityResponse = buildEntityResponse(ImmutableMap.of(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, 
relationships)); - Mockito.when(_mockClient.getV2(eq(DATA_PROCESS_INSTANCE_ENTITY_NAME), eq(dataProcessInstanceUrn), - any(), eq(_mockAuthentication))).thenReturn(entityResponse); + Mockito.when(_mockClient.getV2(eq(dataProcessInstanceUrn), any())).thenReturn(entityResponse); _entityChangeEventHook.invoke(event); @@ -540,8 +535,7 @@ public void testInvokeDataProcessInstanceRunEventComplete() throws Exception { final EntityResponse entityResponse = buildEntityResponse(ImmutableMap.of(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, relationships)); - Mockito.when(_mockClient.getV2(eq(DATA_PROCESS_INSTANCE_ENTITY_NAME), eq(dataProcessInstanceUrn), - any(), eq(_mockAuthentication))).thenReturn(entityResponse); + Mockito.when(_mockClient.getV2(eq(dataProcessInstanceUrn), any())).thenReturn(entityResponse); _entityChangeEventHook.invoke(event); @@ -618,7 +612,7 @@ private EntityChangeEventGeneratorRegistry createEntityChangeEventGeneratorRegis // Run change event generators registry.register(ASSERTION_RUN_EVENT_ASPECT_NAME, new AssertionRunEventChangeEventGenerator()); registry.register(DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, - new DataProcessInstanceRunEventChangeEventGenerator(_mockClient, _mockAuthentication)); + new DataProcessInstanceRunEventChangeEventGenerator(_mockClient)); return registry; } @@ -668,14 +662,14 @@ private EntityRegistry createMockEntityRegistry() { return registry; } - private void verifyProducePlatformEvent(EntityClient mockClient, PlatformEvent platformEvent) throws Exception { + private void verifyProducePlatformEvent(SystemRestliEntityClient mockClient, PlatformEvent platformEvent) throws Exception { verifyProducePlatformEvent(mockClient, platformEvent, true); } - private void verifyProducePlatformEvent(EntityClient mockClient, PlatformEvent platformEvent, boolean noMoreInteractions) throws Exception { + private void verifyProducePlatformEvent(SystemRestliEntityClient mockClient, PlatformEvent platformEvent, boolean noMoreInteractions) throws Exception { // Verify event has been emitted. 
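// Note the signature change being verified below: producePlatformEvent no longer
// takes an Authentication argument, since SystemRestliEntityClient supplies the
// system identity internally.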
verify(mockClient, Mockito.times(1)).producePlatformEvent(eq(CHANGE_EVENT_PLATFORM_EVENT_NAME), Mockito.anyString(), - argThat(new PlatformEventMatcher(platformEvent)), Mockito.any(Authentication.class)); + argThat(new PlatformEventMatcher(platformEvent))); if (noMoreInteractions) { Mockito.verifyNoMoreInteractions(_mockClient); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 78d304d67bfc0..6a2a05aa4b8c0 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.siblings; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.linkedin.common.FabricType; @@ -19,7 +18,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -44,19 +43,16 @@ public class SiblingAssociationHookTest { private SiblingAssociationHook _siblingAssociationHook; - RestliEntityClient _mockEntityClient; + SystemRestliEntityClient _mockEntityClient; EntitySearchService _mockSearchService; - Authentication _mockAuthentication; @BeforeMethod public void setupTest() { EntityRegistry registry = new ConfigEntityRegistry( SiblingAssociationHookTest.class.getClassLoader().getResourceAsStream("test-entity-registry-siblings.yml")); - _mockEntityClient = Mockito.mock(RestliEntityClient.class); + _mockEntityClient = Mockito.mock(SystemRestliEntityClient.class); _mockSearchService = Mockito.mock(EntitySearchService.class); - _mockAuthentication = Mockito.mock(Authentication.class); - _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityClient, _mockSearchService, _mockAuthentication, - true); + _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityClient, _mockSearchService, true); _siblingAssociationHook.setEnabled(true); } @@ -69,15 +65,13 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { EntityResponse mockResponse = new EntityResponse(); mockResponse.setAspects(mockResponseMap); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); Mockito.when( _mockEntityClient.getV2( - DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _mockAuthentication + ImmutableSet.of(SUB_TYPES_ASPECT_NAME) )).thenReturn(mockResponse); @@ -105,10 +99,7 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - 
Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() .setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -121,10 +112,7 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test @@ -132,23 +120,20 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { SubTypes mockSourceSubtypesAspect = new SubTypes(); mockSourceSubtypesAspect.setTypeNames(new StringArray(ImmutableList.of("model"))); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); EnvelopedAspectMap mockResponseMap = new EnvelopedAspectMap(); mockResponseMap.put(SUB_TYPES_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(mockSourceSubtypesAspect.data()))); EntityResponse mockResponse = new EntityResponse(); mockResponse.setAspects(mockResponseMap); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); Mockito.when( _mockEntityClient.getV2( - DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _mockAuthentication - )).thenReturn(mockResponse); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME))).thenReturn(mockResponse); MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); @@ -174,15 +159,12 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal(Mockito.eq(proposal), eq(true)); } @Test public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Exception { - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); @@ -208,10 +190,7 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() 
.setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -224,15 +203,12 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); SearchResult returnSearchResult = new SearchResult(); SearchEntityArray returnEntityArray = new SearchEntityArray(); @@ -271,10 +247,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() .setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -287,10 +260,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { @@ -309,10 +279,7 @@ public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { _siblingAssociationHook.invoke(event); - Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal(Mockito.any(), eq(true)); } @@ -335,12 +302,7 @@ public void testInvokeWhenSourceUrnHasTwoUpstreamsOneDbt() throws Exception { _siblingAssociationHook.invoke(event); - Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal( - Mockito.any(), - Mockito.eq(_mockAuthentication) - ); - - + Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal(Mockito.any(), eq(true)); } private MetadataChangeLog createEvent(String entityType, String aspectName, ChangeType changeType) { diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java index ef80c49ec4520..dc5a6cd23295b 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java @@ -2,7 +2,7 @@ import 
com.datahub.authentication.Authentication; import com.datahub.metadata.ingestion.IngestionScheduler; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; @@ -44,8 +44,8 @@ public class MCLSpringTestConfiguration { @MockBean public IngestionScheduler ingestionScheduler; - @MockBean - public RestliEntityClient entityClient; + @MockBean(name = "systemRestliEntityClient") + public SystemRestliEntityClient entityClient; @MockBean public ElasticSearchService searchService; diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java index 9b4fe15c11fc5..f0c59240a9ba4 100644 --- a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java +++ b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.kafka; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java index 74679d30b2945..c30dd6e6f96dc 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java @@ -4,8 +4,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.linkedin.entity.Entity; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; @@ -40,15 +39,14 @@ @Slf4j @Component @Conditional(MetadataChangeProposalProcessorCondition.class) -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class, KafkaEventConsumerFactory.class, - DataHubKafkaProducerFactory.class}) +@Import({RestliEntityClientFactory.class, KafkaEventConsumerFactory.class, DataHubKafkaProducerFactory.class}) @EnableKafka @RequiredArgsConstructor public class MetadataChangeEventsProcessor { @NonNull private final Authentication systemAuthentication; - private final RestliEntityClient entityClient; + private final SystemRestliEntityClient entityClient; private final Producer kafkaProducer; private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); diff --git 
a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index 289d70ef8c0e9..79f8c90af8ec7 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -2,9 +2,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; @@ -35,15 +33,13 @@ @Slf4j @Component -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class, KafkaEventConsumerFactory.class, - DataHubKafkaProducerFactory.class}) +@Import({RestliEntityClientFactory.class, KafkaEventConsumerFactory.class, DataHubKafkaProducerFactory.class}) @Conditional(MetadataChangeProposalProcessorCondition.class) @EnableKafka @RequiredArgsConstructor public class MetadataChangeProposalsProcessor { - private final Authentication systemAuthentication; - private final RestliEntityClient entityClient; + private final SystemRestliEntityClient entityClient; private final Producer kafkaProducer; private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); @@ -64,7 +60,7 @@ public void consume(final ConsumerRecord consumerRecord) event = EventUtils.avroToPegasusMCP(record); log.debug("MetadataChangeProposal {}", event); // TODO: Get this from the event itself. 
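The one-line change that follows is the crux of the whole refactor: SystemRestliEntityClient is constructed with the system Authentication baked in, so call sites stop threading a credential argument through every ingestProposal/exists/getV2 call (which is also why the Mockito verifications earlier in this diff lose their Mockito.eq(_mockAuthentication) matcher and keep only the boolean async flag). A minimal, self-contained sketch of the pattern — the types below are hypothetical stand-ins, not the real DataHub signatures:

```java
// Sketch only: stand-in types, not the real Authentication/RestliEntityClient API.
interface AuthContext {}
interface Proposal {}

interface EntityClient {
  // old style: every call supplies credentials explicitly
  void ingestProposal(Proposal proposal, AuthContext auth, boolean async) throws Exception;
}

// "System" variant: binds the delegate to the system credentials once, at construction.
final class SystemEntityClientSketch {
  private final EntityClient delegate;
  private final AuthContext systemAuth;

  SystemEntityClientSketch(EntityClient delegate, AuthContext systemAuth) {
    this.delegate = delegate;
    this.systemAuth = systemAuth;
  }

  // new style: ingestProposal(event, false) -- credentials are implicit
  void ingestProposal(Proposal proposal, boolean async) throws Exception {
    delegate.ingestProposal(proposal, systemAuth, async);
  }
}
```

Only the credential argument disappears; the remaining boolean is the async flag that was already present.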
- entityClient.ingestProposal(event, this.systemAuthentication, false); + entityClient.ingestProposal(event, false); } catch (Throwable throwable) { log.error("MCP Processor Error", throwable); log.error("Message: {}", record); diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index db01be3ccebdf..53e7765152aef 100644 --- a/metadata-models/build.gradle +++ b/metadata-models/build.gradle @@ -34,6 +34,7 @@ dependencies { swaggerCodegen externalDependency.swaggerCli testImplementation externalDependency.guava + testImplementation externalDependency.testngJava8 } sourceSets { diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle index 1ffeb99e7ad4a..60d622dea5447 100644 --- a/metadata-service/auth-impl/build.gradle +++ b/metadata-service/auth-impl/build.gradle @@ -24,4 +24,5 @@ dependencies { annotationProcessor externalDependency.lombok testImplementation externalDependency.mockito + testImplementation externalDependency.testng } \ No newline at end of file diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/telemetry/TrackingService.java b/metadata-service/auth-impl/src/main/java/com/datahub/telemetry/TrackingService.java index 85f25895e0d49..ac27e1a16c8b7 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/telemetry/TrackingService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/telemetry/TrackingService.java @@ -102,7 +102,8 @@ public void emitAnalyticsEvent(@Nonnull final JsonNode event) { try { _mixpanelAPI.sendMessage(_mixpanelMessageBuilder.event(getClientId(), eventType, sanitizedEvent)); } catch (IOException e) { - log.error("Failed to send event to Mixpanel", e); + log.info("Failed to send event to Mixpanel; this does not affect the functionality of the application"); + log.debug("Failed to send event to Mixpanel", e); } } diff --git a/metadata-service/configuration/build.gradle b/metadata-service/configuration/build.gradle index 30fa3079d29a4..bf79469633b0f 100644 --- a/metadata-service/configuration/build.gradle +++ b/metadata-service/configuration/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation externalDependency.slf4jApi implementation externalDependency.springCore + implementation externalDependency.springBeans compileOnly externalDependency.lombok diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java index 38934cb9a3d2f..aff0e23e3b337 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.config.cache; +import com.linkedin.metadata.config.cache.client.ClientCacheConfiguration; import lombok.Data; @@ -8,4 +9,5 @@ public class CacheConfiguration { PrimaryCacheConfiguration primary; HomepageCacheConfiguration homepage; SearchCacheConfiguration search; + ClientCacheConfiguration client; } diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java new file mode 100644 index 0000000000000..3cf7ef20797bb --- /dev/null +++ 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java @@ -0,0 +1,10 @@ +package com.linkedin.metadata.config.cache.client; + + +public interface ClientCacheConfig { + boolean isEnabled(); + boolean isStatsEnabled(); + int getStatsIntervalSeconds(); + int getDefaultTTLSeconds(); + int getMaxBytes(); +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java new file mode 100644 index 0000000000000..d940bbe135e55 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +@Data +public class ClientCacheConfiguration { + EntityClientCacheConfig entityClient; + UsageClientCacheConfig usageClient; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java new file mode 100644 index 0000000000000..595b614f2f599 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java @@ -0,0 +1,17 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +import java.util.Map; + +@Data +public class EntityClientCacheConfig implements ClientCacheConfig { + private boolean enabled; + private boolean statsEnabled; + private int statsIntervalSeconds; + private int defaultTTLSeconds; + private int maxBytes; + + // entityName -> aspectName -> cache ttl override + private Map<String, Map<String, Integer>> entityAspectTTLSeconds; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java new file mode 100644 index 0000000000000..3aebec9422ed8 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +@Data +public class UsageClientCacheConfig implements ClientCacheConfig { + private boolean enabled; + private boolean statsEnabled; + private int statsIntervalSeconds; + private int defaultTTLSeconds; + private int maxBytes; +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java similarity index 87% rename from metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java index 1542407697d1b..c10399c4f3e70 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java @@ -1,14 +1,18 @@ -package com.linkedin.gms.factory.spring; +package com.linkedin.metadata.spring; -import java.io.IOException; -import 
java.util.Properties; import org.springframework.beans.factory.config.YamlPropertiesFactoryBean; import org.springframework.core.env.PropertiesPropertySource; import org.springframework.core.env.PropertySource; import org.springframework.core.io.support.EncodedResource; import org.springframework.core.io.support.PropertySourceFactory; +import java.io.IOException; +import java.util.Properties; + +/** + * Required for Spring to parse the application.yml provided by this module + */ public class YamlPropertySourceFactory implements PropertySourceFactory { @Override diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index ea959bebf25ad..42749d8205d21 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -327,3 +327,27 @@ cache: lineage: ttlSeconds: ${CACHE_SEARCH_LINEAGE_TTL_SECONDS:86400} # 1 day lightningThreshold: ${CACHE_SEARCH_LINEAGE_LIGHTNING_THRESHOLD:300} + client: + usageClient: + enabled: ${CACHE_CLIENT_USAGE_CLIENT_ENABLED:true} + statsEnabled: ${CACHE_CLIENT_USAGE_CLIENT_STATS_ENABLED:true} + statsIntervalSeconds: ${CACHE_CLIENT_USAGE_CLIENT_STATS_INTERVAL_SECONDS:120} + defaultTTLSeconds: ${CACHE_CLIENT_USAGE_CLIENT_TTL_SECONDS:86400} # 1 day + maxBytes: ${CACHE_CLIENT_USAGE_CLIENT_MAX_BYTES:52428800} # 50MB + entityClient: + enabled: ${CACHE_CLIENT_ENTITY_CLIENT_ENABLED:true} + statsEnabled: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_ENABLED:true} + statsIntervalSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_INTERVAL_SECONDS:120} + defaultTTLSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_TTL_SECONDS:0} # do not cache entity/aspects by default + maxBytes: ${CACHE_CLIENT_ENTITY_CLIENT_MAX_BYTES:104857600} # 100MB + entityAspectTTLSeconds: + # cache user aspects for 20s + corpuser: + corpUserKey: 20 + corpUserInfo: 20 + corpUserEditableInfo: 20 + corpUserStatus: 20 + globalTags: 20 + status: 20 + corpUserCredentials: 20 + corpUserSettings: 20 diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index ed072398178de..bf50a0c7b6473 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -19,7 +19,7 @@ import com.datahub.plugins.loader.PluginPermissionManagerImpl; import com.google.common.collect.ImmutableMap; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import java.nio.file.Path; import java.nio.file.Paths; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java index 30e03d87a8b56..5b298a453547a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java @@ -4,7 +4,7 @@ import com.datahub.authorization.DataHubAuthorizer; import com.linkedin.metadata.client.JavaEntityClient; import 
com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java index fc010a1aa2cae..6b2a61882be90 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java @@ -1,9 +1,10 @@ package com.linkedin.gms.factory.auth; import com.datahub.authentication.token.StatefulTokenService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import javax.annotation.Nonnull; + +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java index 9d29b8e77d02d..57598abf8095d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java @@ -4,7 +4,7 @@ import com.datahub.authentication.group.GroupService; import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java index 47f7ef0e0c1eb..105f4c677a9e4 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.auth; import com.datahub.authentication.invite.InviteTokenService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.secret.SecretService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java index ca52420b440b2..3df499ea9392e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java @@ -4,7 +4,7 @@ import 
com.datahub.authentication.user.NativeUserService; import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.secret.SecretService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java index 8e5e5e5cfc667..cc6f5c8272f9d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.auth; import com.datahub.authentication.post.PostService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java index 42f3e797c33bd..8a85f63cdd66d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java @@ -3,7 +3,7 @@ package com.linkedin.gms.factory.auth; import com.datahub.authorization.role.RoleService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java index d6c171dc741e4..5bdd8cbf83c65 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java @@ -3,7 +3,7 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import lombok.Data; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java index c1c5acbc1fddc..51c7db5e37366 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java @@ -3,7 +3,7 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import 
com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java index 89f196b056ee0..504618ba9cc6a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.common; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.systemmetadata.ESSystemMetadataDAO; import com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java index d57da336429d9..0dce80b98964b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.apache.http.ssl.SSLContextBuilder; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java index 02e31c7dc4f57..94593eb1fb84c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java index a2816830f33ce..ada8466d302e6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java index 5ab5b14160e27..6bf8ff123b221 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.ebean.config.ServerConfig; import io.ebean.datasource.DataSourceConfig; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java index a364504d443f7..65b6115d6638e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.concurrent.TimeUnit; import org.neo4j.driver.AuthTokens; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java index 1da66f3192f80..3c40b30bfc7d1 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.common; import com.linkedin.gms.factory.auth.AwsRequestSigningApacheInterceptor; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.io.IOException; import javax.annotation.Nonnull; import javax.net.ssl.HostnameVerifier; @@ -24,9 +24,9 @@ import org.apache.http.nio.reactor.IOReactorException; import org.apache.http.nio.reactor.IOReactorExceptionHandler; import org.apache.http.ssl.SSLContexts; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index e07630111a567..465480be344c7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -12,7 +12,7 @@ import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import lombok.Data; import org.springframework.boot.context.properties.ConfigurationProperties; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java index c0f2c8e1f1223..6eab711603c52 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.dataproduct; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.service.DataProductService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java index c9c3953f4d998..e1c24b805437b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java @@ -1,8 +1,11 @@ package com.linkedin.gms.factory.entity; +import com.datahub.authentication.Authentication; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.entity.client.RestliEntityClient; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; +import com.linkedin.metadata.client.SystemJavaEntityClient; import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.event.EventProducer; @@ -53,12 +56,8 @@ public class JavaEntityClientFactory { @Qualifier("kafkaEventProducer") private EventProducer _eventProducer; - @Autowired - @Qualifier("restliEntityClient") - private RestliEntityClient _restliEntityClient; - @Bean("javaEntityClient") - public JavaEntityClient getJavaEntityClient() { + public JavaEntityClient getJavaEntityClient(@Qualifier("restliEntityClient") final RestliEntityClient restliEntityClient) { return new JavaEntityClient( _entityService, _deleteEntityService, @@ -68,6 +67,24 @@ public JavaEntityClient getJavaEntityClient() { _lineageSearchService, _timeseriesAspectService, _eventProducer, - _restliEntityClient); + restliEntityClient); + } + + 
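Before the hunk continues below with the new systemJavaEntityClient bean, note the wiring pattern JavaEntityClientFactory switches to: the Rest.li client arrives as a @Qualifier-annotated method parameter instead of an @Autowired field, so the plain and system client variants can coexist without ambiguity, and the system variant additionally receives systemAuthentication plus the new client-cache settings via configurationProvider.getCache().getClient().getEntityClient(). A hedged Spring sketch of that shape, with a placeholder Client type standing in for the real classes:

```java
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

// Placeholder for RestliEntityClient/JavaEntityClient; not the real types.
interface Client {}

@Configuration
class ClientWiringSketch {

  // Qualified method parameters state exactly which upstream bean each
  // factory method consumes, so two similar beans never collide.
  @Bean("javaEntityClient")
  Client javaEntityClient(@Qualifier("restliEntityClient") Client restli) {
    return restli; // the real factory wraps this in a JavaEntityClient
  }

  @Bean("systemJavaEntityClient")
  Client systemJavaEntityClient(@Qualifier("systemRestliEntityClient") Client restli) {
    return restli; // real code also passes systemAuthentication + cache config
  }
}
```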
@Bean("systemJavaEntityClient") + public SystemJavaEntityClient systemJavaEntityClient(@Qualifier("configurationProvider") final ConfigurationProvider configurationProvider, + @Qualifier("systemAuthentication") final Authentication systemAuthentication, + @Qualifier("systemRestliEntityClient") final RestliEntityClient restliEntityClient) { + return new SystemJavaEntityClient( + _entityService, + _deleteEntityService, + _entitySearchService, + _cachingEntitySearchService, + _searchService, + _lineageSearchService, + _timeseriesAspectService, + _eventProducer, + restliEntityClient, + systemAuthentication, + configurationProvider.getCache().getClient().getEntityClient()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java index e149ecedfa6f6..dfc5e835392df 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java @@ -1,10 +1,14 @@ package com.linkedin.gms.factory.entity; +import com.datahub.authentication.Authentication; import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.restli.DefaultRestliClientFactory; import com.linkedin.parseq.retry.backoff.ExponentialBackoff; import com.linkedin.restli.client.Client; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -48,4 +52,17 @@ public RestliEntityClient getRestliEntityClient() { } return new RestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries); } + + @Bean("systemRestliEntityClient") + public SystemRestliEntityClient systemRestliEntityClient(@Qualifier("configurationProvider") final ConfigurationProvider configurationProvider, + @Qualifier("systemAuthentication") final Authentication systemAuthentication) { + final Client restClient; + if (gmsUri != null) { + restClient = DefaultRestliClientFactory.getRestLiClient(URI.create(gmsUri), gmsSslProtocol); + } else { + restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); + } + return new SystemRestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries, + systemAuthentication, configurationProvider.getCache().getClient().getEntityClient()); + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java index b13bf5813d47e..ff56f19e4f8fd 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.entity; import com.datastax.oss.driver.api.core.CqlSession; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import 
com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java index 471f079683d60..cda21f8907867 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.entityregistry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import java.io.IOException; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java index 150e1e48f39af..6dbb07309c7cc 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.entityregistry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import java.io.FileNotFoundException; import java.net.MalformedURLException; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 0be69e5dad58d..c50b4c9088bc2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -20,6 +20,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.recommendation.RecommendationServiceFactory; +import com.linkedin.metadata.client.SystemJavaEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; @@ -39,7 +40,7 @@ import com.linkedin.metadata.version.GitVersion; import com.linkedin.usage.UsageClient; import javax.annotation.Nonnull; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; @@ -65,6 +66,10 @@ public class GraphQLEngineFactory { @Qualifier("javaEntityClient") private JavaEntityClient _entityClient; + @Autowired + @Qualifier("systemJavaEntityClient") + private SystemJavaEntityClient 
_systemEntityClient; + @Autowired @Qualifier("graphClient") private GraphClient _graphClient; @@ -170,6 +175,7 @@ public class GraphQLEngineFactory { protected GraphQLEngine getInstance() { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); args.setEntityClient(_entityClient); + args.setSystemEntityClient(_systemEntityClient); args.setGraphClient(_graphClient); args.setUsageClient(_usageClient); if (isAnalyticsEnabled) { diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java index b310ee25cbcbb..9beb617c4f6e8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java @@ -6,7 +6,7 @@ import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java index 66f556066497f..675f015d9e378 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.kafka; import com.linkedin.gms.factory.common.TopicConventionFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.mxe.TopicConvention; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java index e58661b357e6a..c67a2e704681f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java @@ -6,7 +6,7 @@ import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.KafkaSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.Arrays; import java.util.Map; import org.apache.avro.generic.IndexedRecord; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java index 59f08e3733704..ac1cbbc5cc5ff 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java @@ -5,7 +5,7 @@ import com.amazonaws.services.schemaregistry.utils.AWSSchemaRegistryConstants; import com.amazonaws.services.schemaregistry.utils.AvroRecordType; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.HashMap; import java.util.Map; import java.util.Optional; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java index d0e11baab9089..7b72ba3f3bb88 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.kafka.schemaregistry; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClientConfig; import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; import io.confluent.kafka.serializers.KafkaAvroDeserializer; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java index f76549c90af68..8596a14b7fc24 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.lineage; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java index 512a0a1fa40ab..3a1f18692fdc6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.ownership; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.OwnershipTypeService; import javax.annotation.Nonnull; diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java index f2bdce908319e..f98c5bd50467d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.query; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.QueryService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/MostPopularCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/MostPopularCandidateSourceFactory.java index c74f5e11cadce..c266b3635b16f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/MostPopularCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/MostPopularCandidateSourceFactory.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.recommendation.candidatesource.MostPopularSource; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import javax.annotation.Nonnull; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java index 58584a4d957de..109cc8dbc82d1 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.recommendation.candidatesource.RecentlyEditedSource; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import javax.annotation.Nonnull; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlySearchedCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlySearchedCandidateSourceFactory.java index b3779a132284f..5209f65a2ec63 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlySearchedCandidateSourceFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlySearchedCandidateSourceFactory.java @@ -5,7 +5,7 @@ import com.linkedin.metadata.recommendation.candidatesource.RecentlySearchedSource; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import javax.annotation.Nonnull; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyViewedCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyViewedCandidateSourceFactory.java index d0505e8d2a3ea..aea40b4d8eb46 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyViewedCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyViewedCandidateSourceFactory.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.recommendation.candidatesource.RecentlyViewedSource; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import javax.annotation.Nonnull; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java index eeb32ae1ddbf9..c99d429e986b6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java @@ -2,13 +2,13 @@ import com.linkedin.gms.factory.common.IndexConventionFactory; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Value; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java index 7b20e798b79f2..845c63c32e0fd 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.search; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.client.CachingEntitySearchService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java index 956157f70e6bc..5deffdb01d247 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java @@ -1,13 +1,13 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java index decbc2e12a998..b619ee9516dce 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java @@ -6,14 +6,14 @@ import com.linkedin.gms.factory.common.IndexConventionFactory; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.version.GitVersion; import javax.annotation.Nonnull; import javax.annotation.Nullable; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 03dd2d072b4a0..a2a0dbaf89c79 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -7,7 +7,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java index 94b3f40849a13..e2eef83bc6e3f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.common.GraphServiceFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java index e1fe0399cb115..a186d2de770f3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.search; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java index 70307e51f3256..64bb0218a0d71 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchService; diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java index b6bfef6ed8c78..840a370957706 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java index 006b992191cfa..60bcd9ea22be6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search.views; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.ViewService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java index 73ec79fa7ed08..2e22d43913493 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.settings; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.SettingsService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/DailyReport.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/DailyReport.java index 2972316856a8d..2610ebd3528cd 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/DailyReport.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/DailyReport.java @@ -12,7 +12,7 @@ import java.io.IOException; import java.util.Optional; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.joda.time.DateTime; import org.json.JSONObject; import org.springframework.scheduling.annotation.Scheduled; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java index b2982d1f8ed9d..8178ce1399aa3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.telemetry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.mixpanel.mixpanelapi.MixpanelAPI; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java index aa8596786ce11..5385c5e81f804 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.telemetry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.mixpanel.mixpanelapi.MessageBuilder; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/ScheduledAnalyticsFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/ScheduledAnalyticsFactory.java index c5501067ff393..7cdca996a8131 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/ScheduledAnalyticsFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/ScheduledAnalyticsFactory.java @@ -6,7 +6,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.version.GitVersion; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.client.RestHighLevelClient; +import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java index 3b53a6fe92810..bb166af5501b3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.telemetry; import com.datahub.telemetry.TrackingService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.secret.SecretService; import com.linkedin.metadata.version.GitVersion; diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java index e9b9850c01a2b..89a7e7dd8d71a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.timeline; import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.timeline.eventgenerator.AssertionRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DataProcessInstanceRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DatasetPropertiesChangeEventGenerator; @@ -38,7 +38,7 @@ public class EntityChangeEventGeneratorRegistryFactory { @Singleton @Nonnull protected com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry() { - final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class); + final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class); final Authentication systemAuthentication = applicationContext.getBean(Authentication.class); final com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry registry = @@ -74,7 +74,7 @@ protected com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGenerat // Data Process Instance differs registry.register(DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, - new DataProcessInstanceRunEventChangeEventGenerator(entityClient, systemAuthentication)); + new DataProcessInstanceRunEventChangeEventGenerator(entityClient)); // TODO: Add ML models. 
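Note the pattern in this factory: SystemRestliEntityClient carries its own system Authentication (via the SystemEntityClient interface introduced later in this diff), so DataProcessInstanceRunEventChangeEventGenerator no longer takes the authentication as a separate constructor argument. A minimal sketch of a consumer written against this pattern; the class and method names here are illustrative, not part of the change:

import com.linkedin.common.urn.Urn;
import com.linkedin.entity.client.SystemEntityClient;
import com.linkedin.r2.RemoteInvocationException;

// Hypothetical consumer: previously this would hold an EntityClient plus a
// system Authentication; the system client now bundles the two.
public class RunEventEnricher {
  private final SystemEntityClient entityClient;

  public RunEventEnricher(SystemEntityClient entityClient) {
    this.entityClient = entityClient;
  }

  boolean parentExists(Urn parentUrn) throws RemoteInvocationException {
    // exists(urn) is a SystemEntityClient default method that supplies the
    // system authentication internally (see SystemEntityClient below).
    return entityClient.exists(parentUrn);
  }
}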
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java index df9d80eb63a02..baa22d401387f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.timeline; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.timeline.TimelineService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java index 717adf7d559b7..e3cc772f21c40 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java index e4cbb92cebbba..e83cbc82d8067 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java @@ -1,10 +1,14 @@ package com.linkedin.gms.factory.usage; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.datahub.authentication.Authentication; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.restli.DefaultRestliClientFactory; import com.linkedin.parseq.retry.backoff.ExponentialBackoff; import com.linkedin.restli.client.Client; import com.linkedin.usage.UsageClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -33,10 +37,15 @@ public class UsageClientFactory { @Value("${usageClient.numRetries:3}") private int numRetries; + @Autowired + @Qualifier("configurationProvider") + private ConfigurationProvider configurationProvider; + @Bean("usageClient") - public UsageClient getUsageClient() { + public UsageClient 
getUsageClient(@Qualifier("systemAuthentication") final Authentication systemAuthentication) { Client restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); - return new UsageClient(restClient, new ExponentialBackoff(retryInterval), numRetries); + return new UsageClient(restClient, new ExponentialBackoff(retryInterval), numRetries, systemAuthentication, + configurationProvider.getCache().getClient().getUsageClient()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java index 91fc58d074ed6..e038cb230c458 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.boot.factories; import com.linkedin.gms.factory.entity.RetentionServiceFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; diff --git a/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactoryTest.java b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactoryTest.java index 859c8e18cacff..266039afb45d5 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactoryTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactoryTest.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; -import org.elasticsearch.action.support.WriteRequest; +import org.opensearch.action.support.WriteRequest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.boot.test.context.SpringBootTest; diff --git a/metadata-service/factories/src/test/java/io/datahubproject/telemetry/TelemetryUtilsTest.java b/metadata-service/factories/src/test/java/io/datahubproject/telemetry/TelemetryUtilsTest.java index 28c47f169a111..fe0d61986b4a6 100644 --- a/metadata-service/factories/src/test/java/io/datahubproject/telemetry/TelemetryUtilsTest.java +++ b/metadata-service/factories/src/test/java/io/datahubproject/telemetry/TelemetryUtilsTest.java @@ -7,8 +7,8 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.*; +import static org.testng.AssertJUnit.assertEquals; public class TelemetryUtilsTest { diff --git a/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java b/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java index 02ca5182cd2be..c200e63e0d497 100644 --- a/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java +++ 
b/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java @@ -11,11 +11,11 @@ import java.util.concurrent.TimeUnit; import java.util.function.Supplier; -import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest; -import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.cluster.health.ClusterHealthStatus; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.HttpStatus; diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java index 98f0db8fd10ef..83b1b3f87c724 100644 --- a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java @@ -7,7 +7,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizerChain; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; -import org.elasticsearch.action.search.SearchResponse; +import org.opensearch.action.search.SearchResponse; import org.mockito.Mockito; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java index 8f87b041a7e03..b4e87eedea542 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java @@ -1,7 +1,7 @@ package io.datahubproject.openapi.util; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; import io.datahubproject.openapi.dto.UpsertAspectRequest; @@ -17,8 +17,8 @@ import java.util.List; -import static org.junit.Assert.assertNotNull; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; @Import({OpenAPIEntityTestConfiguration.class}) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index 7910982a63133..f29461734ebfc 100644 --- 
a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java
@@ -13,7 +13,7 @@
 import io.swagger.v3.oas.annotations.tags.Tag;
 import java.util.List;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.client.tasks.GetTaskResponse;
+import org.opensearch.client.tasks.GetTaskResponse;
 import org.json.JSONObject;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
diff --git a/metadata-service/restli-client/build.gradle b/metadata-service/restli-client/build.gradle
index 45cf008d3ca7d..b1b778b45c0b5 100644
--- a/metadata-service/restli-client/build.gradle
+++ b/metadata-service/restli-client/build.gradle
@@ -7,6 +7,7 @@ dependencies {
   api project(path: ':metadata-service:restli-api', configuration: 'restClient')
   api project(':metadata-events:mxe-schemas')
   api project(':metadata-utils')
+  implementation project(':metadata-service:configuration')
 
   implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java
new file mode 100644
index 0000000000000..8aa0984be57b9
--- /dev/null
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java
@@ -0,0 +1,134 @@
+package com.linkedin.common.client;
+
+import com.codahale.metrics.Gauge;
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.Expiry;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.Weigher;
+import com.github.benmanes.caffeine.cache.stats.CacheStats;
+import com.linkedin.metadata.config.cache.client.ClientCacheConfig;
+import com.linkedin.metadata.utils.metrics.MetricUtils;
+import lombok.Builder;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+/**
+ * Generic cache with common configuration for limited weight, per item expiry, and batch loading
+ * @param <K> key
+ * @param <V> value
+ */
+@Slf4j
+@Builder
+public class ClientCache<K, V, C extends ClientCacheConfig> {
+  @NonNull
+  protected final C config;
+  @NonNull
+  protected final LoadingCache<K, V> cache;
+  @NonNull
+  private final Function<Iterable<? extends K>, Map<K, V>> loadFunction;
+  @NonNull
+  private final Weigher<K, V> weigher;
+  @NonNull
+  private final BiFunction<C, K, Integer> ttlSecondsFunction;
+
+  public @Nullable V get(@NonNull K key) {
+    return cache.get(key);
+  }
+
+  public @NonNull Map<@NonNull K, @NonNull V> getAll(@NonNull Iterable<? extends K> keys) {
+    return cache.getAll(keys);
+  }
+
+  public void refresh(@NonNull K key) {
+    cache.refresh(key);
+  }
+
+  public static class ClientCacheBuilder<K, V, C extends ClientCacheConfig> {
+
+    private ClientCacheBuilder<K, V, C> cache(LoadingCache<K, V> cache) {
+      return null;
+    }
+    private ClientCache<K, V, C> build() {
+      return null;
+    }
+
+    public ClientCache<K, V, C> build(Class<?> metricClazz) {
+      // loads data from entity client
+      CacheLoader<K, V> loader = new CacheLoader<>() {
+        @Override
+        public V load(@NonNull K key) {
+          return loadAll(List.of(key)).get(key);
+        }
+
+        @Override
+        @NonNull
+        public Map<K, V> loadAll(@NonNull Iterable<? extends K> keys) {
+          return loadFunction.apply(keys);
+        }
+      };
+
+      // build cache
+      Caffeine<K, V> caffeine = Caffeine.newBuilder()
+          .maximumWeight(config.getMaxBytes())
+          // limit total size
+          .weigher(weigher)
+          .softValues()
+          // define per entity/aspect ttls
+          .expireAfter(new Expiry<K, V>() {
+            public long expireAfterCreate(@NonNull K key, @NonNull V aspect, long currentTime) {
+              int ttlSeconds = ttlSecondsFunction.apply(config, key);
+              if (ttlSeconds < 0) {
+                ttlSeconds = Integer.MAX_VALUE;
+              }
+              return TimeUnit.SECONDS.toNanos(ttlSeconds);
+            }
+            public long expireAfterUpdate(@NonNull K key, @NonNull V aspect,
+                long currentTime, long currentDuration) {
+              return currentDuration;
+            }
+            public long expireAfterRead(@NonNull K key, @NonNull V aspect,
+                long currentTime, long currentDuration) {
+              return currentDuration;
+            }
+          });
+
+      if (config.isStatsEnabled()) {
+        caffeine.recordStats();
+      }
+
+      LoadingCache<K, V> cache = caffeine.build(loader);
+
+      if (config.isStatsEnabled()) {
+        ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(1);
+        executor.scheduleAtFixedRate(() -> {
+          CacheStats cacheStats = cache.stats();
+
+          MetricUtils.gauge(metricClazz, "hitRate", () -> (Gauge<?>) cacheStats::hitRate);
+          MetricUtils.gauge(metricClazz, "loadFailureRate", () ->
+              (Gauge<?>) cacheStats::loadFailureRate);
+          MetricUtils.gauge(metricClazz, "evictionCount", () ->
+              (Gauge<?>) cacheStats::evictionCount);
+          MetricUtils.gauge(metricClazz, "loadFailureCount", () ->
+              (Gauge<?>) cacheStats::loadFailureCount);
+          MetricUtils.gauge(metricClazz, "averageLoadPenalty", () ->
+              (Gauge<?>) cacheStats::averageLoadPenalty);
+          MetricUtils.gauge(metricClazz, "evictionWeight", () ->
+              (Gauge<?>) cacheStats::evictionWeight);
+
+          log.debug(metricClazz.getSimpleName() + ": " + cacheStats);
+        }, 0, config.getStatsIntervalSeconds(), TimeUnit.SECONDS);
+      }
+
+      return new ClientCache<>(config, cache, loadFunction, weigher, ttlSecondsFunction);
+    }
+  }
+}
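ClientCache is the shared building block for the client-side caches that follow: a Caffeine LoadingCache bounded by total weight (bytes rather than entry count), with a pluggable per-key TTL function and a batch load function. A minimal wiring sketch under stated assumptions; the String key/value types, the fetchValue helper, and the flat 300-second TTL are illustrative only:

import com.linkedin.common.client.ClientCache;
import com.linkedin.metadata.config.cache.client.ClientCacheConfig;

import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class ClientCacheExample {
  static ClientCache<String, String, ClientCacheConfig> build(ClientCacheConfig config) {
    return ClientCache.<String, String, ClientCacheConfig>builder()
        .config(config)
        // batch loader: invoked once per batch of missing keys
        .loadFunction(keys -> StreamSupport.stream(keys.spliterator(), false)
            .collect(Collectors.toMap(Function.identity(), ClientCacheExample::fetchValue)))
        // weigh entries by payload size so maxBytes caps memory, not entry count
        .weigher((key, value) -> value.getBytes().length)
        // flat TTL here; EntityClientCache below derives TTLs per entity/aspect
        .ttlSecondsFunction((cfg, key) -> 300)
        .build(ClientCacheExample.class);
  }

  private static String fetchValue(String key) {
    return "value-for-" + key; // stand-in for a remote call
  }
}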
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
new file mode 100644
index 0000000000000..3b35dc528915a
--- /dev/null
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
@@ -0,0 +1,141 @@
+package com.linkedin.entity.client;
+
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.Weigher;
+import com.linkedin.common.client.ClientCache;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.entity.EnvelopedAspectMap;
+import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig;
+import com.linkedin.util.Pair;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NonNull;
+
+import javax.annotation.Nonnull;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName;
+
+@Builder
+public class EntityClientCache {
+  @NonNull
+  private EntityClientCacheConfig config;
+  @NonNull
+  private final ClientCache<Key, EnvelopedAspect, EntityClientCacheConfig> cache;
+  @NonNull
+  private BiFunction<Set<Urn>, Set<String>, Map<Urn, EntityResponse>> loadFunction;
+
+  public EntityResponse getV2(@Nonnull final Urn urn, @Nonnull final Set<String> aspectNames) {
+    return batchGetV2(Set.of(urn), aspectNames).get(urn);
+  }
+
+  public Map<Urn, EntityResponse> batchGetV2(@Nonnull final Set<Urn> urns, @Nonnull final Set<String> aspectNames) {
+    final Map<Urn, EntityResponse> response;
+
+    if (config.isEnabled()) {
+      Set<Key> keys = urns.stream()
+          .flatMap(urn -> aspectNames.stream()
+              .map(a -> Key.builder().urn(urn).aspectName(a).build()))
+          .collect(Collectors.toSet());
+      Map<Key, EnvelopedAspect> envelopedAspects = cache.getAll(keys);
+
+      Set<EntityResponse> responses = envelopedAspects.entrySet().stream()
+          .map(entry -> Pair.of(entry.getKey().getUrn(), entry.getValue()))
+          .collect(Collectors.groupingBy(Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet())))
+          .entrySet().stream().map(e -> toEntityResponse(e.getKey(), e.getValue()))
+          .collect(Collectors.toSet());
+
+      response = responses.stream().collect(Collectors.toMap(EntityResponse::getUrn, Function.identity()));
+    } else {
+      response = loadFunction.apply(urns, aspectNames);
+    }
+
+    return response;
+  }
+
+  private static EntityResponse toEntityResponse(Urn urn, Collection<EnvelopedAspect> envelopedAspects) {
+    final EntityResponse response = new EntityResponse();
+    response.setUrn(urn);
+    response.setEntityName(urnToEntityName(urn));
+    response.setAspects(new EnvelopedAspectMap(
+        envelopedAspects.stream()
+            .collect(Collectors.toMap(EnvelopedAspect::getName, aspect -> aspect))
+    ));
+    return response;
+  }
+
+  public static class EntityClientCacheBuilder {
+
+    private EntityClientCacheBuilder cache(LoadingCache<?, ?> cache) {
+      return this;
+    }
+
+    public EntityClientCache build(Class<?> metricClazz) {
+      // estimate size
+      Weigher<Key, EnvelopedAspect> weighByEstimatedSize = (key, value) ->
+          value.getValue().data().values().parallelStream()
+              .mapToInt(o -> o.toString().getBytes().length)
+              .sum();
+
+      // batch loads data from entity client (restli or java)
+      Function<Iterable<? extends Key>, Map<Key, EnvelopedAspect>> loader = (Iterable<? extends Key> keys) -> {
+        Map<String, Set<Key>> keysByEntity = StreamSupport.stream(keys.spliterator(), true)
+            .collect(Collectors.groupingBy(Key::getEntityName, Collectors.toSet()));
+
+        Stream<Map.Entry<Key, EnvelopedAspect>> results = keysByEntity.entrySet().parallelStream()
+            .flatMap(entry -> {
+              Set<Urn> urns = entry.getValue().stream()
+                  .map(Key::getUrn)
+                  .collect(Collectors.toSet());
+              Set<String> aspects = entry.getValue().stream()
+                  .map(Key::getAspectName)
+                  .collect(Collectors.toSet());
+              return loadFunction.apply(urns, aspects).entrySet().stream();
+            })
+            .flatMap(resp -> resp.getValue().getAspects().values().stream()
+                .map(envAspect -> {
+                  Key key = Key.builder().urn(resp.getKey()).aspectName(envAspect.getName()).build();
+                  return Map.entry(key, envAspect);
+                }));
+
+        return results.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+      };
+
+      // ideally the cache time comes from caching headers from service, but configuration driven for now
+      BiFunction<EntityClientCacheConfig, Key, Integer> ttlSeconds = (config, key) ->
+          Optional.ofNullable(config.getEntityAspectTTLSeconds()).orElse(Map.of())
+              .getOrDefault(key.getEntityName(), Map.of())
+              .getOrDefault(key.getAspectName(), config.getDefaultTTLSeconds());
+
+      cache = ClientCache.<Key, EnvelopedAspect, EntityClientCacheConfig>builder()
+          .weigher(weighByEstimatedSize)
+          .config(config)
+          .loadFunction(loader)
+          .ttlSecondsFunction(ttlSeconds)
+          .build(metricClazz);
+
+      return new EntityClientCache(config, cache, loadFunction);
+    }
+  }
+
+  @Data
+  @Builder
+  protected static class Key {
+    private final Urn urn;
+    private final String aspectName;
+
+    public String getEntityName() {
+      return urn.getEntityType();
+    }
+  }
+}
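EntityClientCache keys entries per (urn, aspect) pair rather than per entity, so one entity's aspects can expire independently and TTLs can be tuned per entity type and per aspect. A small sketch of the lookup order implemented by the ttlSeconds function above; entity/aspect names and TTL values here are hypothetical:

import java.util.Map;
import java.util.Optional;

public class TtlResolutionExample {
  // assumed config shape: entity name -> aspect name -> TTL seconds
  static int ttlFor(Map<String, Map<String, Integer>> entityAspectTTLSeconds,
      int defaultTTLSeconds, String entityName, String aspectName) {
    return Optional.ofNullable(entityAspectTTLSeconds).orElse(Map.of())
        .getOrDefault(entityName, Map.of())
        .getOrDefault(aspectName, defaultTTLSeconds);
  }

  public static void main(String[] args) {
    Map<String, Map<String, Integer>> ttls = Map.of("corpuser", Map.of("corpUserInfo", 20));
    System.out.println(ttlFor(ttls, 600, "corpuser", "corpUserInfo"));     // 20
    System.out.println(ttlFor(ttls, 600, "dataset", "datasetProperties")); // 600 (default)
  }
}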
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java
new file mode 100644
index 0000000000000..94067abd0cf65
--- /dev/null
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java
@@ -0,0 +1,91 @@
+package com.linkedin.entity.client;
+
+import com.datahub.authentication.Authentication;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig;
+import com.linkedin.mxe.MetadataChangeProposal;
+import com.linkedin.mxe.PlatformEvent;
+import com.linkedin.r2.RemoteInvocationException;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.net.URISyntaxException;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Adds entity/aspect cache and assumes system authentication
+ */
+public interface SystemEntityClient extends EntityClient {
+
+  EntityClientCache getEntityClientCache();
+  Authentication getSystemAuthentication();
+
+  /**
+   * Builds the cache
+   * @param systemAuthentication system authentication
+   * @param cacheConfig cache configuration
+   * @return the cache
+   */
+  default EntityClientCache buildEntityClientCache(Class<?> metricClazz, Authentication systemAuthentication, EntityClientCacheConfig cacheConfig) {
+    return EntityClientCache.builder()
+        .config(cacheConfig)
+        .loadFunction((Set<Urn> urns, Set<String> aspectNames) -> {
+          try {
+            String entityName = urns.stream().findFirst().map(Urn::getEntityType).get();
+
+            if (urns.stream().anyMatch(urn -> !urn.getEntityType().equals(entityName))) {
+              throw new IllegalArgumentException("Urns must be of the same entity type. RestliEntityClient API limitation.");
+            }
+
+            return batchGetV2(entityName, urns, aspectNames, systemAuthentication);
+          } catch (RemoteInvocationException | URISyntaxException e) {
+            throw new RuntimeException(e);
+          }
+        }).build(metricClazz);
+  }
+
+  /**
+   * Get an entity by urn with the given aspects
+   * @param urn the id of the entity
+   * @param aspectNames aspects of the entity
+   * @return response object
+   * @throws RemoteInvocationException
+   * @throws URISyntaxException
+   */
+  @Nullable
+  default EntityResponse getV2(@Nonnull Urn urn, @Nonnull Set<String> aspectNames)
+      throws RemoteInvocationException, URISyntaxException {
+    return getEntityClientCache().getV2(urn, aspectNames);
+  }
+
+  /**
+   * Batch get a set of aspects for a single entity type, multiple ids with the given aspects.
+   *
+   * @param urns the urns of the entities to batch get
+   * @param aspectNames the aspect names to batch get
+   * @throws RemoteInvocationException
+   */
+  @Nonnull
+  default Map<Urn, EntityResponse> batchGetV2(@Nonnull Set<Urn> urns, @Nonnull Set<String> aspectNames)
+      throws RemoteInvocationException, URISyntaxException {
+    return getEntityClientCache().batchGetV2(urns, aspectNames);
+  }
+
+  default void producePlatformEvent(@Nonnull String name, @Nullable String key, @Nonnull PlatformEvent event) throws Exception {
+    producePlatformEvent(name, key, event, getSystemAuthentication());
+  }
+
+  default boolean exists(@Nonnull Urn urn) throws RemoteInvocationException {
+    return exists(urn, getSystemAuthentication());
+  }
+
+  default String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, final boolean async) throws RemoteInvocationException {
+    return ingestProposal(metadataChangeProposal, getSystemAuthentication(), async);
+  }
+
+  default void setWritable(boolean canWrite) throws RemoteInvocationException {
+    setWritable(canWrite, getSystemAuthentication());
+  }
+}
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java
new file mode 100644
index 0000000000000..f3c343534209c
--- /dev/null
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java
@@ -0,0 +1,25 @@
+package com.linkedin.entity.client;
+
+import com.datahub.authentication.Authentication;
+import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig;
+import com.linkedin.parseq.retry.backoff.BackoffPolicy;
+import com.linkedin.restli.client.Client;
+import lombok.Getter;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Restli backed SystemEntityClient
+ */
+@Getter
+public class SystemRestliEntityClient extends RestliEntityClient implements SystemEntityClient {
+  private final EntityClientCache entityClientCache;
+  private final Authentication systemAuthentication;
+
+  public SystemRestliEntityClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount,
+      Authentication systemAuthentication, EntityClientCacheConfig cacheConfig) {
+    super(restliClient, backoffPolicy, retryCount);
+    this.systemAuthentication = systemAuthentication;
+    this.entityClientCache = buildEntityClientCache(SystemRestliEntityClient.class, systemAuthentication, cacheConfig);
+  }
+}
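The interface does two jobs: the read methods (getV2/batchGetV2) route through EntityClientCache, and the remaining defaults forward to the underlying EntityClient with getSystemAuthentication() supplied, so callers never pass credentials explicitly. A usage sketch; the urn value and aspect name are illustrative:

import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.entity.EntityResponse;
import com.linkedin.entity.client.SystemEntityClient;

import java.util.Set;

public class SystemClientReadExample {
  static EntityResponse readStatus(SystemEntityClient client) throws Exception {
    Urn urn = UrnUtils.getUrn("urn:li:corpuser:datahub");
    // first call loads via batchGetV2(entityName, urns, aspects, systemAuth);
    // repeat calls within the TTL are served from EntityClientCache
    return client.getV2(urn, Set.of("status"));
  }
}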
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java
index 47a15ccdd3ffc..d2b8499615e8d 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java
@@ -5,6 +5,7 @@
 import com.linkedin.common.WindowDuration;
 import com.linkedin.common.client.BaseClient;
+import com.linkedin.metadata.config.cache.client.UsageClientCacheConfig;
 import com.linkedin.parseq.retry.backoff.BackoffPolicy;
 import com.linkedin.r2.RemoteInvocationException;
 import com.linkedin.restli.client.Client;
@@ -17,19 +18,39 @@ public class UsageClient extends BaseClient {
   private static final UsageStatsRequestBuilders USAGE_STATS_REQUEST_BUILDERS =
       new UsageStatsRequestBuilders();
 
-  public UsageClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount) {
+  private final UsageClientCache usageClientCache;
+
+  public UsageClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount,
+                     Authentication systemAuthentication, UsageClientCacheConfig cacheConfig) {
     super(restliClient, backoffPolicy, retryCount);
+    this.usageClientCache = UsageClientCache.builder()
+        .config(cacheConfig)
+        .loadFunction((String resource, UsageTimeRange range) -> {
+          try {
+            return getUsageStats(resource, range, systemAuthentication);
+          } catch (RemoteInvocationException | URISyntaxException e) {
+            throw new RuntimeException(e);
+          }
+        }).build();
+  }
+
+  /**
+   * Gets usage stats for the given resource and time range,
+   * using the client-side cache and system authentication.
+   * Validate permissions before use!
+   */
+  @Nonnull
+  public UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range) {
+    return usageClientCache.getUsageStats(resource, range);
   }
 
   /**
    * Gets a specific version of downstream {@link EntityRelationships} for the given dataset.
    */
   @Nonnull
-  public UsageQueryResult getUsageStats(
-      @Nonnull String resource,
-      @Nonnull UsageTimeRange range,
-      @Nonnull Authentication authentication
-  ) throws RemoteInvocationException, URISyntaxException {
+  private UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range,
+      @Nonnull Authentication authentication)
+      throws RemoteInvocationException, URISyntaxException {
     final UsageStatsDoQueryRangeRequestBuilder requestBuilder = USAGE_STATS_REQUEST_BUILDERS.actionQueryRange()
         .resourceParam(resource)
         .durationParam(WindowDuration.DAY)
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java
new file mode 100644
index 0000000000000..a04c1e90fb4a3
--- /dev/null
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java
@@ -0,0 +1,75 @@
+package com.linkedin.usage;
+
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.Weigher;
+import com.linkedin.common.client.ClientCache;
+import com.linkedin.metadata.config.cache.client.UsageClientCacheConfig;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NonNull;
+
+import javax.annotation.Nonnull;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+
+@Builder
+public class UsageClientCache {
+  @NonNull
+  private UsageClientCacheConfig config;
+  @NonNull
+  private final ClientCache<Key, UsageQueryResult, UsageClientCacheConfig> cache;
+  @NonNull
+  private BiFunction<String, UsageTimeRange, UsageQueryResult> loadFunction;
+
+  public UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range) {
+    if (config.isEnabled()) {
+      return cache.get(Key.builder().resource(resource).range(range).build());
+    } else {
+      return loadFunction.apply(resource, range);
+    }
+  }
+
+  public static class UsageClientCacheBuilder {
+
+    private UsageClientCacheBuilder cache(LoadingCache<?, ?> cache) {
+      return this;
+    }
+
+    public UsageClientCache build() {
+      // estimate size
+      Weigher<Key, UsageQueryResult> weighByEstimatedSize = (key, value) ->
+          value.data().values().parallelStream()
+              .mapToInt(o -> o.toString().getBytes().length)
+              .sum();
+
+      // batch loads data from usage client
+      Function<Iterable<? extends Key>, Map<Key, UsageQueryResult>> loader = (Iterable<? extends Key> keys) ->
+          StreamSupport.stream(keys.spliterator(), true)
+              .map(k -> Map.entry(k, loadFunction.apply(k.getResource(), k.getRange())))
+              .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+
+      // default ttl only
+      BiFunction<UsageClientCacheConfig, Key, Integer> ttlSeconds = (config, key) -> config.getDefaultTTLSeconds();
+
+      cache = ClientCache.<Key, UsageQueryResult, UsageClientCacheConfig>builder()
+          .weigher(weighByEstimatedSize)
+          .config(config)
+          .loadFunction(loader)
+          .ttlSecondsFunction(ttlSeconds)
+          .build(UsageClientCache.class);
+
+      return new UsageClientCache(config, cache, loadFunction);
+    }
+  }
+
+  @Data
+  @Builder
+  protected static class Key {
+    private final String resource;
+    private final UsageTimeRange range;
+  }
+}
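UsageClient follows the same shape: the new two-argument getUsageStats overload is cached and executes as the system user, which is why its javadoc warns "Validate permissions before use!". Authorizing the end user stays with the caller, as the UsageStats resource change below shows. A usage sketch; the dataset urn parameter is illustrative:

import com.linkedin.usage.UsageClient;
import com.linkedin.usage.UsageQueryResult;
import com.linkedin.usage.UsageTimeRange;

public class UsageLookupExample {
  static UsageQueryResult lastMonth(UsageClient usageClient, String datasetUrn) {
    // served from UsageClientCache within the default TTL; runs with system auth
    return usageClient.getUsageStats(datasetUrn, UsageTimeRange.MONTH);
  }
}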
diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java
index 17de9ceea35a3..1e6523e774d66 100644
--- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java
+++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java
@@ -35,7 +35,7 @@
 import javax.inject.Inject;
 import javax.inject.Named;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.client.tasks.GetTaskResponse;
+import org.opensearch.client.tasks.GetTaskResponse;
 import org.json.JSONObject;
 
 import static com.linkedin.metadata.Constants.*;
diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java
index ddfdec0315f6b..be70cf9c494ef 100644
--- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java
+++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java
@@ -380,8 +380,10 @@ public Task<UsageQueryResult> query(@ActionParam(PARAM_RESOURCE) @Nonnull String
   public Task<UsageQueryResult> queryRange(@ActionParam(PARAM_RESOURCE) @Nonnull String resource,
       @ActionParam(PARAM_DURATION) @Nonnull WindowDuration duration, @ActionParam(PARAM_RANGE) UsageTimeRange range) {
     Authentication auth = AuthenticationContext.getAuthentication();
+    Urn resourceUrn = UrnUtils.getUrn(resource);
     if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV))
-        && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), (ResourceSpec) null)) {
+        && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE),
+            new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) {
       throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage.");
     }
diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/operations/OperationsResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/operations/OperationsResourceTest.java
index 665bc3cfc277c..470c6e87040ec 100644
--- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/operations/OperationsResourceTest.java
+++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/operations/OperationsResourceTest.java
@@ -3,12 +3,13 @@
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
 import com.linkedin.util.Pair;
 import java.util.List;
-import junit.framework.TestCase;
 import mock.MockTimeseriesAspectService;
 import org.testng.annotations.Test;
+import static org.testng.AssertJUnit.*;
 
-public class OperationsResourceTest extends TestCase {
+
+public class OperationsResourceTest {
   private static final String TASK_ID = "taskId123";
diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle
index 99345d6f6bc3f..22c62af324c12 100644
--- a/metadata-service/services/build.gradle
+++ b/metadata-service/services/build.gradle
@@ -63,8 +63,6 @@ dependencies {
 }
 
 test {
-  // https://docs.gradle.org/current/userguide/performance.html
-  maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
   testLogging.showStandardStreams = true
   testLogging.exceptionFormat = 'full'
 }
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java
index ac17c882c24b6..357a5df2edd44 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java
@@ -20,18 +20,18 @@
 import javax.annotation.Nonnull;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.client.indices.GetIndexRequest;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.aggregations.AggregationBuilder;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.BucketOrder;
-import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.client.indices.GetIndexRequest;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.search.aggregations.AggregationBuilder;
+import org.opensearch.search.aggregations.AggregationBuilders;
+import org.opensearch.search.aggregations.BucketOrder;
+import org.opensearch.search.aggregations.bucket.terms.ParsedTerms;
+import org.opensearch.search.builder.SearchSourceBuilder;
 
 @Slf4j
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java
index e6f2106bd5c3e..ea59885e8b6d5 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java
@@ -8,7 +8,7 @@
 import java.util.Optional;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
-import org.elasticsearch.client.tasks.GetTaskResponse;
+import org.opensearch.client.tasks.GetTaskResponse;
 
 public interface SystemMetadataService {
diff --git
a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index 2c26c00e9c4d6..d788222c5d87b 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -9,16 +9,16 @@ import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.MatchAllQueryBuilder; -import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.SimpleQueryStringBuilder; -import org.elasticsearch.index.query.TermQueryBuilder; -import org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBuilder; -import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; -import org.elasticsearch.index.query.functionscore.WeightBuilder; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.SimpleQueryStringBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.index.query.functionscore.WeightBuilder; import org.springframework.web.context.WebApplicationContext; import org.springframework.web.context.support.WebApplicationContextUtils; diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/util/CSVWriter.java b/metadata-service/servlet/src/main/java/com/datahub/gms/util/CSVWriter.java index e9d1308e857d8..79d4f7077b797 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/util/CSVWriter.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/util/CSVWriter.java @@ -2,8 +2,8 @@ import lombok.Builder; -import org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBuilder; -import org.elasticsearch.index.query.functionscore.WeightBuilder; +import org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuilder; +import org.opensearch.index.query.functionscore.WeightBuilder; import java.io.PrintWriter; import java.util.stream.Collectors; diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 9f8ef70a0e728..1c1c368611488 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -26,6 +26,7 @@ dependencies { testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') + testImplementation externalDependency.testng constraints { implementation(externalDependency.log4jCore) { diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java index 8b4b500dfc455..69bd3b461eb12 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java +++ 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java
@@ -18,8 +18,8 @@
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
 import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.QueryBuilders;
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java
index 3d90cba85b0fb..9a8848e090fb8 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java
@@ -1,6 +1,7 @@
 package com.linkedin.metadata.utils.metrics;
 
 import com.codahale.metrics.Counter;
+import com.codahale.metrics.Gauge;
 import com.codahale.metrics.MetricRegistry;
 import com.codahale.metrics.SharedMetricRegistries;
 import com.codahale.metrics.Timer;
@@ -48,4 +49,8 @@ public static Timer timer(Class<?> klass, String metricName) {
   public static Timer timer(String metricName) {
     return REGISTRY.timer(MetricRegistry.name(metricName));
   }
+
+  public static <T extends Gauge<?>> T gauge(Class<?> clazz, String metricName, MetricRegistry.MetricSupplier<T> supplier) {
+    return REGISTRY.gauge(MetricRegistry.name(clazz, metricName), supplier);
+  }
 }
diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh
index 41dca90acc9fc..93f03d36cbd19 100755
--- a/smoke-test/cypress-dev.sh
+++ b/smoke-test/cypress-dev.sh
@@ -17,4 +17,5 @@ npm install
 
 source ../../set-cypress-creds.sh
 
-npx cypress open
+npx cypress open \
+  --env "ADMIN_DISPLAYNAME=$CYPRESS_ADMIN_DISPLAYNAME,ADMIN_USERNAME=$CYPRESS_ADMIN_USERNAME,ADMIN_PASSWORD=$CYPRESS_ADMIN_PASSWORD"
diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh
index 050b5d2db95c9..cd747321ad602 100755
--- a/smoke-test/run-quickstart.sh
+++ b/smoke-test/run-quickstart.sh
@@ -12,7 +12,16 @@ pip install -r requirements.txt
 mkdir -p ~/.datahub/plugins/frontend/auth/
 echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props
 
+DATAHUB_SEARCH_IMAGE="${DATAHUB_SEARCH_IMAGE:=opensearchproject/opensearch}"
+DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}"
+XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}"
+ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}"
+USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}"
+
 echo "DATAHUB_VERSION = $DATAHUB_VERSION"
 DATAHUB_TELEMETRY_ENABLED=false \
 DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \
+DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \
+XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \
+USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \
 datahub docker quickstart --version ${DATAHUB_VERSION} --standalone_consumers --dump-logs-on-failure --kafka-setup
diff --git a/smoke-test/set-cypress-creds.sh b/smoke-test/set-cypress-creds.sh
index 0512724e9a269..82fe736b0a7e1 100644
--- a/smoke-test/set-cypress-creds.sh
+++ b/smoke-test/set-cypress-creds.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
 
 export CYPRESS_ADMIN_USERNAME=${ADMIN_USERNAME:-datahub}
-export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub}
\ No newline at end of file
+export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub}
+export
diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh
index 41dca90acc9fc..93f03d36cbd19 100755
--- a/smoke-test/cypress-dev.sh
+++ b/smoke-test/cypress-dev.sh
@@ -17,4 +17,5 @@ npm install
 
 source ../../set-cypress-creds.sh
 
-npx cypress open
+npx cypress open \
+  --env "ADMIN_DISPLAYNAME=$CYPRESS_ADMIN_DISPLAYNAME,ADMIN_USERNAME=$CYPRESS_ADMIN_USERNAME,ADMIN_PASSWORD=$CYPRESS_ADMIN_PASSWORD"
diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh
index 050b5d2db95c9..cd747321ad602 100755
--- a/smoke-test/run-quickstart.sh
+++ b/smoke-test/run-quickstart.sh
@@ -12,7 +12,16 @@ pip install -r requirements.txt
 mkdir -p ~/.datahub/plugins/frontend/auth/
 echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props
 
+DATAHUB_SEARCH_IMAGE="${DATAHUB_SEARCH_IMAGE:=opensearchproject/opensearch}"
+DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}"
+XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}"
+ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}"
+USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}"
+
 echo "DATAHUB_VERSION = $DATAHUB_VERSION"
 DATAHUB_TELEMETRY_ENABLED=false \
 DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \
+DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \
+XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \
+USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \
 datahub docker quickstart --version ${DATAHUB_VERSION} --standalone_consumers --dump-logs-on-failure --kafka-setup
diff --git a/smoke-test/set-cypress-creds.sh b/smoke-test/set-cypress-creds.sh
index 0512724e9a269..82fe736b0a7e1 100644
--- a/smoke-test/set-cypress-creds.sh
+++ b/smoke-test/set-cypress-creds.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
 
 export CYPRESS_ADMIN_USERNAME=${ADMIN_USERNAME:-datahub}
-export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub}
\ No newline at end of file
+export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub}
+export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub}
\ No newline at end of file
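Note the ${VAR:=default} expansions in run-quickstart.sh: unlike :-, the := form also assigns the default back to the variable when it is unset or empty, so the later DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" re-exports see the resolved value. That makes the new OpenSearch defaults overridable from the caller's environment, for example to run the same quickstart against stock Elasticsearch. An illustrative invocation; the image, tag, and setting values are assumptions, not taken from this changeset:

# Illustrative override: swap the OpenSearch default back to Elasticsearch.
DATAHUB_SEARCH_IMAGE=elasticsearch \
DATAHUB_SEARCH_TAG=7.10.1 \
XPACK_SECURITY_ENABLED=xpack.security.enabled=false \
ELASTICSEARCH_USE_SSL=false \
USE_AWS_ELASTICSEARCH=false \
./run-quickstart.sh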
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
index de9fa7ecda1f0..e0d2bf240d74d 100644
--- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
+++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
@@ -20,8 +20,7 @@ describe("glossary sidebar navigation test", () => {
     cy.waitTextVisible("No documentation yet");
     cy.openThreeDotDropdown();
     cy.clickOptionWithText("Move");
-    cy.get('[role="dialog"] [data-icon="close-circle"]').click({force: true});
-    cy.get('[role="dialog"]').contains(glossaryTermGroup).click();
+    cy.get('[role="dialog"]').contains(glossaryTermGroup).click({force: true});
     cy.get('[role="dialog"]').contains(glossaryTermGroup).should("be.visible");
     cy.get("button").contains("Move").click();
     cy.waitTextVisible("Moved Glossary Term!");
@@ -33,8 +32,7 @@
     cy.clickOptionWithText(glossaryTermGroup);
     cy.openThreeDotDropdown();
     cy.clickOptionWithText("Move");
-    cy.get('[role="dialog"] [data-icon="close-circle"]').click({force: true});
-    cy.get('[role="dialog"]').contains(glossaryParentGroup).click();
+    cy.get('[role="dialog"]').contains(glossaryParentGroup).click({force: true});
     cy.get('[role="dialog"]').contains(glossaryParentGroup).should("be.visible");
     cy.get("button").contains("Move").click();
     cy.waitTextVisible("Moved Term Group!");
diff --git a/smoke-test/tests/cypress/cypress/e2e/login/login.js b/smoke-test/tests/cypress/cypress/e2e/login/login.js
index f86741b5afe01..309eedb10b6da 100644
--- a/smoke-test/tests/cypress/cypress/e2e/login/login.js
+++ b/smoke-test/tests/cypress/cypress/e2e/login/login.js
@@ -4,6 +4,6 @@
     cy.get('input[data-testid=username]').type(Cypress.env('ADMIN_USERNAME'));
     cy.get('input[data-testid=password]').type(Cypress.env('ADMIN_PASSWORD'));
     cy.contains('Sign In').click();
-    cy.contains('Welcome back, DataHub');
+    cy.contains('Welcome back, ' + Cypress.env('ADMIN_DISPLAYNAME'));
   });
 })
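The specs above and below lean on custom commands such as cy.loginWithCredentials(), cy.waitTextVisible(), and cy.goToIngestionPage(), whose definitions live in the Cypress support file rather than in this diff. A hypothetical sketch of how loginWithCredentials could be defined, consistent with the selectors login.js uses; the real implementation in cypress/support/commands.js may differ:

// Hypothetical sketch only; not part of this changeset.
Cypress.Commands.add("loginWithCredentials", (username, password) => {
  cy.visit("/login");
  cy.get("input[data-testid=username]").type(username || Cypress.env("ADMIN_USERNAME"));
  cy.get("input[data-testid=password]").type(password || Cypress.env("ADMIN_PASSWORD"));
  cy.contains("Sign In").click();
  // Same greeting assertion the login spec now derives from the env.
  cy.contains("Welcome back, " + Cypress.env("ADMIN_DISPLAYNAME"));
});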
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js
new file mode 100644
index 0000000000000..466bb2ef0757e
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js
@@ -0,0 +1,105 @@
+const number = Math.floor(Math.random() * 100000);
+const account_id = `account${number}`;
+const warehouse_id = `warehouse${number}`;
+const username = `user${number}`;
+const password = `password${number}`;
+const role = `role${number}`;
+const ingestion_source_name = `ingestion source ${number}`;
+
+describe("managing secrets for ingestion creation", () => {
+  it("create a secret, create ingestion source using a secret, remove a secret", () => {
+    cy.loginWithCredentials();
+    // navigate to the manage ingestion page → secrets
+    cy.goToIngestionPage();
+    cy.clickOptionWithText("Secrets");
+    // create a new secret
+    cy.clickOptionWithText("Create new secret");
+    cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible");
+    cy.get('[role="dialog"] #name').type(`secretname${number}`);
+    cy.get('[role="dialog"] #value').type(`secretvalue${number}`);
+    cy.get('[role="dialog"] #description').type(`secretdescription${number}`);
+    cy.get('#createSecretButton').click();
+    cy.waitTextVisible("Successfully created Secret!");
+    cy.waitTextVisible(`secretname${number}`);
+    cy.waitTextVisible(`secretdescription${number}`).wait(5000); // prevent issue with missing secret
+    // create an ingestion source using a secret
+    cy.goToIngestionPage();
+    cy.clickOptionWithText("Create new source");
+    cy.clickOptionWithText("Snowflake");
+    cy.waitTextVisible("Snowflake Recipe");
+    cy.get("#account_id").type(account_id);
+    cy.get("#warehouse").type(warehouse_id);
+    cy.get("#username").type(username);
+    cy.get("#password").click().wait(1000);
+    cy.contains(`secretname${number}`).click({force: true});
+    cy.focused().blur();
+    cy.get("#role").type(role);
+    cy.get("button").contains("Next").click();
+    cy.waitTextVisible("Configure an Ingestion Schedule");
+    cy.get("button").contains("Next").click();
+    cy.waitTextVisible("Give this ingestion source a name.");
+    cy.get('[data-testid="source-name-input"]').type(ingestion_source_name);
+    cy.get("button").contains("Save").click();
+    cy.waitTextVisible("Successfully created ingestion source!").wait(5000); // prevent issue with missing form data
+    cy.waitTextVisible(ingestion_source_name);
+    cy.get("button").contains("Pending...").should("be.visible");
+    // remove a secret
+    cy.clickOptionWithText("Secrets");
+    cy.waitTextVisible(`secretname${number}`);
+    cy.get('[data-icon="delete"]').first().click();
+    cy.waitTextVisible("Confirm Secret Removal");
+    cy.get("button").contains("Yes").click();
+    cy.waitTextVisible("Removed secret.");
+    cy.ensureTextNotPresent(`secretname${number}`);
+    cy.ensureTextNotPresent(`secretdescription${number}`);
+    // remove ingestion source
+    cy.goToIngestionPage();
+    cy.get('[data-testid="delete-button"]').first().click();
+    cy.waitTextVisible("Confirm Ingestion Source Removal");
+    cy.get("button").contains("Yes").click();
+    cy.waitTextVisible("Removed ingestion source.");
+    cy.ensureTextNotPresent(ingestion_source_name);
+    // verify secret is not present during ingestion source creation for password dropdown
+    cy.clickOptionWithText("Create new source");
+    cy.clickOptionWithText("Snowflake");
+    cy.waitTextVisible("Snowflake Recipe");
+    cy.get("#account_id").type(account_id);
+    cy.get("#warehouse").type(warehouse_id);
+    cy.get("#username").type(username);
+    cy.get("#password").click().wait(1000);
+    cy.ensureTextNotPresent(`secretname${number}`);
+    // verify secret can be added during ingestion source creation and used successfully
+    cy.clickOptionWithText("Create Secret");
+    cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible");
+    cy.get('[role="dialog"] #name').type(`secretname${number}`);
+    cy.get('[role="dialog"] #value').type(`secretvalue${number}`);
+    cy.get('[role="dialog"] #description').type(`secretdescription${number}`);
+    cy.get('#createSecretButton').click();
+    cy.waitTextVisible("Created secret!");
+    cy.get("#role").type(role);
+    cy.get("button").contains("Next").click();
+    cy.waitTextVisible("Configure an Ingestion Schedule");
+    cy.get("button").contains("Next").click();
+    cy.waitTextVisible("Give this ingestion source a name.");
+    cy.get('[data-testid="source-name-input"]').type(ingestion_source_name);
+    cy.get("button").contains("Save").click();
+    cy.waitTextVisible("Successfully created ingestion source!").wait(5000); // prevent issue with missing form data
+    cy.waitTextVisible(ingestion_source_name);
+    cy.get("button").contains("Pending...").should("be.visible");
+    // remove ingestion source and secret
+    cy.goToIngestionPage();
+    cy.get('[data-testid="delete-button"]').first().click();
+    cy.waitTextVisible("Confirm Ingestion Source Removal");
+    cy.get("button").contains("Yes").click();
+    cy.waitTextVisible("Removed ingestion source.");
+    cy.ensureTextNotPresent(ingestion_source_name);
+    cy.clickOptionWithText("Secrets");
+    cy.waitTextVisible(`secretname${number}`);
+    cy.get('[data-icon="delete"]').first().click();
+    cy.waitTextVisible("Confirm Secret Removal");
+    cy.get("button").contains("Yes").click();
+    cy.waitTextVisible("Removed secret.");
+    cy.ensureTextNotPresent(`secretname${number}`);
+    cy.ensureTextNotPresent(`secretdescription${number}`);
+  });
+});
\ No newline at end of file
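The spec suffixes every secret, credential, and source name with the random `number` so repeated runs do not collide with leftovers from earlier runs. To iterate on just this spec locally, something like the following should work; the working directory (smoke-test/tests/cypress, as cypress-dev.sh assumes) and the env plumbing mirror the scripts above, but the exact invocation is illustrative:

# Illustrative: run only the new spec, reusing the credential defaults above.
source ../../set-cypress-creds.sh
npx cypress run \
  --spec "cypress/e2e/mutations/managing_secrets.js" \
  --env "ADMIN_DISPLAYNAME=$CYPRESS_ADMIN_DISPLAYNAME,ADMIN_USERNAME=$CYPRESS_ADMIN_USERNAME,ADMIN_PASSWORD=$CYPRESS_ADMIN_PASSWORD"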