diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index eb95078b1a640..98f3b82285eda 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -11,16 +11,19 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.restli.DefaultRestliClientFactory; import com.linkedin.parseq.retry.backoff.ExponentialBackoff; import com.linkedin.util.Configuration; +import config.ConfigurationProvider; import controllers.SsoCallbackController; + import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.List; + import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; @@ -34,6 +37,7 @@ import org.pac4j.play.store.PlayCookieSessionStore; import org.pac4j.play.store.PlaySessionStore; import org.pac4j.play.store.ShiroAesDataEncrypter; +import org.springframework.context.annotation.AnnotationConfigApplicationContext; import play.Environment; import play.cache.SyncCacheApi; import utils.ConfigUtil; @@ -104,7 +108,7 @@ protected void configure() { bind(SsoCallbackController.class).toConstructor(SsoCallbackController.class.getConstructor( SsoManager.class, Authentication.class, - EntityClient.class, + SystemEntityClient.class, AuthServiceClient.class, com.typesafe.config.Config.class)); } catch (NoSuchMethodException | SecurityException e) { @@ -161,10 +165,19 @@ protected Authentication provideSystemAuthentication() { @Provides @Singleton - protected EntityClient provideEntityClient() { - return new RestliEntityClient(buildRestliClient(), 
+ protected ConfigurationProvider provideConfigurationProvider() { + AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(ConfigurationProvider.class); + return context.getBean(ConfigurationProvider.class); + } + + @Provides + @Singleton + protected SystemEntityClient provideEntityClient(final Authentication systemAuthentication, + final ConfigurationProvider configurationProvider) { + return new SystemRestliEntityClient(buildRestliClient(), new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)), - _configs.getInt(ENTITY_CLIENT_NUM_RETRIES)); + _configs.getInt(ENTITY_CLIENT_NUM_RETRIES), systemAuthentication, + configurationProvider.getCache().getClient().getEntityClient()); } @Provides diff --git a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java index 85139d1db0868..4bde0872fc082 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java @@ -13,7 +13,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.SetMode; import com.linkedin.entity.Entity; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.identity.CorpGroupInfo; import com.linkedin.identity.CorpUserEditableInfo; @@ -78,13 +78,14 @@ public class OidcCallbackLogic extends DefaultCallbackLogic { private final SsoManager _ssoManager; - private final EntityClient _entityClient; + private final SystemEntityClient _entityClient; private final Authentication _systemAuthentication; private final AuthServiceClient _authClient; private final CookieConfigs _cookieConfigs; public OidcCallbackLogic(final SsoManager ssoManager, final Authentication systemAuthentication, - final EntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) { + final 
SystemEntityClient entityClient, final AuthServiceClient authClient, + final CookieConfigs cookieConfigs) { _ssoManager = ssoManager; _systemAuthentication = systemAuthentication; _entityClient = entityClient; diff --git a/datahub-frontend/app/config/ConfigurationProvider.java b/datahub-frontend/app/config/ConfigurationProvider.java new file mode 100644 index 0000000000000..00a5472ec3476 --- /dev/null +++ b/datahub-frontend/app/config/ConfigurationProvider.java @@ -0,0 +1,27 @@ +package config; + +import com.linkedin.metadata.config.cache.CacheConfiguration; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; +import lombok.Data; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.PropertySource; + + +/** + * Minimal sharing between metadata-service and frontend + * Initially for use of client caching configuration. + * Does not use the factories module to avoid transitive dependencies. 
+ */ +@EnableConfigurationProperties +@PropertySource(value = "application.yml", factory = YamlPropertySourceFactory.class) +@ConfigurationProperties +@Data +public class ConfigurationProvider { + + /** + * Configuration for caching + */ + private CacheConfiguration cache; +} diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java index 5a36d833deceb..7a4b5585cc21a 100644 --- a/datahub-frontend/app/controllers/SsoCallbackController.java +++ b/datahub-frontend/app/controllers/SsoCallbackController.java @@ -3,7 +3,7 @@ import auth.CookieConfigs; import client.AuthServiceClient; import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.concurrent.CompletableFuture; @@ -40,7 +40,7 @@ public class SsoCallbackController extends CallbackController { public SsoCallbackController( @Nonnull SsoManager ssoManager, @Nonnull Authentication systemAuthentication, - @Nonnull EntityClient entityClient, + @Nonnull SystemEntityClient entityClient, @Nonnull AuthServiceClient authClient, @Nonnull com.typesafe.config.Config configs) { _ssoManager = ssoManager; @@ -79,7 +79,7 @@ public class SsoCallbackLogic implements CallbackLogic { private final OidcCallbackLogic _oidcCallbackLogic; SsoCallbackLogic(final SsoManager ssoManager, final Authentication systemAuthentication, - final EntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) { + final SystemEntityClient entityClient, final AuthServiceClient authClient, final CookieConfigs cookieConfigs) { _oidcCallbackLogic = new OidcCallbackLogic(ssoManager, systemAuthentication, entityClient, authClient, cookieConfigs); } diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index e40f8e3eeb96d..daecba16cbf72 100644 --- 
a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -16,9 +16,6 @@ dependencies { implementation project(':datahub-web-react') constraints { - play(externalDependency.springCore) - play(externalDependency.springBeans) - play(externalDependency.springContext) play(externalDependency.jacksonDataBind) play('com.nimbusds:oauth2-oidc-sdk:8.36.2') play('com.nimbusds:nimbus-jose-jwt:8.18') @@ -35,7 +32,12 @@ dependencies { implementation project(":metadata-service:restli-client") implementation project(":metadata-service:auth-config") + implementation project(":metadata-service:configuration") + implementation externalDependency.springCore + implementation externalDependency.springBeans + implementation externalDependency.springContext + implementation externalDependency.springBootAutoconfigure implementation externalDependency.jettyJaas implementation externalDependency.graphqlJava implementation externalDependency.antlr4Runtime diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index d86234cf59306..3ba0cc1f747e3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -302,6 +302,7 @@ import com.linkedin.datahub.graphql.types.test.TestType; import com.linkedin.datahub.graphql.types.view.DataHubViewType; import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; @@ -364,6 +365,7 @@ public class GmsGraphQLEngine { private final EntityClient entityClient; + private final SystemEntityClient systemEntityClient; private final GraphClient graphClient; private final UsageClient 
usageClient; private final SiblingGraphService siblingGraphService; @@ -476,6 +478,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.graphQLPlugins.forEach(plugin -> plugin.init(args)); this.entityClient = args.entityClient; + this.systemEntityClient = args.systemEntityClient; this.graphClient = args.graphClient; this.usageClient = args.usageClient; this.siblingGraphService = args.siblingGraphService; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index cbcf42c4f93d9..157fb10ce7078 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; @@ -38,6 +39,7 @@ @Data public class GmsGraphQLEngineArgs { EntityClient entityClient; + SystemEntityClient systemEntityClient; GraphClient graphClient; UsageClient usageClient; AnalyticsService analyticsService; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index f27fd604a746f..23be49c7e7140 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -1,13 +1,16 @@ package com.linkedin.datahub.graphql.resolvers.dataset; +import com.datahub.authorization.ResourceSpec; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.usage.UsageClient; import com.linkedin.usage.UsageTimeRange; import com.linkedin.usage.UserUsageCounts; @@ -15,6 +18,7 @@ import graphql.schema.DataFetchingEnvironment; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -55,8 +59,15 @@ public CompletableFuture get(DataFetchingEnvironment enviro try { + if (!isAuthorized(resourceUrn, context)) { + log.debug("User {} is not authorized to view profile information for dataset {}", + context.getActorUrn(), + resourceUrn.toString()); + return null; + } + com.linkedin.usage.UsageQueryResult - usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), UsageTimeRange.MONTH, context.getAuthentication()); + usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), UsageTimeRange.MONTH); final DatasetStatsSummary result = new DatasetStatsSummary(); result.setQueryCountLast30Days(usageQueryResult.getAggregations().getTotalSqlQueries()); @@ -90,4 +101,10 @@ private CorpUser createPartialUser(final Urn userUrn) { result.setUrn(userUrn.toString()); return result; } + + private boolean 
isAuthorized(final Urn resourceUrn, final QueryContext context) { + return AuthorizationUtils.isAuthorized(context, + Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java index 0476963b92e9a..20361830ad5a5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java @@ -9,12 +9,10 @@ import com.linkedin.datahub.graphql.generated.UsageQueryResult; import com.linkedin.datahub.graphql.types.usage.UsageQueryResultMapper; import com.linkedin.metadata.authorization.PoliciesConfig; -import com.linkedin.r2.RemoteInvocationException; import com.linkedin.usage.UsageClient; import com.linkedin.usage.UsageTimeRange; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; -import java.net.URISyntaxException; import java.util.Optional; import java.util.concurrent.CompletableFuture; import lombok.extern.slf4j.Slf4j; @@ -44,10 +42,10 @@ public CompletableFuture get(DataFetchingEnvironment environme } try { com.linkedin.usage.UsageQueryResult - usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), range, context.getAuthentication()); + usageQueryResult = usageClient.getUsageStats(resourceUrn.toString(), range); return UsageQueryResultMapper.map(usageQueryResult); - } catch (RemoteInvocationException | URISyntaxException e) { - throw new RuntimeException(String.format("Failed to load Usage Stats for resource %s", resourceUrn.toString()), e); + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to load Usage Stats for 
resource %s", resourceUrn), e); } }); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java index 163628c1bc590..6a9617ea41b44 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java @@ -117,8 +117,7 @@ public void testGetException() throws Exception { UsageClient mockClient = Mockito.mock(UsageClient.class); Mockito.when(mockClient.getUsageStats( Mockito.eq(TEST_DASHBOARD_URN), - Mockito.eq(UsageTimeRange.MONTH), - Mockito.any(Authentication.class) + Mockito.eq(UsageTimeRange.MONTH) )).thenThrow(RuntimeException.class); // Execute resolver diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java index bd3edf65bf7ad..013e23b779c51 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java @@ -1,6 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.dataset; import com.datahub.authentication.Authentication; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -53,13 +55,18 @@ public void testGetSuccess() throws Exception { UsageClient mockClient = Mockito.mock(UsageClient.class); 
Mockito.when(mockClient.getUsageStats( Mockito.eq(TEST_DATASET_URN), - Mockito.eq(UsageTimeRange.MONTH), - Mockito.any(Authentication.class) + Mockito.eq(UsageTimeRange.MONTH) )).thenReturn(testResult); // Execute resolver DatasetStatsSummaryResolver resolver = new DatasetStatsSummaryResolver(mockClient); QueryContext mockContext = Mockito.mock(QueryContext.class); + Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); + Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); + AuthorizationResult mockAuthorizerResult = Mockito.mock(AuthorizationResult.class); + Mockito.when(mockAuthorizerResult.getType()).thenReturn(AuthorizationResult.Type.ALLOW); + Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(mockAuthorizerResult); + Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getSource()).thenReturn(TEST_SOURCE); @@ -79,8 +86,7 @@ public void testGetSuccess() throws Exception { newResult.setAggregations(new UsageQueryResultAggregations()); Mockito.when(mockClient.getUsageStats( Mockito.eq(TEST_DATASET_URN), - Mockito.eq(UsageTimeRange.MONTH), - Mockito.any(Authentication.class) + Mockito.eq(UsageTimeRange.MONTH) )).thenReturn(newResult); // Then verify that the new result is _not_ returned (cache hit) @@ -116,8 +122,7 @@ public void testGetException() throws Exception { UsageClient mockClient = Mockito.mock(UsageClient.class); Mockito.when(mockClient.getUsageStats( Mockito.eq(TEST_DATASET_URN), - Mockito.eq(UsageTimeRange.MONTH), - Mockito.any(Authentication.class) + Mockito.eq(UsageTimeRange.MONTH) )).thenThrow(RuntimeException.class); // Execute resolver diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java index e205fd2f5c20e..270aa11c7b070 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSDisableWriteModeStep.java @@ -1,11 +1,10 @@ package com.linkedin.datahub.upgrade.common.steps; -import com.datahub.authentication.Authentication; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import java.util.function.Function; import lombok.RequiredArgsConstructor; @@ -13,8 +12,7 @@ @RequiredArgsConstructor public class GMSDisableWriteModeStep implements UpgradeStep { - private final Authentication _systemAuthentication; - private final RestliEntityClient _entityClient; + private final SystemRestliEntityClient _entityClient; @Override public String id() { @@ -30,7 +28,7 @@ public int retryCount() { public Function executable() { return (context) -> { try { - _entityClient.setWritable(false, _systemAuthentication); + _entityClient.setWritable(false); } catch (Exception e) { e.printStackTrace(); context.report().addLine("Failed to turn write mode off in GMS"); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java index 270eff8df227c..8df02123983e8 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/common/steps/GMSEnableWriteModeStep.java @@ -1,20 +1,17 @@ package 
com.linkedin.datahub.upgrade.common.steps; -import com.datahub.authentication.Authentication; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import java.util.function.Function; import lombok.RequiredArgsConstructor; @RequiredArgsConstructor public class GMSEnableWriteModeStep implements UpgradeStep { - - private final Authentication _systemAuthentication; - private final RestliEntityClient _entityClient; + private final SystemRestliEntityClient _entityClient; @Override public String id() { @@ -30,7 +27,7 @@ public int retryCount() { public Function executable() { return (context) -> { try { - _entityClient.setWritable(true, _systemAuthentication); + _entityClient.setWritable(true); } catch (Exception e) { e.printStackTrace(); context.report().addLine("Failed to turn write mode back on in GMS"); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java index 30175c6fa78c8..cd264e529e9a5 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java @@ -1,8 +1,7 @@ package com.linkedin.datahub.upgrade.config; -import com.datahub.authentication.Authentication; import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.models.registry.EntityRegistry; import io.ebean.Database; @@ -21,15 +20,14 @@ public class 
NoCodeUpgradeConfig { ApplicationContext applicationContext; @Bean(name = "noCodeUpgrade") - @DependsOn({"ebeanServer", "entityService", "systemAuthentication", "restliEntityClient", "entityRegistry"}) + @DependsOn({"ebeanServer", "entityService", "systemRestliEntityClient", "entityRegistry"}) @Nonnull public NoCodeUpgrade createInstance() { final Database ebeanServer = applicationContext.getBean(Database.class); final EntityService entityService = applicationContext.getBean(EntityService.class); - final Authentication systemAuthentication = applicationContext.getBean(Authentication.class); - final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class); + final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class); final EntityRegistry entityRegistry = applicationContext.getBean(EntityRegistry.class); - return new NoCodeUpgrade(ebeanServer, entityService, entityRegistry, systemAuthentication, entityClient); + return new NoCodeUpgrade(ebeanServer, entityService, entityRegistry, entityClient); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java index 9b0fcf279abf5..97a08800534de 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java @@ -1,8 +1,7 @@ package com.linkedin.datahub.upgrade.config; -import com.datahub.authentication.Authentication; import com.linkedin.datahub.upgrade.restorebackup.RestoreBackup; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -22,19 +21,18 @@ 
public class RestoreBackupConfig { ApplicationContext applicationContext; @Bean(name = "restoreBackup") - @DependsOn({"ebeanServer", "entityService", "systemAuthentication", "restliEntityClient", "graphService", + @DependsOn({"ebeanServer", "entityService", "systemRestliEntityClient", "graphService", "searchService", "entityRegistry"}) @Nonnull public RestoreBackup createInstance() { final Database ebeanServer = applicationContext.getBean(Database.class); final EntityService entityService = applicationContext.getBean(EntityService.class); - final Authentication systemAuthentication = applicationContext.getBean(Authentication.class); - final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class); + final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class); final GraphService graphClient = applicationContext.getBean(GraphService.class); final EntitySearchService searchClient = applicationContext.getBean(EntitySearchService.class); final EntityRegistry entityRegistry = applicationContext.getBean(EntityRegistry.class); - return new RestoreBackup(ebeanServer, entityService, entityRegistry, systemAuthentication, entityClient, + return new RestoreBackup(ebeanServer, entityService, entityRegistry, entityClient, graphClient, searchClient); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java index ee4a3bc504e77..a299deb874721 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java @@ -1,13 +1,12 @@ package com.linkedin.datahub.upgrade.nocode; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableMap; import com.linkedin.datahub.upgrade.Upgrade; import 
com.linkedin.datahub.upgrade.UpgradeCleanupStep; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.common.steps.GMSEnableWriteModeStep; import com.linkedin.datahub.upgrade.common.steps.GMSQualificationStep; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.models.registry.EntityRegistry; import io.ebean.Database; @@ -30,12 +29,10 @@ public NoCodeUpgrade( final Database server, final EntityService entityService, final EntityRegistry entityRegistry, - final Authentication systemAuthentication, - final RestliEntityClient entityClient) { + final SystemRestliEntityClient entityClient) { _steps = buildUpgradeSteps( server, entityService, entityRegistry, - systemAuthentication, entityClient); _cleanupSteps = buildCleanupSteps(); } @@ -63,15 +60,14 @@ private List buildUpgradeSteps( final Database server, final EntityService entityService, final EntityRegistry entityRegistry, - final Authentication systemAuthentication, - final RestliEntityClient entityClient) { + final SystemRestliEntityClient entityClient) { final List steps = new ArrayList<>(); steps.add(new RemoveAspectV2TableStep(server)); steps.add(new GMSQualificationStep(ImmutableMap.of("noCode", "true"))); steps.add(new UpgradeQualificationStep(server)); steps.add(new CreateAspectTableStep(server)); steps.add(new DataMigrationStep(server, entityService, entityRegistry)); - steps.add(new GMSEnableWriteModeStep(systemAuthentication, entityClient)); + steps.add(new GMSEnableWriteModeStep(entityClient)); return steps; } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java index 67718a6739beb..9175ad606e3c8 100644 --- 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java @@ -1,6 +1,5 @@ package com.linkedin.datahub.upgrade.restorebackup; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.upgrade.Upgrade; import com.linkedin.datahub.upgrade.UpgradeCleanupStep; @@ -9,7 +8,7 @@ import com.linkedin.datahub.upgrade.common.steps.ClearSearchServiceStep; import com.linkedin.datahub.upgrade.common.steps.GMSDisableWriteModeStep; import com.linkedin.datahub.upgrade.common.steps.GMSEnableWriteModeStep; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -27,11 +26,10 @@ public RestoreBackup( final Database server, final EntityService entityService, final EntityRegistry entityRegistry, - final Authentication systemAuthentication, - final RestliEntityClient entityClient, + final SystemRestliEntityClient entityClient, final GraphService graphClient, final EntitySearchService searchClient) { - _steps = buildSteps(server, entityService, entityRegistry, systemAuthentication, entityClient, graphClient, searchClient); + _steps = buildSteps(server, entityService, entityRegistry, entityClient, graphClient, searchClient); } @Override @@ -48,17 +46,16 @@ private List buildSteps( final Database server, final EntityService entityService, final EntityRegistry entityRegistry, - final Authentication systemAuthentication, - final RestliEntityClient entityClient, + final SystemRestliEntityClient entityClient, final GraphService graphClient, final EntitySearchService searchClient) { final List steps = new ArrayList<>(); - steps.add(new 
GMSDisableWriteModeStep(systemAuthentication, entityClient)); + steps.add(new GMSDisableWriteModeStep(entityClient)); steps.add(new ClearSearchServiceStep(searchClient, true)); steps.add(new ClearGraphServiceStep(graphClient, true)); steps.add(new ClearAspectV2TableStep(server)); steps.add(new RestoreStorageStep(entityService, entityRegistry)); - steps.add(new GMSEnableWriteModeStep(systemAuthentication, entityClient)); + steps.add(new GMSEnableWriteModeStep(entityClient)); return steps; } diff --git a/docs-website/markdown-link-check-config.json b/docs-website/markdown-link-check-config.json index 26e040edde6f7..2f5a51ada324e 100644 --- a/docs-website/markdown-link-check-config.json +++ b/docs-website/markdown-link-check-config.json @@ -1,50 +1,41 @@ { "ignorePatterns": [ { - "pattern": "^http://demo\\.datahubproject\\.io" + "pattern": "^https?://demo\\.datahubproject\\.io" }, { - "pattern": "^http://localhost" + "pattern": "^http://localhost" }, { - "pattern": "^http://www.famfamfam.com" + "pattern": "^/docs" }, { - "pattern": "^http://www.linkedin.com" + "pattern": "^/integrations" }, { - "pattern": "\\.md$" + "pattern": "^https?://www.linkedin.com" }, { - "pattern":"\\.json$" + "pattern": "\\.md(#.*)?$" }, { - "pattern":"\\.txt$" + "pattern": "\\.json$" }, { - "pattern": "\\.java$" + "pattern": "\\.txt$" }, { - "pattern": "\\.md#.*$" + "pattern": "\\.java$" }, { - "pattern": "^https://oauth2.googleapis.com/token" + "pattern": "^https://oauth2.googleapis.com/token" }, { - "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" + "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" }, { - "pattern": "#v(\\d+)-(\\d+)-(\\d+)" - }, - { - "pattern": "^https://github.com/mohdsiddique$" - }, - { - "pattern": "^https://github.com/2x$" - }, - { - "pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" + "pattern": 
"^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" } ], - "aliveStatusCodes": [200, 206, 0, 999, 400, 401, 403] -} \ No newline at end of file + "aliveStatusCodes": [200, 206, 0, 999] +} diff --git a/docs-website/package.json b/docs-website/package.json index 1722f92169692..eca6e5814d3c6 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -17,8 +17,10 @@ "generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs", "generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs", "lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js", - "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js && find ./genDocs -name \\*.md -not -path \"./genDocs/python-sdk/models.md\" -print0 | xargs -0 -n1 markdown-link-check -p -q -c markdown-link-check-config.json", - "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js" + "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js", + "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js", + "_list-link-check-files": "find ./genDocs -name '*.md' -not \\( -path './genDocs/python-sdk/*' -o -path './genDocs/releases.md' \\)", + "check-links": "yarn run -s _list-link-check-files -print0 | xargs -0 -n1 -t markdown-link-check -q -c markdown-link-check-config.json" }, "dependencies": { "@ant-design/icons": "^4.7.0", diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index b371ab181e133..38e965f7f6587 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -44,14 +44,17 @@ TelemetryClientIdClass, ) from datahub.utilities.perf_timer import PerfTimer -from 
datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn, guess_entity_type if TYPE_CHECKING: from datahub.ingestion.source.state.entity_removal_state import ( GenericCheckpointState, ) - from datahub.utilities.sqlglot_lineage import SchemaResolver, SqlParsingResult + from datahub.utilities.sqlglot_lineage import ( + GraphQLSchemaMetadata, + SchemaResolver, + SqlParsingResult, + ) logger = logging.getLogger(__name__) @@ -543,129 +546,110 @@ def get_container_urns_by_filter( logger.debug(f"yielding {x['entity']}") yield x["entity"] - def get_urns_by_filter( + def _bulk_fetch_schema_info_by_filter( self, *, - entity_types: Optional[List[str]] = None, platform: Optional[str] = None, platform_instance: Optional[str] = None, env: Optional[str] = None, query: Optional[str] = None, container: Optional[str] = None, status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED, - batch_size: int = 10000, + batch_size: int = 100, extraFilters: Optional[List[SearchFilterRule]] = None, - ) -> Iterable[str]: - """Fetch all urns that match all of the given filters. + ) -> Iterable[Tuple[str, "GraphQLSchemaMetadata"]]: + """Fetch schema info for datasets that match all of the given filters. - Filters are combined conjunctively. If multiple filters are specified, the results will match all of them. - Note that specifying a platform filter will automatically exclude all entity types that do not have a platform. - The same goes for the env filter. + :return: An iterable of (urn, schema info) tuple that match the filters. + """ + types = [_graphql_entity_type("dataset")] - :param entity_types: List of entity types to include. If None, all entity types will be returned. - :param platform: Platform to filter on. If None, all platforms will be returned. - :param platform_instance: Platform instance to filter on. If None, all platform instances will be returned. - :param env: Environment (e.g. PROD, DEV) to filter on. 
If None, all environments will be returned. - :param query: Query string to filter on. If None, all entities will be returned. - :param container: A container urn that entities must be within. - This works recursively, so it will include entities within sub-containers as well. - If None, all entities will be returned. - Note that this requires browsePathV2 aspects (added in 0.10.4+). - :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities. - :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters. + # Add the query default of * if no query is specified. + query = query or "*" - :return: An iterable of urns that match the filters. - """ + orFilters = self.generate_filter( + platform, platform_instance, env, container, status, extraFilters + ) - types: Optional[List[str]] = None - if entity_types is not None: - if not entity_types: - raise ValueError( - "entity_types cannot be an empty list; use None for all entities" - ) + graphql_query = textwrap.dedent( + """ + query scrollUrnsWithFilters( + $types: [EntityType!], + $query: String!, + $orFilters: [AndFilterInput!], + $batchSize: Int!, + $scrollId: String) { - types = [_graphql_entity_type(entity_type) for entity_type in entity_types] + scrollAcrossEntities(input: { + query: $query, + count: $batchSize, + scrollId: $scrollId, + types: $types, + orFilters: $orFilters, + searchFlags: { + skipHighlighting: true + skipAggregates: true + } + }) { + nextScrollId + searchResults { + entity { + urn + ... on Dataset { + schemaMetadata(version: 0) { + fields { + fieldPath + nativeDataType + } + } + } + } + } + } + } + """ + ) - # Add the query default of * if no query is specified. 
- query = query or "*" + variables = { + "types": types, + "query": query, + "orFilters": orFilters, + "batchSize": batch_size, + } + + for entity in self._scroll_across_entities(graphql_query, variables): + if entity.get("schemaMetadata"): + yield entity["urn"], entity["schemaMetadata"] + def generate_filter( + self, + platform: Optional[str], + platform_instance: Optional[str], + env: Optional[str], + container: Optional[str], + status: RemovedStatusFilter, + extraFilters: Optional[List[SearchFilterRule]], + ) -> List[Dict[str, List[SearchFilterRule]]]: andFilters: List[SearchFilterRule] = [] # Platform filter. if platform: - andFilters += [ - { - "field": "platform.keyword", - "values": [make_data_platform_urn(platform)], - "condition": "EQUAL", - } - ] + andFilters.append(self._get_platform_filter(platform)) # Platform instance filter. if platform_instance: - if platform: - # Massage the platform instance into a fully qualified urn, if necessary. - platform_instance = make_dataplatform_instance_urn( - platform, platform_instance - ) - - # Warn if platform_instance is not a fully qualified urn. - # TODO: Change this once we have a first-class data platform instance urn type. - if guess_entity_type(platform_instance) != "dataPlatformInstance": - raise ValueError( - f"Invalid data platform instance urn: {platform_instance}" - ) - - andFilters += [ - { - "field": "platformInstance", - "values": [platform_instance], - "condition": "EQUAL", - } - ] + andFilters.append( + self._get_platform_instance_filter(platform, platform_instance) + ) # Browse path v2 filter. if container: - # Warn if container is not a fully qualified urn. - # TODO: Change this once we have a first-class container urn type. 
- if guess_entity_type(container) != "container": - raise ValueError(f"Invalid container urn: {container}") - - andFilters += [ - { - "field": "browsePathV2", - "values": [container], - "condition": "CONTAIN", - } - ] + andFilters.append(self._get_container_filter(container)) # Status filter. - if status == RemovedStatusFilter.NOT_SOFT_DELETED: - # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the - # removed field is simply not present in the ElasticSearch document. Ideally this - # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to - # use a negated filter. - andFilters.append( - { - "field": "removed", - "values": ["true"], - "condition": "EQUAL", - "negated": True, - } - ) - elif status == RemovedStatusFilter.ONLY_SOFT_DELETED: - andFilters.append( - { - "field": "removed", - "values": ["true"], - "condition": "EQUAL", - } - ) - elif status == RemovedStatusFilter.ALL: - # We don't need to add a filter for this case. - pass - else: - raise ValueError(f"Invalid status filter: {status}") + status_filter = self._get_status_filer(status) + if status_filter: + andFilters.append(status_filter) # Extra filters. if extraFilters: @@ -673,33 +657,9 @@ def get_urns_by_filter( orFilters: List[Dict[str, List[SearchFilterRule]]] = [{"and": andFilters}] - # Env filter. + # Env filter if env: - # The env filter is a bit more tricky since it's not always stored - # in the same place in ElasticSearch. - - envOrConditions: List[SearchFilterRule] = [ - # For most entity types, we look at the origin field. - { - "field": "origin", - "value": env, - "condition": "EQUAL", - }, - # For containers, we look at the customProperties field. - # For any containers created after https://github.com/datahub-project/datahub/pull/8027, - # we look for the "env" property. Otherwise, we use the "instance" property. 
- { - "field": "customProperties", - "value": f"env={env}", - }, - { - "field": "customProperties", - "value": f"instance={env}", - }, - # Note that not all entity types have an env (e.g. dashboards / charts). - # If the env filter is specified, these will be excluded. - ] - + envOrConditions = self._get_env_or_conditions(env) # This matches ALL of the andFilters and at least one of the envOrConditions. orFilters = [ {"and": andFilters["and"] + [extraCondition]} @@ -707,6 +667,52 @@ def get_urns_by_filter( for andFilters in orFilters ] + return orFilters + + def get_urns_by_filter( + self, + *, + entity_types: Optional[List[str]] = None, + platform: Optional[str] = None, + platform_instance: Optional[str] = None, + env: Optional[str] = None, + query: Optional[str] = None, + container: Optional[str] = None, + status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED, + batch_size: int = 10000, + extraFilters: Optional[List[SearchFilterRule]] = None, + ) -> Iterable[str]: + """Fetch all urns that match all of the given filters. + + Filters are combined conjunctively. If multiple filters are specified, the results will match all of them. + Note that specifying a platform filter will automatically exclude all entity types that do not have a platform. + The same goes for the env filter. + + :param entity_types: List of entity types to include. If None, all entity types will be returned. + :param platform: Platform to filter on. If None, all platforms will be returned. + :param platform_instance: Platform instance to filter on. If None, all platform instances will be returned. + :param env: Environment (e.g. PROD, DEV) to filter on. If None, all environments will be returned. + :param query: Query string to filter on. If None, all entities will be returned. + :param container: A container urn that entities must be within. + This works recursively, so it will include entities within sub-containers as well. + If None, all entities will be returned. 
+ Note that this requires browsePathV2 aspects (added in 0.10.4+). + :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities. + :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters. + + :return: An iterable of urns that match the filters. + """ + + types = self._get_types(entity_types) + + # Add the query default of * if no query is specified. + query = query or "*" + + # Env filter. + orFilters = self.generate_filter( + platform, platform_instance, env, container, status, extraFilters + ) + graphql_query = textwrap.dedent( """ query scrollUrnsWithFilters( @@ -738,18 +744,26 @@ def get_urns_by_filter( """ ) + variables = { + "types": types, + "query": query, + "orFilters": orFilters, + "batchSize": batch_size, + } + + for entity in self._scroll_across_entities(graphql_query, variables): + yield entity["urn"] + + def _scroll_across_entities( + self, graphql_query: str, variables_orig: dict + ) -> Iterable[dict]: + variables = variables_orig.copy() first_iter = True scroll_id: Optional[str] = None while first_iter or scroll_id: first_iter = False + variables["scrollId"] = scroll_id - variables = { - "types": types, - "query": query, - "orFilters": orFilters, - "batchSize": batch_size, - "scrollId": scroll_id, - } response = self.execute_graphql( graphql_query, variables=variables, @@ -757,13 +771,116 @@ def get_urns_by_filter( data = response["scrollAcrossEntities"] scroll_id = data["nextScrollId"] for entry in data["searchResults"]: - yield entry["entity"]["urn"] + yield entry["entity"] if scroll_id: logger.debug( f"Scrolling to next scrollAcrossEntities page: {scroll_id}" ) + def _get_env_or_conditions(self, env: str) -> List[SearchFilterRule]: + # The env filter is a bit more tricky since it's not always stored + # in the same place in ElasticSearch. + return [ + # For most entity types, we look at the origin field. 
+ { + "field": "origin", + "value": env, + "condition": "EQUAL", + }, + # For containers, we look at the customProperties field. + # For any containers created after https://github.com/datahub-project/datahub/pull/8027, + # we look for the "env" property. Otherwise, we use the "instance" property. + { + "field": "customProperties", + "value": f"env={env}", + }, + { + "field": "customProperties", + "value": f"instance={env}", + }, + # Note that not all entity types have an env (e.g. dashboards / charts). + # If the env filter is specified, these will be excluded. + ] + + def _get_status_filer( + self, status: RemovedStatusFilter + ) -> Optional[SearchFilterRule]: + if status == RemovedStatusFilter.NOT_SOFT_DELETED: + # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the + # removed field is simply not present in the ElasticSearch document. Ideally this + # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to + # use a negated filter. + return { + "field": "removed", + "values": ["true"], + "condition": "EQUAL", + "negated": True, + } + + elif status == RemovedStatusFilter.ONLY_SOFT_DELETED: + return { + "field": "removed", + "values": ["true"], + "condition": "EQUAL", + } + + elif status == RemovedStatusFilter.ALL: + # We don't need to add a filter for this case. + return None + else: + raise ValueError(f"Invalid status filter: {status}") + + def _get_container_filter(self, container: str) -> SearchFilterRule: + # Warn if container is not a fully qualified urn. + # TODO: Change this once we have a first-class container urn type. 
+ if guess_entity_type(container) != "container": + raise ValueError(f"Invalid container urn: {container}") + + return { + "field": "browsePathV2", + "values": [container], + "condition": "CONTAIN", + } + + def _get_platform_instance_filter( + self, platform: Optional[str], platform_instance: str + ) -> SearchFilterRule: + if platform: + # Massage the platform instance into a fully qualified urn, if necessary. + platform_instance = make_dataplatform_instance_urn( + platform, platform_instance + ) + + # Warn if platform_instance is not a fully qualified urn. + # TODO: Change this once we have a first-class data platform instance urn type. + if guess_entity_type(platform_instance) != "dataPlatformInstance": + raise ValueError(f"Invalid data platform instance urn: {platform_instance}") + + return { + "field": "platformInstance", + "values": [platform_instance], + "condition": "EQUAL", + } + + def _get_platform_filter(self, platform: str) -> SearchFilterRule: + return { + "field": "platform.keyword", + "values": [make_data_platform_urn(platform)], + "condition": "EQUAL", + } + + def _get_types(self, entity_types: Optional[List[str]]) -> Optional[List[str]]: + types: Optional[List[str]] = None + if entity_types is not None: + if not entity_types: + raise ValueError( + "entity_types cannot be an empty list; use None for all entities" + ) + + types = [_graphql_entity_type(entity_type) for entity_type in entity_types] + return types + def get_latest_pipeline_checkpoint( self, pipeline_name: str, platform: str ) -> Optional[Checkpoint["GenericCheckpointState"]]: @@ -1033,43 +1150,36 @@ def initialize_schema_resolver_from_datahub( self, platform: str, platform_instance: Optional[str], env: str ) -> Tuple["SchemaResolver", Set[str]]: logger.info("Initializing schema resolver") - - # TODO: Filter on platform instance? 
- logger.info(f"Fetching urns for platform {platform}, env {env}") - with PerfTimer() as timer: - urns = set( - self.get_urns_by_filter( - entity_types=[DatasetUrn.ENTITY_TYPE], - platform=platform, - env=env, - batch_size=3000, - ) - ) - logger.info( - f"Fetched {len(urns)} urns in {timer.elapsed_seconds()} seconds" - ) - schema_resolver = self._make_schema_resolver( platform, platform_instance, env, include_graph=False ) + + logger.info(f"Fetching schemas for platform {platform}, env {env}") + urns = [] + count = 0 with PerfTimer() as timer: - count = 0 - for i, urn in enumerate(urns): - if i % 1000 == 0: - logger.debug(f"Loaded {i} schema metadata") + for urn, schema_info in self._bulk_fetch_schema_info_by_filter( + platform=platform, + platform_instance=platform_instance, + env=env, + ): try: - schema_metadata = self.get_aspect(urn, SchemaMetadataClass) - if schema_metadata: - schema_resolver.add_schema_metadata(urn, schema_metadata) - count += 1 + urns.append(urn) + schema_resolver.add_graphql_schema_metadata(urn, schema_info) + count += 1 except Exception: - logger.warning("Failed to load schema metadata", exc_info=True) + logger.warning("Failed to add schema info", exc_info=True) + + if count % 1000 == 0: + logger.debug( + f"Loaded {count} schema info in {timer.elapsed_seconds()} seconds" + ) logger.info( - f"Loaded {count} schema metadata in {timer.elapsed_seconds()} seconds" + f"Finished loading total {count} schema info in {timer.elapsed_seconds()} seconds" ) logger.info("Finished initializing schema resolver") - return schema_resolver, urns + return schema_resolver, set(urns) def parse_sql_lineage( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index ae49a4ba17c11..8a16b1a4a5f6b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py 
@@ -285,9 +285,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): # Maps view ref -> actual sql self.view_definitions: FileBackedDict[str] = FileBackedDict() - self.sql_parser_schema_resolver = SchemaResolver( - platform=self.platform, env=self.config.env - ) + self.sql_parser_schema_resolver = self._init_schema_resolver() self.add_config_to_report() atexit.register(cleanup, config) @@ -446,6 +444,27 @@ def test_connection(config_dict: dict) -> TestConnectionReport: ) return test_report + def _init_schema_resolver(self) -> SchemaResolver: + schema_resolution_required = ( + self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser + ) + schema_ingestion_enabled = ( + self.config.include_views and self.config.include_tables + ) + + if schema_resolution_required and not schema_ingestion_enabled: + if self.ctx.graph: + return self.ctx.graph.initialize_schema_resolver_from_datahub( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + )[0] + else: + logger.warning( + "Failed to load schema info from DataHub as DataHubGraph is missing.", + ) + return SchemaResolver(platform=self.platform, env=self.config.env) + def get_dataplatform_instance_aspect( self, dataset_urn: str, project_id: str ) -> MetadataWorkUnit: diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index d677b0874b985..f18235af3d1fd 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -14,6 +14,7 @@ import sqlglot.optimizer.qualify import sqlglot.optimizer.qualify_columns from pydantic import BaseModel +from typing_extensions import TypedDict from datahub.emitter.mce_builder import ( DEFAULT_ENV, @@ -36,6 +37,15 @@ SQL_PARSE_RESULT_CACHE_SIZE = 1000 +class GraphQLSchemaField(TypedDict): + fieldPath: str + nativeDataType: str + + +class 
GraphQLSchemaMetadata(TypedDict): + fields: List[GraphQLSchemaField] + + class QueryType(enum.Enum): CREATE = "CREATE" SELECT = "SELECT" @@ -330,6 +340,12 @@ def add_schema_metadata( def add_raw_schema_info(self, urn: str, schema_info: SchemaInfo) -> None: self._save_to_cache(urn, schema_info) + def add_graphql_schema_metadata( + self, urn: str, schema_metadata: GraphQLSchemaMetadata + ) -> None: + schema_info = self.convert_graphql_schema_metadata_to_info(schema_metadata) + self._save_to_cache(urn, schema_info) + def _save_to_cache(self, urn: str, schema_info: Optional[SchemaInfo]) -> None: self._schema_cache[urn] = schema_info @@ -356,6 +372,24 @@ def _convert_schema_aspect_to_info( not in DatasetUrn.get_simple_field_path_from_v2_field_path(col.fieldPath) } + @classmethod + def convert_graphql_schema_metadata_to_info( + cls, schema: GraphQLSchemaMetadata + ) -> SchemaInfo: + return { + DatasetUrn.get_simple_field_path_from_v2_field_path(field["fieldPath"]): ( + # The actual types are more of a "nice to have". + field["nativeDataType"] + or "str" + ) + for field in schema["fields"] + # TODO: We can't generate lineage to columns nested within structs yet. + if "." 
+ not in DatasetUrn.get_simple_field_path_from_v2_field_path( + field["fieldPath"] + ) + } + # TODO add a method to load all from graphql def close(self) -> None: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java new file mode 100644 index 0000000000000..6b5a3d5bfb06e --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -0,0 +1,39 @@ +package com.linkedin.metadata.client; + +import com.datahub.authentication.Authentication; +import com.linkedin.entity.client.EntityClientCache; +import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; +import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.metadata.entity.DeleteEntityService; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.event.EventProducer; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.LineageSearchService; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.client.CachingEntitySearchService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import lombok.Getter; + + +/** + * Java backed SystemEntityClient + */ +@Getter +public class SystemJavaEntityClient extends JavaEntityClient implements SystemEntityClient { + + private final EntityClientCache entityClientCache; + private final Authentication systemAuthentication; + + public SystemJavaEntityClient(EntityService entityService, DeleteEntityService deleteEntityService, + EntitySearchService entitySearchService, CachingEntitySearchService cachingEntitySearchService, + SearchService searchService, LineageSearchService lineageSearchService, + TimeseriesAspectService timeseriesAspectService, EventProducer eventProducer, + RestliEntityClient restliEntityClient, 
Authentication systemAuthentication, + EntityClientCacheConfig cacheConfig) { + super(entityService, deleteEntityService, entitySearchService, cachingEntitySearchService, searchService, + lineageSearchService, timeseriesAspectService, eventProducer, restliEntityClient); + this.systemAuthentication = systemAuthentication; + this.entityClientCache = buildEntityClientCache(SystemJavaEntityClient.class, systemAuthentication, cacheConfig); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java index fee9cd9bca56e..a3e5a051a47e3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/DataProcessInstanceRunEventChangeEventGenerator.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.timeline.eventgenerator; -import com.datahub.authentication.Authentication; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.dataprocess.DataProcessInstanceRelationships; @@ -8,7 +7,7 @@ import com.linkedin.dataprocess.DataProcessRunStatus; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeEvent; import com.linkedin.metadata.timeline.data.ChangeOperation; @@ -27,9 +26,8 @@ public class DataProcessInstanceRunEventChangeEventGenerator private static final String COMPLETED_STATUS = "COMPLETED"; private static final String STARTED_STATUS = "STARTED"; - public DataProcessInstanceRunEventChangeEventGenerator(@Nonnull final EntityClient 
entityClient, @Nonnull final - Authentication authentication) { - super(entityClient, authentication); + public DataProcessInstanceRunEventChangeEventGenerator(@Nonnull final SystemEntityClient entityClient) { + super(entityClient); } @Override @@ -108,8 +106,8 @@ private DataProcessInstanceRelationships getRelationships(@Nonnull final String EntityResponse entityResponse; try { entityUrn = Urn.createFromString(entityUrnString); - entityResponse = _entityClient.getV2(DATA_PROCESS_INSTANCE_ENTITY_NAME, entityUrn, - Collections.singleton(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME), _authentication); + entityResponse = _entityClient.getV2(entityUrn, + Collections.singleton(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME)); } catch (Exception e) { return null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java index 7f6aa5e53268e..d5539ec3d3822 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EntityChangeEventGenerator.java @@ -5,7 +5,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeEvent; @@ -19,16 +19,14 @@ */ public abstract class EntityChangeEventGenerator { // TODO: Add a check for supported aspects - protected EntityClient _entityClient; + protected SystemEntityClient _entityClient; protected Authentication _authentication; public EntityChangeEventGenerator() { } - public EntityChangeEventGenerator(@Nonnull 
final EntityClient entityClient, - @Nonnull final Authentication authentication) { + public EntityChangeEventGenerator(@Nonnull final SystemEntityClient entityClient) { _entityClient = entityClient; - _authentication = authentication; } @Deprecated diff --git a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java index 3b44ede0f1d43..a214117f4e1bc 100644 --- a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java @@ -1,6 +1,6 @@ package com.linkedin.metadata.kafka; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -24,7 +24,7 @@ public class MaeConsumerApplicationTestConfiguration { private EntityServiceImpl _entityServiceImpl; @MockBean - private RestliEntityClient restliEntityClient; + private SystemRestliEntityClient restliEntityClient; @MockBean private Database ebeanServer; diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java index 2d8c52566e2ae..a9e54e5354b42 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/config/EntityHydratorConfig.java @@ -1,10 +1,10 @@ package com.linkedin.metadata.kafka.config; -import 
com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.google.common.collect.ImmutableSet; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.metadata.kafka.hydrator.EntityHydrator; +import com.linkedin.metadata.models.registry.EntityRegistry; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -13,19 +13,25 @@ @Configuration -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class}) +@Import({RestliEntityClientFactory.class}) public class EntityHydratorConfig { @Autowired - @Qualifier("systemAuthentication") - private Authentication _systemAuthentication; + @Qualifier("systemRestliEntityClient") + private SystemRestliEntityClient _entityClient; @Autowired - @Qualifier("restliEntityClient") - private RestliEntityClient _entityClient; + private EntityRegistry _entityRegistry; + + public final static ImmutableSet EXCLUDED_ASPECTS = ImmutableSet.builder() + .add("datasetUpstreamLineage", "upstreamLineage") + .add("dataJobInputOutput") + .add("dataProcessInstanceRelationships", "dataProcessInstanceInput", "dataProcessInstanceOutput") + .add("inputFields") + .build(); @Bean public EntityHydrator getEntityHydrator() { - return new EntityHydrator(_systemAuthentication, _entityClient); + return new EntityHydrator(_entityRegistry, _entityClient); } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java index 55077c46a1526..3b65ecccad336 100644 --- 
a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java @@ -1,15 +1,12 @@ package com.linkedin.metadata.kafka.hook.event; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.SetMode; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.metadata.Constants; @@ -46,8 +43,7 @@ */ @Slf4j @Component -@Import({EntityChangeEventGeneratorRegistry.class, EntityRegistryFactory.class, RestliEntityClientFactory.class, - SystemAuthenticationFactory.class}) +@Import({EntityChangeEventGeneratorRegistry.class, EntityRegistryFactory.class, RestliEntityClientFactory.class}) public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { /** @@ -83,20 +79,18 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { */ private static final Set SUPPORTED_OPERATIONS = ImmutableSet.of("CREATE", "UPSERT", "DELETE"); private final EntityChangeEventGeneratorRegistry _entityChangeEventGeneratorRegistry; - private final EntityClient _entityClient; - private final Authentication _systemAuthentication; + private final SystemRestliEntityClient _entityClient; private final EntityRegistry _entityRegistry; private final Boolean _isEnabled; @Autowired public EntityChangeEventGeneratorHook( @Nonnull final 
EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, - @Nonnull final RestliEntityClient entityClient, @Nonnull final Authentication systemAuthentication, + @Nonnull final SystemRestliEntityClient entityClient, @Nonnull final EntityRegistry entityRegistry, @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled) { _entityChangeEventGeneratorRegistry = Objects.requireNonNull(entityChangeEventGeneratorRegistry); _entityClient = Objects.requireNonNull(entityClient); - _systemAuthentication = Objects.requireNonNull(systemAuthentication); _entityRegistry = Objects.requireNonNull(entityRegistry); _isEnabled = isEnabled; } @@ -189,8 +183,7 @@ private void emitPlatformEvent(@Nonnull final PlatformEvent event, @Nonnull fina _entityClient.producePlatformEvent( Constants.CHANGE_EVENT_PLATFORM_EVENT_NAME, partitioningKey, - event, - _systemAuthentication + event ); } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index 06545ef3525dd..7cbe53dee9fe4 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.siblings; -import com.datahub.authentication.Authentication; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -13,9 +12,8 @@ import com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; import com.linkedin.entity.EntityResponse; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; 
-import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; @@ -60,7 +58,7 @@ @Slf4j @Component @Singleton -@Import({EntityRegistryFactory.class, RestliEntityClientFactory.class, EntitySearchServiceFactory.class, SystemAuthenticationFactory.class}) +@Import({EntityRegistryFactory.class, RestliEntityClientFactory.class, EntitySearchServiceFactory.class}) public class SiblingAssociationHook implements MetadataChangeLogHook { public static final String SIBLING_ASSOCIATION_SYSTEM_ACTOR = "urn:li:corpuser:__datahub_system_sibling_hook"; @@ -73,23 +71,20 @@ public class SiblingAssociationHook implements MetadataChangeLogHook { public static final String SOURCE_SUBTYPE_V2 = "Source"; private final EntityRegistry _entityRegistry; - private final RestliEntityClient _entityClient; + private final SystemRestliEntityClient _entityClient; private final EntitySearchService _searchService; - private final Authentication _systemAuthentication; private final boolean _isEnabled; @Autowired public SiblingAssociationHook( @Nonnull final EntityRegistry entityRegistry, - @Nonnull final RestliEntityClient entityClient, + @Nonnull final SystemRestliEntityClient entityClient, @Nonnull final EntitySearchService searchService, - @Nonnull final Authentication systemAuthentication, @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled ) { _entityRegistry = entityRegistry; _entityClient = entityClient; _searchService = searchService; - _systemAuthentication = systemAuthentication; _isEnabled = isEnabled; } @@ -251,9 +246,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { dbtSiblingProposal.setEntityUrn(dbtUrn); try { - _entityClient.ingestProposal(dbtSiblingProposal, _systemAuthentication); + _entityClient.ingestProposal(dbtSiblingProposal, true); } 
catch (RemoteInvocationException e) { - log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + log.error("Error while associating {} with {}: {}", dbtUrn, sourceUrn, e.toString()); throw new RuntimeException("Error ingesting sibling proposal. Skipping processing.", e); } @@ -274,9 +269,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { List filteredNewSiblingsArray = newSiblingsUrnArray.stream().filter(urn -> { try { - return _entityClient.exists(urn, _systemAuthentication); + return _entityClient.exists(urn); } catch (RemoteInvocationException e) { - log.error("Error while checking existence of {}: {}", urn.toString(), e.toString()); + log.error("Error while checking existence of {}: {}", urn, e.toString()); throw new RuntimeException("Error checking existence. Skipping processing.", e); } }).collect(Collectors.toList()); @@ -294,9 +289,9 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { sourceSiblingProposal.setEntityUrn(sourceUrn); try { - _entityClient.ingestProposal(sourceSiblingProposal, _systemAuthentication); + _entityClient.ingestProposal(sourceSiblingProposal, true); } catch (RemoteInvocationException e) { - log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + log.error("Error while associating {} with {}: {}", dbtUrn, sourceUrn, e.toString()); throw new RuntimeException("Error ingesting sibling proposal. 
Skipping processing.", e); } } @@ -406,11 +401,8 @@ private SubTypes getSubtypesFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _systemAuthentication - ); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME)); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SUB_TYPES_ASPECT_NAME)) { return new SubTypes(entityResponse.getAspects().get(Constants.SUB_TYPES_ASPECT_NAME).getValue().data()); @@ -427,10 +419,8 @@ private UpstreamLineage getUpstreamLineageFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(UPSTREAM_LINEAGE_ASPECT_NAME), - _systemAuthentication + ImmutableSet.of(UPSTREAM_LINEAGE_ASPECT_NAME) ); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { @@ -448,10 +438,8 @@ private Siblings getSiblingsFromEntityClient( ) { try { EntityResponse entityResponse = _entityClient.getV2( - DATASET_ENTITY_NAME, urn, - ImmutableSet.of(SIBLINGS_ASPECT_NAME), - _systemAuthentication + ImmutableSet.of(SIBLINGS_ASPECT_NAME) ); if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SIBLINGS_ASPECT_NAME)) { diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java index d768ada1765fa..0a3b38517eaad 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hydrator/EntityHydrator.java @@ -1,28 +1,32 @@ package com.linkedin.metadata.kafka.hydrator; -import com.datahub.authentication.Authentication; import 
com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; -import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; import java.util.Collections; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.kafka.config.EntityHydratorConfig.EXCLUDED_ASPECTS; @Slf4j @RequiredArgsConstructor public class EntityHydrator { - private final Authentication _systemAuthentication; - private final EntityClient _entityClient; - + private final EntityRegistry _entityRegistry; + private final SystemRestliEntityClient _entityClient; private final ChartHydrator _chartHydrator = new ChartHydrator(); private final CorpUserHydrator _corpUserHydrator = new CorpUserHydrator(); private final DashboardHydrator _dashboardHydrator = new DashboardHydrator(); @@ -43,8 +47,12 @@ public Optional getHydratedEntity(String entityTypeName, String urn) // Hydrate fields from snapshot EntityResponse entityResponse; try { - entityResponse = _entityClient.batchGetV2(entityTypeName, Collections.singleton(urnObj), null, - this._systemAuthentication).get(urnObj); + Set aspectNames = Optional.ofNullable(_entityRegistry.getEntitySpecs().get(urnObj.getEntityType())) + .map(spec -> spec.getAspectSpecs().stream().map(AspectSpec::getName) + .filter(aspectName -> !EXCLUDED_ASPECTS.contains(aspectName)) + .collect(Collectors.toSet())) + .orElse(Set.of()); + entityResponse = _entityClient.batchGetV2(Collections.singleton(urnObj), aspectNames).get(urnObj); } 
catch (RemoteInvocationException | URISyntaxException e) { log.error("Error while calling GMS to hydrate entity for urn {}", urn); return Optional.empty(); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java index d8759da0fe1dd..7d9619f3e2d1c 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.event; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.assertion.AssertionResult; @@ -38,8 +37,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DatasetKey; @@ -66,6 +64,7 @@ import com.linkedin.platform.event.v1.Parameters; import java.net.URISyntaxException; import java.util.Map; + import org.mockito.Mockito; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -92,22 +91,19 @@ public class EntityChangeEventGeneratorHookTest { private static final String TEST_DATA_FLOW_URN = "urn:li:dataFlow:flow"; private static final String TEST_DATA_JOB_URN = "urn:li:dataJob:job"; private Urn actorUrn; - private Authentication _mockAuthentication; - private RestliEntityClient _mockClient; + private 
SystemRestliEntityClient _mockClient; private EntityService _mockEntityService; private EntityChangeEventGeneratorHook _entityChangeEventHook; @BeforeMethod public void setupTest() throws URISyntaxException { actorUrn = Urn.createFromString(TEST_ACTOR_URN); - _mockAuthentication = Mockito.mock(Authentication.class); - _mockClient = Mockito.mock(RestliEntityClient.class); + _mockClient = Mockito.mock(SystemRestliEntityClient.class); _mockEntityService = Mockito.mock(EntityService.class); EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry = createEntityChangeEventGeneratorRegistry(); _entityChangeEventHook = - new EntityChangeEventGeneratorHook(entityChangeEventGeneratorRegistry, _mockClient, _mockAuthentication, - createMockEntityRegistry(), true); + new EntityChangeEventGeneratorHook(entityChangeEventGeneratorRegistry, _mockClient, createMockEntityRegistry(), true); } @Test @@ -498,8 +494,7 @@ public void testInvokeDataProcessInstanceRunEventStart() throws Exception { final EntityResponse entityResponse = buildEntityResponse(ImmutableMap.of(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, relationships)); - Mockito.when(_mockClient.getV2(eq(DATA_PROCESS_INSTANCE_ENTITY_NAME), eq(dataProcessInstanceUrn), - any(), eq(_mockAuthentication))).thenReturn(entityResponse); + Mockito.when(_mockClient.getV2(eq(dataProcessInstanceUrn), any())).thenReturn(entityResponse); _entityChangeEventHook.invoke(event); @@ -540,8 +535,7 @@ public void testInvokeDataProcessInstanceRunEventComplete() throws Exception { final EntityResponse entityResponse = buildEntityResponse(ImmutableMap.of(DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, relationships)); - Mockito.when(_mockClient.getV2(eq(DATA_PROCESS_INSTANCE_ENTITY_NAME), eq(dataProcessInstanceUrn), - any(), eq(_mockAuthentication))).thenReturn(entityResponse); + Mockito.when(_mockClient.getV2(eq(dataProcessInstanceUrn), any())).thenReturn(entityResponse); _entityChangeEventHook.invoke(event); @@ -618,7 +612,7 @@ 
private EntityChangeEventGeneratorRegistry createEntityChangeEventGeneratorRegis // Run change event generators registry.register(ASSERTION_RUN_EVENT_ASPECT_NAME, new AssertionRunEventChangeEventGenerator()); registry.register(DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, - new DataProcessInstanceRunEventChangeEventGenerator(_mockClient, _mockAuthentication)); + new DataProcessInstanceRunEventChangeEventGenerator(_mockClient)); return registry; } @@ -668,14 +662,14 @@ private EntityRegistry createMockEntityRegistry() { return registry; } - private void verifyProducePlatformEvent(EntityClient mockClient, PlatformEvent platformEvent) throws Exception { + private void verifyProducePlatformEvent(SystemRestliEntityClient mockClient, PlatformEvent platformEvent) throws Exception { verifyProducePlatformEvent(mockClient, platformEvent, true); } - private void verifyProducePlatformEvent(EntityClient mockClient, PlatformEvent platformEvent, boolean noMoreInteractions) throws Exception { + private void verifyProducePlatformEvent(SystemRestliEntityClient mockClient, PlatformEvent platformEvent, boolean noMoreInteractions) throws Exception { // Verify event has been emitted. 
verify(mockClient, Mockito.times(1)).producePlatformEvent(eq(CHANGE_EVENT_PLATFORM_EVENT_NAME), Mockito.anyString(), - argThat(new PlatformEventMatcher(platformEvent)), Mockito.any(Authentication.class)); + argThat(new PlatformEventMatcher(platformEvent))); if (noMoreInteractions) { Mockito.verifyNoMoreInteractions(_mockClient); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 78d304d67bfc0..6a2a05aa4b8c0 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.kafka.hook.siblings; -import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.linkedin.common.FabricType; @@ -19,7 +18,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -44,19 +43,16 @@ public class SiblingAssociationHookTest { private SiblingAssociationHook _siblingAssociationHook; - RestliEntityClient _mockEntityClient; + SystemRestliEntityClient _mockEntityClient; EntitySearchService _mockSearchService; - Authentication _mockAuthentication; @BeforeMethod public void setupTest() { EntityRegistry registry = new ConfigEntityRegistry( 
SiblingAssociationHookTest.class.getClassLoader().getResourceAsStream("test-entity-registry-siblings.yml")); - _mockEntityClient = Mockito.mock(RestliEntityClient.class); + _mockEntityClient = Mockito.mock(SystemRestliEntityClient.class); _mockSearchService = Mockito.mock(EntitySearchService.class); - _mockAuthentication = Mockito.mock(Authentication.class); - _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityClient, _mockSearchService, _mockAuthentication, - true); + _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityClient, _mockSearchService, true); _siblingAssociationHook.setEnabled(true); } @@ -69,15 +65,13 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { EntityResponse mockResponse = new EntityResponse(); mockResponse.setAspects(mockResponseMap); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); Mockito.when( _mockEntityClient.getV2( - DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _mockAuthentication + ImmutableSet.of(SUB_TYPES_ASPECT_NAME) )).thenReturn(mockResponse); @@ -105,10 +99,7 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() .setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -121,10 +112,7 @@ public void 
testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test @@ -132,23 +120,20 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { SubTypes mockSourceSubtypesAspect = new SubTypes(); mockSourceSubtypesAspect.setTypeNames(new StringArray(ImmutableList.of("model"))); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); EnvelopedAspectMap mockResponseMap = new EnvelopedAspectMap(); mockResponseMap.put(SUB_TYPES_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(mockSourceSubtypesAspect.data()))); EntityResponse mockResponse = new EntityResponse(); mockResponse.setAspects(mockResponseMap); - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); Mockito.when( _mockEntityClient.getV2( - DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - ImmutableSet.of(SUB_TYPES_ASPECT_NAME), - _mockAuthentication - )).thenReturn(mockResponse); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME))).thenReturn(mockResponse); MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); @@ -174,15 +159,12 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { 
proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal(Mockito.eq(proposal), eq(true)); } @Test public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Exception { - Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); @@ -208,10 +190,7 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() .setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -224,15 +203,12 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { - Mockito.when(_mockEntityClient.exists(Mockito.any(), 
Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any())).thenReturn(true); SearchResult returnSearchResult = new SearchResult(); SearchEntityArray returnEntityArray = new SearchEntityArray(); @@ -271,10 +247,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal), eq(true)); final Siblings sourceSiblingsAspect = new Siblings() .setSiblings(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")))) @@ -287,10 +260,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal(Mockito.eq(proposal2), eq(true)); } @Test public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { @@ -309,10 +279,7 @@ public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { _siblingAssociationHook.invoke(event); - Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.eq(_mockAuthentication) - ); + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal(Mockito.any(), eq(true)); } @@ -335,12 +302,7 @@ public void testInvokeWhenSourceUrnHasTwoUpstreamsOneDbt() throws Exception { _siblingAssociationHook.invoke(event); - Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal( - Mockito.any(), - 
Mockito.eq(_mockAuthentication) - ); - - + Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal(Mockito.any(), eq(true)); } private MetadataChangeLog createEvent(String entityType, String aspectName, ChangeType changeType) { diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java index ef80c49ec4520..dc5a6cd23295b 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.metadata.ingestion.IngestionScheduler; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; @@ -44,8 +44,8 @@ public class MCLSpringTestConfiguration { @MockBean public IngestionScheduler ingestionScheduler; - @MockBean - public RestliEntityClient entityClient; + @MockBean(name = "systemRestliEntityClient") + public SystemRestliEntityClient entityClient; @MockBean public ElasticSearchService searchService; diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java index 9b4fe15c11fc5..f0c59240a9ba4 100644 --- a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java +++ 
b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.kafka; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java index 74679d30b2945..c30dd6e6f96dc 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java @@ -4,8 +4,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.linkedin.entity.Entity; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; @@ -40,15 +39,14 @@ @Slf4j @Component @Conditional(MetadataChangeProposalProcessorCondition.class) -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class, KafkaEventConsumerFactory.class, - DataHubKafkaProducerFactory.class}) +@Import({RestliEntityClientFactory.class, KafkaEventConsumerFactory.class, 
DataHubKafkaProducerFactory.class}) @EnableKafka @RequiredArgsConstructor public class MetadataChangeEventsProcessor { @NonNull private final Authentication systemAuthentication; - private final RestliEntityClient entityClient; + private final SystemRestliEntityClient entityClient; private final Producer kafkaProducer; private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index 289d70ef8c0e9..79f8c90af8ec7 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -2,9 +2,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; @@ -35,15 +33,13 @@ @Slf4j @Component -@Import({RestliEntityClientFactory.class, SystemAuthenticationFactory.class, KafkaEventConsumerFactory.class, - DataHubKafkaProducerFactory.class}) +@Import({RestliEntityClientFactory.class, KafkaEventConsumerFactory.class, DataHubKafkaProducerFactory.class}) @Conditional(MetadataChangeProposalProcessorCondition.class) @EnableKafka @RequiredArgsConstructor public class MetadataChangeProposalsProcessor { - private final Authentication systemAuthentication; - private final 
RestliEntityClient entityClient; + private final SystemRestliEntityClient entityClient; private final Producer kafkaProducer; private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); @@ -64,7 +60,7 @@ public void consume(final ConsumerRecord consumerRecord) event = EventUtils.avroToPegasusMCP(record); log.debug("MetadataChangeProposal {}", event); // TODO: Get this from the event itself. - entityClient.ingestProposal(event, this.systemAuthentication, false); + entityClient.ingestProposal(event, false); } catch (Throwable throwable) { log.error("MCP Processor Error", throwable); log.error("Message: {}", record); diff --git a/metadata-service/configuration/build.gradle b/metadata-service/configuration/build.gradle index 30fa3079d29a4..bf79469633b0f 100644 --- a/metadata-service/configuration/build.gradle +++ b/metadata-service/configuration/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation externalDependency.slf4jApi implementation externalDependency.springCore + implementation externalDependency.springBeans compileOnly externalDependency.lombok diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java index 38934cb9a3d2f..aff0e23e3b337 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.config.cache; +import com.linkedin.metadata.config.cache.client.ClientCacheConfiguration; import lombok.Data; @@ -8,4 +9,5 @@ public class CacheConfiguration { PrimaryCacheConfiguration primary; HomepageCacheConfiguration homepage; SearchCacheConfiguration search; + ClientCacheConfiguration client; } diff --git 
a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java new file mode 100644 index 0000000000000..3cf7ef20797bb --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfig.java @@ -0,0 +1,10 @@ +package com.linkedin.metadata.config.cache.client; + + +public interface ClientCacheConfig { + boolean isEnabled(); + boolean isStatsEnabled(); + int getStatsIntervalSeconds(); + int getDefaultTTLSeconds(); + int getMaxBytes(); +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java new file mode 100644 index 0000000000000..d940bbe135e55 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/ClientCacheConfiguration.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +@Data +public class ClientCacheConfiguration { + EntityClientCacheConfig entityClient; + UsageClientCacheConfig usageClient; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java new file mode 100644 index 0000000000000..595b614f2f599 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/EntityClientCacheConfig.java @@ -0,0 +1,17 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +import java.util.Map; + +@Data +public class EntityClientCacheConfig implements ClientCacheConfig { + private boolean enabled; + private 
boolean statsEnabled; + private int statsIntervalSeconds; + private int defaultTTLSeconds; + private int maxBytes; + + // entityName -> aspectName -> cache ttl override + private Map<String, Map<String, Integer>> entityAspectTTLSeconds; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java new file mode 100644 index 0000000000000..3aebec9422ed8 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/client/UsageClientCacheConfig.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.config.cache.client; + +import lombok.Data; + +@Data +public class UsageClientCacheConfig implements ClientCacheConfig { + private boolean enabled; + private boolean statsEnabled; + private int statsIntervalSeconds; + private int defaultTTLSeconds; + private int maxBytes; +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java similarity index 87% rename from metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java index 1542407697d1b..c10399c4f3e70 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/spring/YamlPropertySourceFactory.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/spring/YamlPropertySourceFactory.java @@ -1,14 +1,18 @@ -package com.linkedin.gms.factory.spring; +package com.linkedin.metadata.spring; -import java.io.IOException; -import java.util.Properties; import org.springframework.beans.factory.config.YamlPropertiesFactoryBean; import
org.springframework.core.env.PropertiesPropertySource; import org.springframework.core.env.PropertySource; import org.springframework.core.io.support.EncodedResource; import org.springframework.core.io.support.PropertySourceFactory; +import java.io.IOException; +import java.util.Properties; + +/** + * Required for Spring to parse the application.yml provided by this module + */ public class YamlPropertySourceFactory implements PropertySourceFactory { @Override diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index ea959bebf25ad..42749d8205d21 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -327,3 +327,27 @@ cache: lineage: ttlSeconds: ${CACHE_SEARCH_LINEAGE_TTL_SECONDS:86400} # 1 day lightningThreshold: ${CACHE_SEARCH_LINEAGE_LIGHTNING_THRESHOLD:300} + client: + usageClient: + enabled: ${CACHE_CLIENT_USAGE_CLIENT_ENABLED:true} + statsEnabled: ${CACHE_CLIENT_USAGE_CLIENT_STATS_ENABLED:true} + statsIntervalSeconds: ${CACHE_CLIENT_USAGE_CLIENT_STATS_INTERVAL_SECONDS:120} + defaultTTLSeconds: ${CACHE_CLIENT_USAGE_CLIENT_TTL_SECONDS:86400} # 1 day + maxBytes: ${CACHE_CLIENT_USAGE_CLIENT_MAX_BYTES:52428800} # 50MB + entityClient: + enabled: ${CACHE_CLIENT_ENTITY_CLIENT_ENABLED:true} + statsEnabled: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_ENABLED:true} + statsIntervalSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_INTERVAL_SECONDS:120} + defaultTTLSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_TTL_SECONDS:0} # do not cache entity/aspects by default + maxBytes: ${CACHE_CLIENT_ENTITY_CLIENT_MAX_BYTES:104857600} # 100MB + entityAspectTTLSeconds: + # cache user aspects for 20s + corpuser: + corpUserKey: 20 + corpUserInfo: 20 + corpUserEditableInfo: 20 + corpUserStatus: 20 + globalTags: 20 + status: 20 + corpUserCredentials: 20 + corpUserSettings: 20 diff --git
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index ed072398178de..bf50a0c7b6473 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -19,7 +19,7 @@ import com.datahub.plugins.loader.PluginPermissionManagerImpl; import com.google.common.collect.ImmutableMap; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import java.nio.file.Path; import java.nio.file.Paths; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java index 30e03d87a8b56..5b298a453547a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java @@ -4,7 +4,7 @@ import com.datahub.authorization.DataHubAuthorizer; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java index fc010a1aa2cae..6b2a61882be90 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java @@ -1,9 +1,10 @@ package com.linkedin.gms.factory.auth; import com.datahub.authentication.token.StatefulTokenService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import javax.annotation.Nonnull; + +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java index 9d29b8e77d02d..57598abf8095d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/GroupServiceFactory.java @@ -4,7 +4,7 @@ import com.datahub.authentication.group.GroupService; import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java index 47f7ef0e0c1eb..105f4c677a9e4 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/InviteTokenServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.auth; import com.datahub.authentication.invite.InviteTokenService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.secret.SecretService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java index ca52420b440b2..3df499ea9392e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/NativeUserServiceFactory.java @@ -4,7 +4,7 @@ import com.datahub.authentication.user.NativeUserService; import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.secret.SecretService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java index 8e5e5e5cfc667..cc6f5c8272f9d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.auth; import 
com.datahub.authentication.post.PostService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java index 42f3e797c33bd..8a85f63cdd66d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/RoleServiceFactory.java @@ -3,7 +3,7 @@ package com.linkedin.gms.factory.auth; import com.datahub.authorization.role.RoleService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java index d6c171dc741e4..5bdd8cbf83c65 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/SystemAuthenticationFactory.java @@ -3,7 +3,7 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import lombok.Data; import 
org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java index c1c5acbc1fddc..51c7db5e37366 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java @@ -3,7 +3,7 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java index 89f196b056ee0..504618ba9cc6a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchSystemMetadataServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.common; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.systemmetadata.ESSystemMetadataDAO; import 
com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java index d57da336429d9..0dce80b98964b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticsearchSSLContextFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import org.apache.http.ssl.SSLContextBuilder; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java index 02e31c7dc4f57..94593eb1fb84c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/GraphServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java index a2816830f33ce..ada8466d302e6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/IndexConventionFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java index 5ab5b14160e27..6bf8ff123b221 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.ebean.config.ServerConfig; import io.ebean.datasource.DataSourceConfig; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java index a364504d443f7..65b6115d6638e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jDriverFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.common; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.concurrent.TimeUnit; import org.neo4j.driver.AuthTokens; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java index 5f50b8f7f0508..3c40b30bfc7d1 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.common; import com.linkedin.gms.factory.auth.AwsRequestSigningApacheInterceptor; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.io.IOException; import javax.annotation.Nonnull; import javax.net.ssl.HostnameVerifier; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index e07630111a567..465480be344c7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -12,7 +12,7 @@ import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import 
com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import lombok.Data; import org.springframework.boot.context.properties.ConfigurationProperties; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java index c0f2c8e1f1223..6eab711603c52 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/dataproduct/DataProductServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.dataproduct; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.service.DataProductService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java index c9c3953f4d998..e1c24b805437b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java @@ -1,8 +1,11 @@ package com.linkedin.gms.factory.entity; +import com.datahub.authentication.Authentication; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.entity.client.RestliEntityClient; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; +import com.linkedin.metadata.client.SystemJavaEntityClient; import 
com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.event.EventProducer; @@ -53,12 +56,8 @@ public class JavaEntityClientFactory { @Qualifier("kafkaEventProducer") private EventProducer _eventProducer; - @Autowired - @Qualifier("restliEntityClient") - private RestliEntityClient _restliEntityClient; - @Bean("javaEntityClient") - public JavaEntityClient getJavaEntityClient() { + public JavaEntityClient getJavaEntityClient(@Qualifier("restliEntityClient") final RestliEntityClient restliEntityClient) { return new JavaEntityClient( _entityService, _deleteEntityService, @@ -68,6 +67,24 @@ public JavaEntityClient getJavaEntityClient() { _lineageSearchService, _timeseriesAspectService, _eventProducer, - _restliEntityClient); + restliEntityClient); + } + + @Bean("systemJavaEntityClient") + public SystemJavaEntityClient systemJavaEntityClient(@Qualifier("configurationProvider") final ConfigurationProvider configurationProvider, + @Qualifier("systemAuthentication") final Authentication systemAuthentication, + @Qualifier("systemRestliEntityClient") final RestliEntityClient restliEntityClient) { + return new SystemJavaEntityClient( + _entityService, + _deleteEntityService, + _entitySearchService, + _cachingEntitySearchService, + _searchService, + _lineageSearchService, + _timeseriesAspectService, + _eventProducer, + restliEntityClient, + systemAuthentication, + configurationProvider.getCache().getClient().getEntityClient()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java index e149ecedfa6f6..dfc5e835392df 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RestliEntityClientFactory.java @@ -1,10 +1,14 @@ package com.linkedin.gms.factory.entity; +import com.datahub.authentication.Authentication; import com.linkedin.entity.client.RestliEntityClient; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.entity.client.SystemRestliEntityClient; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.restli.DefaultRestliClientFactory; import com.linkedin.parseq.retry.backoff.ExponentialBackoff; import com.linkedin.restli.client.Client; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -48,4 +52,17 @@ public RestliEntityClient getRestliEntityClient() { } return new RestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries); } + + @Bean("systemRestliEntityClient") + public SystemRestliEntityClient systemRestliEntityClient(@Qualifier("configurationProvider") final ConfigurationProvider configurationProvider, + @Qualifier("systemAuthentication") final Authentication systemAuthentication) { + final Client restClient; + if (gmsUri != null) { + restClient = DefaultRestliClientFactory.getRestLiClient(URI.create(gmsUri), gmsSslProtocol); + } else { + restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); + } + return new SystemRestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries, + systemAuthentication, configurationProvider.getCache().getClient().getEntityClient()); + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java index b13bf5813d47e..ff56f19e4f8fd 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.entity; import com.datastax.oss.driver.api.core.CqlSession; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java index 471f079683d60..cda21f8907867 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.entityregistry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import java.io.IOException; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java index 150e1e48f39af..6dbb07309c7cc 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.entityregistry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import java.io.FileNotFoundException; import java.net.MalformedURLException; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index d7aee59ca6dd1..c50b4c9088bc2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -20,6 +20,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.recommendation.RecommendationServiceFactory; +import com.linkedin.metadata.client.SystemJavaEntityClient; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; @@ -65,6 +66,10 @@ public class GraphQLEngineFactory { @Qualifier("javaEntityClient") private JavaEntityClient _entityClient; + @Autowired + @Qualifier("systemJavaEntityClient") + private SystemJavaEntityClient _systemEntityClient; + @Autowired @Qualifier("graphClient") private GraphClient _graphClient; @@ -170,6 +175,7 @@ public class GraphQLEngineFactory { protected GraphQLEngine getInstance() { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); args.setEntityClient(_entityClient); + 
args.setSystemEntityClient(_systemEntityClient); args.setGraphClient(_graphClient); args.setUsageClient(_usageClient); if (isAnalyticsEnabled) { diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java index b310ee25cbcbb..9beb617c4f6e8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ingestion/IngestionSchedulerFactory.java @@ -6,7 +6,7 @@ import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entity.RestliEntityClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java index 66f556066497f..675f015d9e378 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaEventProducerFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.kafka; import com.linkedin.gms.factory.common.TopicConventionFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import 
com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.mxe.TopicConvention; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java index e58661b357e6a..c67a2e704681f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java @@ -6,7 +6,7 @@ import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.KafkaSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.Arrays; import java.util.Map; import org.apache.avro.generic.IndexedRecord; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java index 59f08e3733704..ac1cbbc5cc5ff 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/AwsGlueSchemaRegistryFactory.java @@ -5,7 +5,7 @@ import com.amazonaws.services.schemaregistry.utils.AWSSchemaRegistryConstants; import com.amazonaws.services.schemaregistry.utils.AvroRecordType; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import java.util.HashMap; 
import java.util.Map; import java.util.Optional; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java index d0e11baab9089..7b72ba3f3bb88 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/KafkaSchemaRegistryFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.kafka.schemaregistry; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClientConfig; import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; import io.confluent.kafka.serializers.KafkaAvroDeserializer; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java index f76549c90af68..8596a14b7fc24 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/lineage/LineageServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.lineage; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java index 512a0a1fa40ab..3a1f18692fdc6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/ownership/OwnershipTypeServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.ownership; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.OwnershipTypeService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java index f2bdce908319e..f98c5bd50467d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/query/QueryServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.query; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.QueryService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java index 620af803723e7..c99d429e986b6 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/BaseElasticSearchComponentsFactory.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.common.IndexConventionFactory; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java index 7b20e798b79f2..845c63c32e0fd 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/CachingEntitySearchServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.search; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.client.CachingEntitySearchService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java index fc6f92b2678f3..5deffdb01d247 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import javax.annotation.Nonnull; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java index 495d77ccbb29f..b619ee9516dce 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java @@ -6,7 +6,7 @@ import com.linkedin.gms.factory.common.IndexConventionFactory; import com.linkedin.gms.factory.common.RestHighLevelClientFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.version.GitVersion; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 03dd2d072b4a0..a2a0dbaf89c79 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -7,7 +7,7 @@ 
import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java index 94b3f40849a13..e2eef83bc6e3f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/LineageSearchServiceFactory.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.common.GraphServiceFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java index e1fe0399cb115..a186d2de770f3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchDocumentTransformerFactory.java @@ -1,6 +1,6 @@ package 
com.linkedin.gms.factory.search; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java index 70307e51f3256..64bb0218a0d71 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SearchServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java index b6bfef6ed8c78..840a370957706 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/SettingsBuilderFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import 
com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import org.springframework.beans.factory.annotation.Autowired; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java index 006b992191cfa..60bcd9ea22be6 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.search.views; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.ViewService; import javax.annotation.Nonnull; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java index 73ec79fa7ed08..2e22d43913493 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/settings/SettingsServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.settings; import com.datahub.authentication.Authentication; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.service.SettingsService; import javax.annotation.Nonnull; diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java index b2982d1f8ed9d..8178ce1399aa3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelApiFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.telemetry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.mixpanel.mixpanelapi.MixpanelAPI; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java index aa8596786ce11..5385c5e81f804 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/MixpanelMessageBuilderFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.telemetry; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.mixpanel.mixpanelapi.MessageBuilder; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java index 3b53a6fe92810..bb166af5501b3 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/telemetry/TrackingServiceFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.telemetry; import com.datahub.telemetry.TrackingService; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.secret.SecretService; import com.linkedin.metadata.version.GitVersion; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java index e9b9850c01a2b..89a7e7dd8d71a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/EntityChangeEventGeneratorRegistryFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.timeline; import com.datahub.authentication.Authentication; -import com.linkedin.entity.client.RestliEntityClient; +import com.linkedin.entity.client.SystemRestliEntityClient; import com.linkedin.metadata.timeline.eventgenerator.AssertionRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DataProcessInstanceRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DatasetPropertiesChangeEventGenerator; @@ -38,7 +38,7 @@ public class EntityChangeEventGeneratorRegistryFactory { @Singleton @Nonnull protected com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry() { - final RestliEntityClient entityClient = applicationContext.getBean(RestliEntityClient.class); + 
final SystemRestliEntityClient entityClient = applicationContext.getBean(SystemRestliEntityClient.class); final Authentication systemAuthentication = applicationContext.getBean(Authentication.class); final com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry registry = @@ -74,7 +74,7 @@ protected com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGenerat // Data Process Instance differs registry.register(DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, - new DataProcessInstanceRunEventChangeEventGenerator(entityClient, systemAuthentication)); + new DataProcessInstanceRunEventChangeEventGenerator(entityClient)); // TODO: Add ML models. diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java index df9d80eb63a02..baa22d401387f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/TimelineServiceFactory.java @@ -1,6 +1,6 @@ package com.linkedin.gms.factory.timeline; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.timeline.TimelineService; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java index 717adf7d559b7..e3cc772f21c40 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java @@ -2,7 +2,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java index e4cbb92cebbba..e83cbc82d8067 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/usage/UsageClientFactory.java @@ -1,10 +1,14 @@ package com.linkedin.gms.factory.usage; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.datahub.authentication.Authentication; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.restli.DefaultRestliClientFactory; import com.linkedin.parseq.retry.backoff.ExponentialBackoff; import com.linkedin.restli.client.Client; import com.linkedin.usage.UsageClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -33,10 +37,15 @@ public class UsageClientFactory { 
@Value("${usageClient.numRetries:3}") private int numRetries; + @Autowired + @Qualifier("configurationProvider") + private ConfigurationProvider configurationProvider; + @Bean("usageClient") - public UsageClient getUsageClient() { + public UsageClient getUsageClient(@Qualifier("systemAuthentication") final Authentication systemAuthentication) { Client restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); - return new UsageClient(restClient, new ExponentialBackoff(retryInterval), numRetries); + return new UsageClient(restClient, new ExponentialBackoff(retryInterval), numRetries, systemAuthentication, + configurationProvider.getCache().getClient().getUsageClient()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java index 91fc58d074ed6..e038cb230c458 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.boot.factories; import com.linkedin.gms.factory.entity.RetentionServiceFactory; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java index e0fec07452302..b4e87eedea542 100644 --- 
a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java @@ -1,7 +1,7 @@ package io.datahubproject.openapi.util; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; import io.datahubproject.openapi.dto.UpsertAspectRequest; diff --git a/metadata-service/restli-client/build.gradle b/metadata-service/restli-client/build.gradle index 45cf008d3ca7d..b1b778b45c0b5 100644 --- a/metadata-service/restli-client/build.gradle +++ b/metadata-service/restli-client/build.gradle @@ -7,6 +7,7 @@ dependencies { api project(path: ':metadata-service:restli-api', configuration: 'restClient') api project(':metadata-events:mxe-schemas') api project(':metadata-utils') + implementation project(':metadata-service:configuration') implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java new file mode 100644 index 0000000000000..8aa0984be57b9 --- /dev/null +++ b/metadata-service/restli-client/src/main/java/com/linkedin/common/client/ClientCache.java
@@ -0,0 +1,162 @@
+package com.linkedin.common.client;
+
+import com.codahale.metrics.Gauge;
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.Expiry;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.Weigher;
+import com.github.benmanes.caffeine.cache.stats.CacheStats;
+import com.linkedin.metadata.config.cache.client.ClientCacheConfig;
+import com.linkedin.metadata.utils.metrics.MetricUtils;
+import lombok.Builder;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+/**
+ * Generic cache with common configuration for limited weight, per item expiry, and batch loading.
+ *
+ * <p>Instances are built with {@link ClientCacheBuilder#build(Class)}, which creates the backing
+ * Caffeine {@link LoadingCache} from the configured weigher, load function and TTL function.
+ *
+ * @param <K> key
+ * @param <V> value
+ * @param <C> cache configuration type
+ */
+@Slf4j
+@Builder
+public class ClientCache<K, V, C extends ClientCacheConfig> {
+  @NonNull
+  protected final C config;
+  @NonNull
+  protected final LoadingCache<K, V> cache;
+  @NonNull
+  private final Function<Iterable<? extends K>, Map<K, V>> loadFunction;
+  @NonNull
+  private final Weigher<K, V> weigher;
+  @NonNull
+  private final BiFunction<C, K, Integer> ttlSecondsFunction;
+
+  /** Returns the value for {@code key} from the backing loading cache. */
+  public @Nullable V get(@NonNull K key) {
+    return cache.get(key);
+  }
+
+  /** Returns the values for {@code keys} from the backing loading cache. */
+  public @NonNull Map<@NonNull K, @NonNull V> getAll(@NonNull Iterable<? extends @NonNull K> keys) {
+    return cache.getAll(keys);
+  }
+
+  /** Asks the backing loading cache to refresh the entry for {@code key}. */
+  public void refresh(@NonNull K key) {
+    cache.refresh(key);
+  }
+
+  /** Builder customization; {@link #build(Class)} is the public entry point. */
+  public static class ClientCacheBuilder<K, V, C extends ClientCacheConfig> {
+
+    // Private and unused; presumably declared so Lombok does not generate a public cache(...) setter.
+    private ClientCacheBuilder<K, V, C> cache(LoadingCache<K, V> cache) {
+      return null;
+    }
+
+    // Private and unused; presumably declared so Lombok does not generate a public no-arg build().
+    private ClientCache<K, V, C> build() {
+      return null;
+    }
+
+    /**
+     * Creates the backing cache and returns the finished {@link ClientCache}.
+     *
+     * @param metricClazz class passed to {@code MetricUtils.gauge} and used as the debug-log prefix
+     *     when {@code config.isStatsEnabled()} is true
+     * @return cache wired to the configured load function, weigher and TTL function
+     */
+    public ClientCache<K, V, C> build(Class<?> metricClazz) {
+      // loads data from entity client
+      CacheLoader<K, V> loader = new CacheLoader<>() {
+        @Override
+        public V load(@NonNull K key) {
+          return loadAll(List.of(key)).get(key);
+        }
+
+        @Override
+        @NonNull
+        public Map<K, V> loadAll(@NonNull Iterable<? extends K> keys) {
+          return loadFunction.apply(keys);
+        }
+      };
+
+      // build cache
+      Caffeine<K, V> caffeine = Caffeine.newBuilder()
+          .maximumWeight(config.getMaxBytes())
+          // limit total size
+          .weigher(weigher)
+          .softValues()
+          // define per entity/aspect ttls
+          .expireAfter(new Expiry<K, V>() {
+            @Override
+            public long expireAfterCreate(@NonNull K key, @NonNull V aspect, long currentTime) {
+              int ttlSeconds = ttlSecondsFunction.apply(config, key);
+              // a negative TTL is replaced by the largest int number of seconds
+              if (ttlSeconds < 0) {
+                ttlSeconds = Integer.MAX_VALUE;
+              }
+              return TimeUnit.SECONDS.toNanos(ttlSeconds);
+            }
+
+            // updates and reads hand back the current duration, i.e. do not set a new TTL
+            @Override
+            public long expireAfterUpdate(@NonNull K key, @NonNull V aspect,
+                long currentTime, long currentDuration) {
+              return currentDuration;
+            }
+
+            @Override
+            public long expireAfterRead(@NonNull K key, @NonNull V aspect,
+                long currentTime, long currentDuration) {
+              return currentDuration;
+            }
+          });
+
+      if (config.isStatsEnabled()) {
+        caffeine.recordStats();
+      }
+
+      LoadingCache<K, V> cache = caffeine.build(loader);
+
+      if (config.isStatsEnabled()) {
+        ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(1);
+        executor.scheduleAtFixedRate(() -> {
+          CacheStats cacheStats = cache.stats();
+
+          // NOTE(review): each run registers the gauges against this run's stats snapshot; if
+          // MetricUtils.gauge keeps the first registration per name, values never change - confirm.
+          MetricUtils.gauge(metricClazz, "hitRate", () -> (Gauge<Double>) cacheStats::hitRate);
+          MetricUtils.gauge(metricClazz, "loadFailureRate", () ->
+              (Gauge<Double>) cacheStats::loadFailureRate);
+          MetricUtils.gauge(metricClazz, "evictionCount", () ->
+              (Gauge<Long>) cacheStats::evictionCount);
+          MetricUtils.gauge(metricClazz, "loadFailureCount", () ->
+              (Gauge<Long>) cacheStats::loadFailureCount);
+          MetricUtils.gauge(metricClazz, "averageLoadPenalty", () ->
+              (Gauge<Double>) cacheStats::averageLoadPenalty);
+          MetricUtils.gauge(metricClazz, "evictionWeight", () ->
+              (Gauge<Long>) cacheStats::evictionWeight);
+
+          log.debug("{}: {}", metricClazz.getSimpleName(), cacheStats);
+        }, 0, config.getStatsIntervalSeconds(), TimeUnit.SECONDS);
+      }
+
+      return new ClientCache<>(config, cache, loadFunction, weigher, ttlSecondsFunction);
+    }
+  }
+}
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java new file mode 100644 index 0000000000000..3b35dc528915a --- /dev/null +++
b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
@@ -0,0 +1,175 @@
+package com.linkedin.entity.client;
+
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.Weigher;
+import com.linkedin.common.client.ClientCache;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.entity.EnvelopedAspectMap;
+import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig;
+import com.linkedin.util.Pair;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NonNull;
+
+import javax.annotation.Nonnull;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName;
+
+/**
+ * Aspect-level cache in front of an entity batch-get function.
+ *
+ * <p>When {@code config.isEnabled()} is true, lookups are served per (urn, aspect name) key from a
+ * {@link ClientCache}; otherwise every call is passed straight to the load function.
+ */
+@Builder
+public class EntityClientCache {
+  @NonNull
+  private EntityClientCacheConfig config;
+  @NonNull
+  private final ClientCache<Key, EnvelopedAspect, EntityClientCacheConfig> cache;
+  @NonNull
+  private BiFunction<Set<Urn>, Set<String>, Map<Urn, EntityResponse>> loadFunction;
+
+  /**
+   * Fetches a single entity.
+   *
+   * @param urn entity to fetch
+   * @param aspectNames aspects to include
+   * @return the entry for {@code urn} in the batch result, or {@code null} if it has none
+   */
+  public EntityResponse getV2(@Nonnull final Urn urn, @Nonnull final Set<String> aspectNames) {
+    return batchGetV2(Set.of(urn), aspectNames).get(urn);
+  }
+
+  /**
+   * Fetches the given aspects for the given urns.
+   *
+   * @param urns entities to fetch
+   * @param aspectNames aspects to include for every urn
+   * @return responses keyed by urn; with the cache enabled, only urns that yielded an aspect appear
+   */
+  public Map<Urn, EntityResponse> batchGetV2(@Nonnull final Set<Urn> urns, @Nonnull final Set<String> aspectNames) {
+    final Map<Urn, EntityResponse> response;
+
+    if (config.isEnabled()) {
+      // one cache key per requested (urn, aspect) pair
+      Set<Key> keys = urns.stream()
+          .flatMap(urn -> aspectNames.stream()
+              .map(a -> Key.builder().urn(urn).aspectName(a).build()))
+          .collect(Collectors.toSet());
+      Map<Key, EnvelopedAspect> envelopedAspects = cache.getAll(keys);
+
+      // regroup the per-aspect entries into one response per urn
+      Set<EntityResponse> responses = envelopedAspects.entrySet().stream()
+          .map(entry -> Pair.of(entry.getKey().getUrn(), entry.getValue()))
+          .collect(Collectors.groupingBy(Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet())))
+          .entrySet().stream().map(e -> toEntityResponse(e.getKey(), e.getValue()))
+          .collect(Collectors.toSet());
+
+      response = responses.stream().collect(Collectors.toMap(EntityResponse::getUrn, Function.identity()));
+    } else {
+      response = loadFunction.apply(urns, aspectNames);
+    }
+
+    return response;
+  }
+
+  /** Assembles an {@link EntityResponse} for {@code urn} from its aspects, keyed by aspect name. */
+  private static EntityResponse toEntityResponse(Urn urn, Collection<EnvelopedAspect> envelopedAspects) {
+    final EntityResponse response = new EntityResponse();
+    response.setUrn(urn);
+    response.setEntityName(urnToEntityName(urn));
+    response.setAspects(new EnvelopedAspectMap(
+        envelopedAspects.stream()
+            .collect(Collectors.toMap(EnvelopedAspect::getName, aspect -> aspect))
+    ));
+    return response;
+  }
+
+  /** Builder customization; {@link #build(Class)} creates the backing {@link ClientCache}. */
+  public static class EntityClientCacheBuilder {
+
+    // Private and unused; presumably declared so Lombok does not generate a public cache(...) setter.
+    private EntityClientCacheBuilder cache(LoadingCache<Key, EnvelopedAspect> cache) {
+      return this;
+    }
+
+    /**
+     * Creates the aspect cache and returns the finished {@link EntityClientCache}.
+     *
+     * @param metricClazz class handed to the {@link ClientCache} builder's {@code build(Class)}
+     * @return cache backed by the configured load function
+     */
+    public EntityClientCache build(Class<?> metricClazz) {
+      // estimate size
+      Weigher<Key, EnvelopedAspect> weighByEstimatedSize = (key, value) ->
+          value.getValue().data().values().parallelStream()
+              .mapToInt(o -> o.toString().getBytes().length)
+              .sum();
+
+      // batch loads data from entity client (restli or java)
+      Function<Iterable<? extends Key>, Map<Key, EnvelopedAspect>> loader = (Iterable<? extends Key> keys) -> {
+        Map<String, Set<Key>> keysByEntity = StreamSupport.stream(keys.spliterator(), true)
+            .collect(Collectors.groupingBy(Key::getEntityName, Collectors.toSet()));
+
+        Stream<Map.Entry<Key, EnvelopedAspect>> results = keysByEntity.entrySet().parallelStream()
+            .flatMap(entry -> {
+              Set<Urn> urns = entry.getValue().stream()
+                  .map(Key::getUrn)
+                  .collect(Collectors.toSet());
+              // loadFunction takes (urns, aspectNames), so collect the aspect names of the keys
+              // being loaded, not their entity names
+              Set<String> aspects = entry.getValue().stream()
+                  .map(Key::getAspectName)
+                  .collect(Collectors.toSet());
+              return loadFunction.apply(urns, aspects).entrySet().stream();
+            })
+            .flatMap(resp -> resp.getValue().getAspects().values().stream()
+                .map(envAspect -> {
+                  Key key = Key.builder().urn(resp.getKey()).aspectName(envAspect.getName()).build();
+                  return Map.entry(key, envAspect);
+                }));
+
+        return results.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+      };
+
+      // ideally the cache time comes from caching headers from service, but configuration driven for now
+      BiFunction<EntityClientCacheConfig, Key, Integer> ttlSeconds = (config, key) ->
+          Optional.ofNullable(config.getEntityAspectTTLSeconds()).orElse(Map.of())
+              .getOrDefault(key.getEntityName(), Map.of())
+              .getOrDefault(key.getAspectName(), config.getDefaultTTLSeconds());
+
+      cache = ClientCache.<Key, EnvelopedAspect, EntityClientCacheConfig>builder()
+          .weigher(weighByEstimatedSize)
+          .config(config)
+          .loadFunction(loader)
+          .ttlSecondsFunction(ttlSeconds)
+          .build(metricClazz);
+
+      return new EntityClientCache(config, cache, loadFunction);
+    }
+  }
+
+  /** Cache key: one aspect of one entity. */
+  @Data
+  @Builder
+  protected static class Key {
+    private final Urn urn;
+    private final String aspectName;
+
+    public String getEntityName() {
+      return urn.getEntityType();
+    }
+  }
+}
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java new file mode 100644 index 0000000000000..94067abd0cf65 --- /dev/null +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java @@ -0,0 +1,91 @@ +package com.linkedin.entity.client; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.EntityResponse; +import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.PlatformEvent; +import com.linkedin.r2.RemoteInvocationException; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.net.URISyntaxException; +import java.util.Map; +import java.util.Set; + +/** + * Adds entity/aspect cache and assumes system authentication + */ +public interface SystemEntityClient extends EntityClient { + + EntityClientCache getEntityClientCache(); + Authentication getSystemAuthentication(); + + /** + * Builds the cache + * @param systemAuthentication system authentication + * @param 
cacheConfig cache configuration + * @return the cache + */ + default EntityClientCache buildEntityClientCache(Class metricClazz, Authentication systemAuthentication, EntityClientCacheConfig cacheConfig) { + return EntityClientCache.builder() + .config(cacheConfig) + .loadFunction((Set urns, Set aspectNames) -> { + try { + String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); + + if (urns.stream().anyMatch(urn -> !urn.getEntityType().equals(entityName))) { + throw new IllegalArgumentException("Urns must be of the same entity type. RestliEntityClient API limitation."); + } + + return batchGetV2(entityName, urns, aspectNames, systemAuthentication); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException(e); + } + }).build(metricClazz); + } + + /** + * Get an entity by urn with the given aspects + * @param urn the id of the entity + * @param aspectNames aspects of the entity + * @return response object + * @throws RemoteInvocationException + * @throws URISyntaxException + */ + @Nullable + default EntityResponse getV2(@Nonnull Urn urn, @Nonnull Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return getEntityClientCache().getV2(urn, aspectNames); + } + + /** + * Batch get a set of aspects for a single entity type, multiple ids with the given aspects. 
+ * + * @param urns the urns of the entities to batch get + * @param aspectNames the aspect names to batch get + * @throws RemoteInvocationException + */ + @Nonnull + default Map batchGetV2(@Nonnull Set urns, @Nonnull Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + return getEntityClientCache().batchGetV2(urns, aspectNames); + } + + default void producePlatformEvent(@Nonnull String name, @Nullable String key, @Nonnull PlatformEvent event) throws Exception { + producePlatformEvent(name, key, event, getSystemAuthentication()); + } + + default boolean exists(@Nonnull Urn urn) throws RemoteInvocationException { + return exists(urn, getSystemAuthentication()); + } + + default String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, final boolean async) throws RemoteInvocationException { + return ingestProposal(metadataChangeProposal, getSystemAuthentication(), async); + } + + default void setWritable(boolean canWrite) throws RemoteInvocationException { + setWritable(canWrite, getSystemAuthentication()); + } +} diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java new file mode 100644 index 0000000000000..f3c343534209c --- /dev/null +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -0,0 +1,25 @@ +package com.linkedin.entity.client; + +import com.datahub.authentication.Authentication; +import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; +import com.linkedin.parseq.retry.backoff.BackoffPolicy; +import com.linkedin.restli.client.Client; +import lombok.Getter; + +import javax.annotation.Nonnull; + +/** + * Restli backed SystemEntityClient + */ +@Getter +public class SystemRestliEntityClient extends RestliEntityClient implements SystemEntityClient { + private final 
EntityClientCache entityClientCache; + private final Authentication systemAuthentication; + + public SystemRestliEntityClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount, + Authentication systemAuthentication, EntityClientCacheConfig cacheConfig) { + super(restliClient, backoffPolicy, retryCount); + this.systemAuthentication = systemAuthentication; + this.entityClientCache = buildEntityClientCache(SystemRestliEntityClient.class, systemAuthentication, cacheConfig); + } +} diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java index 47a15ccdd3ffc..d2b8499615e8d 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClient.java @@ -5,6 +5,7 @@ import com.linkedin.common.WindowDuration; import com.linkedin.common.client.BaseClient; +import com.linkedin.metadata.config.cache.client.UsageClientCacheConfig; import com.linkedin.parseq.retry.backoff.BackoffPolicy; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.restli.client.Client; @@ -17,19 +18,39 @@ public class UsageClient extends BaseClient { private static final UsageStatsRequestBuilders USAGE_STATS_REQUEST_BUILDERS = new UsageStatsRequestBuilders(); - public UsageClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount) { + private final UsageClientCache usageClientCache; + + public UsageClient(@Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount, + Authentication systemAuthentication, UsageClientCacheConfig cacheConfig) { super(restliClient, backoffPolicy, retryCount); + this.usageClientCache = UsageClientCache.builder() + .config(cacheConfig) + .loadFunction((String resource, UsageTimeRange range) -> { + try { + return 
getUsageStats(resource, range, systemAuthentication); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException(e); + } + }).build(); + } + + /** + * Gets the {@link UsageQueryResult} usage statistics for the given resource over the given time range. + * Using cache and system authentication. + * Validate permissions before use! + */ + @Nonnull + public UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range) { + return usageClientCache.getUsageStats(resource, range); } /** * Gets a specific version of downstream {@link EntityRelationships} for the given dataset. */ @Nonnull - public UsageQueryResult getUsageStats( - @Nonnull String resource, - @Nonnull UsageTimeRange range, - @Nonnull Authentication authentication - ) throws RemoteInvocationException, URISyntaxException { + private UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range, + @Nonnull Authentication authentication) + throws RemoteInvocationException, URISyntaxException { final UsageStatsDoQueryRangeRequestBuilder requestBuilder = USAGE_STATS_REQUEST_BUILDERS.actionQueryRange() .resourceParam(resource) .durationParam(WindowDuration.DAY) diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java new file mode 100644 index 0000000000000..a04c1e90fb4a3 --- /dev/null +++ b/metadata-service/restli-client/src/main/java/com/linkedin/usage/UsageClientCache.java @@ -0,0 +1,75 @@ +package com.linkedin.usage; + +import com.github.benmanes.caffeine.cache.LoadingCache; +import com.github.benmanes.caffeine.cache.Weigher; +import com.linkedin.common.client.ClientCache; +import com.linkedin.metadata.config.cache.client.UsageClientCacheConfig; +import lombok.Builder; +import lombok.Data; +import lombok.NonNull; + +import javax.annotation.Nonnull; +import java.util.Map; +import java.util.function.BiFunction; 
+import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + + +@Builder +public class UsageClientCache { + @NonNull + private UsageClientCacheConfig config; + @NonNull + private final ClientCache cache; + @NonNull + private BiFunction loadFunction; + + public UsageQueryResult getUsageStats(@Nonnull String resource, @Nonnull UsageTimeRange range) { + if (config.isEnabled()) { + return cache.get(Key.builder().resource(resource).range(range).build()); + } else { + return loadFunction.apply(resource, range); + } + } + + public static class UsageClientCacheBuilder { + + private UsageClientCacheBuilder cache(LoadingCache cache) { + return this; + } + + public UsageClientCache build() { + // estimate size + Weigher weighByEstimatedSize = (key, value) -> + value.data().values().parallelStream() + .mapToInt(o -> o.toString().getBytes().length) + .sum(); + + // batch loads data from usage client + Function, Map> loader = (Iterable keys) -> + StreamSupport.stream(keys.spliterator(), true) + .map(k -> Map.entry(k, loadFunction.apply(k.getResource(), k.getRange()))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // default ttl only + BiFunction ttlSeconds = (config, key) -> config.getDefaultTTLSeconds(); + + cache = ClientCache.builder() + .weigher(weighByEstimatedSize) + .config(config) + .loadFunction(loader) + .ttlSecondsFunction(ttlSeconds) + .build(UsageClientCache.class); + + return new UsageClientCache(config, cache, loadFunction); + } + } + + @Data + @Builder + protected static class Key { + private final String resource; + private final UsageTimeRange range; + } +} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java index 3d90cba85b0fb..9a8848e090fb8 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java +++ 
b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.utils.metrics; import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.SharedMetricRegistries; import com.codahale.metrics.Timer; @@ -48,4 +49,8 @@ public static Timer timer(Class klass, String metricName) { public static Timer timer(String metricName) { return REGISTRY.timer(MetricRegistry.name(metricName)); } + + public static > T gauge(Class clazz, String metricName, MetricRegistry.MetricSupplier supplier) { + return REGISTRY.gauge(MetricRegistry.name(clazz, metricName), supplier); + } }