diff --git a/build.gradle b/build.gradle index 025c588da2b52..cf55a59cfe694 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,7 @@ buildscript { dependencies { classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4' - classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.8.1' + classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0' classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0" classpath "com.palantir.gradle.gitversion:gradle-git-version:3.0.0" @@ -67,8 +67,8 @@ project.ext.externalDependency = [ 'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2', 'antlr4': 'org.antlr:antlr4:4.7.2', 'assertJ': 'org.assertj:assertj-core:3.11.1', - 'avro_1_7': 'org.apache.avro:avro:1.7.7', - 'avroCompiler_1_7': 'org.apache.avro:avro-compiler:1.7.7', + 'avro': 'org.apache.avro:avro:1.11.3', + 'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3', 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10', 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8', @@ -127,7 +127,6 @@ project.ext.externalDependency = [ 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jna': 'net.java.dev.jna:jna:5.12.1', 'jsonPatch': 'com.github.java-json-tools:json-patch:1.13', - 'jsonSchemaAvro': 'com.github.fge:json-schema-avro:0.1.4', 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20230227', diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 65b3780431db9..1f9d30d520171 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -5,7 +5,14 @@ buildscript { } dependencies { - implementation('io.acryl:json-schema-avro:0.1.5') { + /** + * Forked version of abandoned repository: 
https://github.com/fge/json-schema-avro + * Maintainer last active 2014, we maintain an active fork of this repository to utilize mapping Avro schemas to Json Schemas, + * repository is as close to official library for this as you can get. Original maintainer is one of the authors of Json Schema spec. + * Other companies are also separately maintaining forks (like: https://github.com/java-json-tools/json-schema-avro). + * We have built several customizations on top of it for various bug fixes, especially around union schemas + */ + implementation('io.acryl:json-schema-avro:0.2.2') { exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' exclude group: 'com.google.guava', module: 'guava' } diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 98f3b82285eda..fe04c3629fe58 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -56,7 +56,7 @@ public class AuthModule extends AbstractModule { * Pac4j Stores Session State in a browser-side cookie in encrypted fashion. This configuration * value provides a stable encryption base from which to derive the encryption key. * - * We hash this value (SHA1), then take the first 16 bytes as the AES key. + * We hash this value (SHA256), then take the first 16 bytes as the AES key. */ private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key"; private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider"; @@ -93,7 +93,7 @@ protected void configure() { // it to hex and slice the first 16 bytes, because AES key length must strictly // have a specific length. 
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF); - final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8)); + final String aesKeyHash = DigestUtils.sha256Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8)); final String aesEncryptionKey = aesKeyHash.substring(0, 16); playCacheCookieStore = new PlayCookieSessionStore( new ShiroAesDataEncrypter(aesEncryptionKey.getBytes())); diff --git a/datahub-frontend/app/auth/AuthUtils.java b/datahub-frontend/app/auth/AuthUtils.java index 80bd631d0db70..386eee725c83d 100644 --- a/datahub-frontend/app/auth/AuthUtils.java +++ b/datahub-frontend/app/auth/AuthUtils.java @@ -41,6 +41,11 @@ public class AuthUtils { */ public static final String SYSTEM_CLIENT_SECRET_CONFIG_PATH = "systemClientSecret"; + /** + * Cookie name for redirect url that is manually separated from the session to reduce size + */ + public static final String REDIRECT_URL_COOKIE_NAME = "REDIRECT_URL"; + public static final CorpuserUrn DEFAULT_ACTOR_URN = new CorpuserUrn("datahub"); public static final String LOGIN_ROUTE = "/login"; @@ -77,7 +82,9 @@ public static boolean isEligibleForForwarding(Http.Request req) { * as well as their agreement to determine authentication status. 
*/ public static boolean hasValidSessionCookie(final Http.Request req) { - return req.session().data().containsKey(ACTOR) + Map sessionCookie = req.session().data(); + return sessionCookie.containsKey(ACCESS_TOKEN) + && sessionCookie.containsKey(ACTOR) && req.getCookie(ACTOR).isPresent() && req.session().data().get(ACTOR).equals(req.getCookie(ACTOR).get().value()); } diff --git a/datahub-frontend/app/auth/cookie/CustomCookiesModule.java b/datahub-frontend/app/auth/cookie/CustomCookiesModule.java new file mode 100644 index 0000000000000..a6dbd69a93889 --- /dev/null +++ b/datahub-frontend/app/auth/cookie/CustomCookiesModule.java @@ -0,0 +1,22 @@ +package auth.cookie; + +import com.google.inject.AbstractModule; +import play.api.libs.crypto.CookieSigner; +import play.api.libs.crypto.CookieSignerProvider; +import play.api.mvc.DefaultFlashCookieBaker; +import play.api.mvc.FlashCookieBaker; +import play.api.mvc.SessionCookieBaker; + + +public class CustomCookiesModule extends AbstractModule { + + @Override + public void configure() { + bind(CookieSigner.class).toProvider(CookieSignerProvider.class); + // We override the session cookie baker to not use a fallback, this prevents using an old URL Encoded cookie + bind(SessionCookieBaker.class).to(CustomSessionCookieBaker.class); + // We don't care about flash cookies, we don't use them + bind(FlashCookieBaker.class).to(DefaultFlashCookieBaker.class); + } + +} diff --git a/datahub-frontend/app/auth/cookie/CustomSessionCookieBaker.scala b/datahub-frontend/app/auth/cookie/CustomSessionCookieBaker.scala new file mode 100644 index 0000000000000..6f0a6604fa64b --- /dev/null +++ b/datahub-frontend/app/auth/cookie/CustomSessionCookieBaker.scala @@ -0,0 +1,25 @@ +package auth.cookie + +import com.google.inject.Inject +import play.api.http.{SecretConfiguration, SessionConfiguration} +import play.api.libs.crypto.CookieSigner +import play.api.mvc.DefaultSessionCookieBaker + +import scala.collection.immutable.Map + +/** + * Overrides 
default fallback to URL Encoding behavior, prevents usage of old URL encoded session cookies + * @param config + * @param secretConfiguration + * @param cookieSigner + */ +class CustomSessionCookieBaker @Inject() ( + override val config: SessionConfiguration, + override val secretConfiguration: SecretConfiguration, + cookieSigner: CookieSigner +) extends DefaultSessionCookieBaker(config, secretConfiguration, cookieSigner) { + // Has to be a Scala class because it extends a trait with concrete implementations, Scala does compilation tricks + + // Forces use of jwt encoding and disallows fallback to legacy url encoding + override def decode(encodedData: String): Map[String, String] = jwtCodec.decode(encodedData) +} diff --git a/datahub-frontend/app/auth/sso/oidc/OidcAuthorizationGenerator.java b/datahub-frontend/app/auth/sso/oidc/OidcAuthorizationGenerator.java index 3f864ed5abddf..baca144610ec4 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcAuthorizationGenerator.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcAuthorizationGenerator.java @@ -1,19 +1,9 @@ package auth.sso.oidc; -import java.text.ParseException; import java.util.Map.Entry; import java.util.Optional; -import com.nimbusds.jose.Algorithm; -import com.nimbusds.jose.Header; -import com.nimbusds.jose.JWEAlgorithm; -import com.nimbusds.jose.JWSAlgorithm; -import com.nimbusds.jose.util.Base64URL; -import com.nimbusds.jose.util.JSONObjectUtils; -import com.nimbusds.jwt.EncryptedJWT; import com.nimbusds.jwt.JWTParser; -import com.nimbusds.jwt.SignedJWT; -import net.minidev.json.JSONObject; import org.pac4j.core.authorization.generator.AuthorizationGenerator; import org.pac4j.core.context.WebContext; import org.pac4j.core.profile.AttributeLocation; @@ -63,32 +53,5 @@ public Optional generate(WebContext context, UserProfile profile) { return Optional.ofNullable(profile); } - - private static JWT parse(final String s) throws ParseException { - final int firstDotPos = s.indexOf("."); - - if (firstDotPos == 
-1) { - throw new ParseException("Invalid JWT serialization: Missing dot delimiter(s)", 0); - } - - Base64URL header = new Base64URL(s.substring(0, firstDotPos)); - JSONObject jsonObject; - - try { - jsonObject = JSONObjectUtils.parse(header.decodeToString()); - } catch (ParseException e) { - throw new ParseException("Invalid unsecured/JWS/JWE header: " + e.getMessage(), 0); - } - - Algorithm alg = Header.parseAlgorithm(jsonObject); - - if (alg instanceof JWSAlgorithm) { - return SignedJWT.parse(s); - } else if (alg instanceof JWEAlgorithm) { - return EncryptedJWT.parse(s); - } else { - throw new AssertionError("Unexpected algorithm type: " + alg); - } - } } diff --git a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java index 4bde0872fc082..7164710f4e0de 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java @@ -38,6 +38,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -49,19 +50,21 @@ import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.pac4j.core.config.Config; +import org.pac4j.core.context.Cookie; import org.pac4j.core.engine.DefaultCallbackLogic; import org.pac4j.core.http.adapter.HttpActionAdapter; import org.pac4j.core.profile.CommonProfile; import org.pac4j.core.profile.ProfileManager; import org.pac4j.core.profile.UserProfile; +import org.pac4j.core.util.Pac4jConstants; import org.pac4j.play.PlayWebContext; import play.mvc.Result; import auth.sso.SsoManager; -import static auth.AuthUtils.createActorCookie; -import static auth.AuthUtils.createSessionMap; +import static auth.AuthUtils.*; import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; import static 
com.linkedin.metadata.Constants.GROUP_MEMBERSHIP_ASPECT_NAME; +import static org.pac4j.play.store.PlayCookieSessionStore.*; import static play.mvc.Results.internalServerError; @@ -97,6 +100,9 @@ public OidcCallbackLogic(final SsoManager ssoManager, final Authentication syste public Result perform(PlayWebContext context, Config config, HttpActionAdapter httpActionAdapter, String defaultUrl, Boolean saveInSession, Boolean multiProfile, Boolean renewSession, String defaultClient) { + + setContextRedirectUrl(context); + final Result result = super.perform(context, config, httpActionAdapter, defaultUrl, saveInSession, multiProfile, renewSession, defaultClient); @@ -111,6 +117,15 @@ public Result perform(PlayWebContext context, Config config, return handleOidcCallback(oidcConfigs, result, context, getProfileManager(context)); } + @SuppressWarnings("unchecked") + private void setContextRedirectUrl(PlayWebContext context) { + Optional redirectUrl = context.getRequestCookies().stream() + .filter(cookie -> REDIRECT_URL_COOKIE_NAME.equals(cookie.getName())).findFirst(); + redirectUrl.ifPresent( + cookie -> context.getSessionStore().set(context, Pac4jConstants.REQUESTED_URL, + JAVA_SER_HELPER.deserializeFromBytes(uncompressBytes(Base64.getDecoder().decode(cookie.getValue()))))); + } + private Result handleOidcCallback(final OidcConfigs oidcConfigs, final Result result, final PlayWebContext context, final ProfileManager profileManager) { diff --git a/datahub-frontend/app/controllers/AuthenticationController.java b/datahub-frontend/app/controllers/AuthenticationController.java index e9ddfb2611ceb..4f89f4f67e149 100644 --- a/datahub-frontend/app/controllers/AuthenticationController.java +++ b/datahub-frontend/app/controllers/AuthenticationController.java @@ -13,14 +13,15 @@ import com.typesafe.config.Config; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Optional; import javax.annotation.Nonnull; import 
javax.inject.Inject; import org.apache.commons.lang3.StringUtils; import org.pac4j.core.client.Client; +import org.pac4j.core.context.Cookie; import org.pac4j.core.exception.http.FoundAction; import org.pac4j.core.exception.http.RedirectionAction; -import org.pac4j.core.util.Pac4jConstants; import org.pac4j.play.PlayWebContext; import org.pac4j.play.http.PlayHttpActionAdapter; import org.pac4j.play.store.PlaySessionStore; @@ -33,18 +34,9 @@ import play.mvc.Results; import security.AuthenticationManager; -import static auth.AuthUtils.DEFAULT_ACTOR_URN; -import static auth.AuthUtils.EMAIL; -import static auth.AuthUtils.FULL_NAME; -import static auth.AuthUtils.INVITE_TOKEN; -import static auth.AuthUtils.LOGIN_ROUTE; -import static auth.AuthUtils.PASSWORD; -import static auth.AuthUtils.RESET_TOKEN; -import static auth.AuthUtils.TITLE; -import static auth.AuthUtils.USER_NAME; -import static auth.AuthUtils.createActorCookie; -import static auth.AuthUtils.createSessionMap; +import static auth.AuthUtils.*; import static org.pac4j.core.client.IndirectClient.ATTEMPTED_AUTHENTICATION_SUFFIX; +import static org.pac4j.play.store.PlayCookieSessionStore.*; // TODO add logging. @@ -297,8 +289,12 @@ private Optional redirectToIdentityProvider(Http.RequestHeader request, } private void configurePac4jSessionStore(PlayWebContext context, Client client, String redirectPath) { - // Set the originally requested path for post-auth redirection. - _playSessionStore.set(context, Pac4jConstants.REQUESTED_URL, new FoundAction(redirectPath)); + // Set the originally requested path for post-auth redirection. 
We split off into a separate cookie from the session + // to reduce size of the session cookie + FoundAction foundAction = new FoundAction(redirectPath); + byte[] javaSerBytes = JAVA_SER_HELPER.serializeToBytes(foundAction); + String serialized = Base64.getEncoder().encodeToString(compressBytes(javaSerBytes)); + context.addResponseCookie(new Cookie(REDIRECT_URL_COOKIE_NAME, serialized)); // This is to prevent previous login attempts from being cached. // We replicate the logic here, which is buried in the Pac4j client. if (_playSessionStore.get(context, client.getName() + ATTEMPTED_AUTHENTICATION_SUFFIX) != null) { diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 18d901d5ee7dd..1a62c8547e721 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -22,11 +22,16 @@ play.application.loader = play.inject.guice.GuiceApplicationLoader play.http.parser.maxMemoryBuffer = 10MB play.http.parser.maxMemoryBuffer = ${?DATAHUB_PLAY_MEM_BUFFER_SIZE} -# TODO: Disable legacy URL encoding eventually +play.modules.disabled += "play.api.mvc.LegacyCookiesModule" play.modules.disabled += "play.api.mvc.CookiesModule" -play.modules.enabled += "play.api.mvc.LegacyCookiesModule" +play.modules.enabled += "auth.cookie.CustomCookiesModule" play.modules.enabled += "auth.AuthModule" +jwt { + # 'alg' https://tools.ietf.org/html/rfc7515#section-4.1.1 + signatureAlgorithm = "HS256" +} + # We override the Akka server provider to allow setting the max header count to a higher value # This is useful while using proxies like Envoy that result in the frontend server rejecting GMS # responses as there's more than the max of 64 allowed headers @@ -199,10 +204,14 @@ auth.native.enabled = ${?AUTH_NATIVE_ENABLED} # auth.native.enabled = false # auth.oidc.enabled = false # (or simply omit oidc configurations) -# Login session expiration time +# Login session expiration time, controls when the actor cookie is expired 
on the browser side auth.session.ttlInHours = 24 auth.session.ttlInHours = ${?AUTH_SESSION_TTL_HOURS} +# Control the length of time a session token is valid +play.http.session.maxAge = 24h +play.http.session.maxAge = ${?MAX_SESSION_TOKEN_AGE} + analytics.enabled = true analytics.enabled = ${?DATAHUB_ANALYTICS_ENABLED} diff --git a/datahub-frontend/test/app/ApplicationTest.java b/datahub-frontend/test/app/ApplicationTest.java index 417fd79e76bbd..f27fefdb79669 100644 --- a/datahub-frontend/test/app/ApplicationTest.java +++ b/datahub-frontend/test/app/ApplicationTest.java @@ -1,6 +1,11 @@ package app; +import com.nimbusds.jwt.JWT; +import com.nimbusds.jwt.JWTClaimsSet; +import com.nimbusds.jwt.JWTParser; import controllers.routes; +import java.text.ParseException; +import java.util.Date; import no.nav.security.mock.oauth2.MockOAuth2Server; import no.nav.security.mock.oauth2.token.DefaultOAuth2TokenCallback; import okhttp3.mockwebserver.MockResponse; @@ -27,8 +32,6 @@ import java.io.IOException; import java.net.InetAddress; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -149,7 +152,7 @@ public void testOpenIdConfig() { } @Test - public void testHappyPathOidc() throws InterruptedException { + public void testHappyPathOidc() throws ParseException { browser.goTo("/authenticate"); assertEquals("", browser.url()); @@ -157,8 +160,23 @@ public void testHappyPathOidc() throws InterruptedException { assertEquals(TEST_USER, actorCookie.getValue()); Cookie sessionCookie = browser.getCookie("PLAY_SESSION"); - assertTrue(sessionCookie.getValue().contains("token=" + TEST_TOKEN)); - assertTrue(sessionCookie.getValue().contains("actor=" + URLEncoder.encode(TEST_USER, StandardCharsets.UTF_8))); + String jwtStr = sessionCookie.getValue(); + JWT jwt = JWTParser.parse(jwtStr); + JWTClaimsSet claims = jwt.getJWTClaimsSet(); + Map data = (Map) claims.getClaim("data"); + assertEquals(TEST_TOKEN, data.get("token")); + 
assertEquals(TEST_USER, data.get("actor")); + // Default expiration is 24h, so should always be less than current time + 1 day since it stamps the time before this executes + assertTrue(claims.getExpirationTime().compareTo(new Date(System.currentTimeMillis() + (24 * 60 * 60 * 1000))) < 0); + } + + @Test + public void testAPI() throws ParseException { + testHappyPathOidc(); + int requestCount = _gmsServer.getRequestCount(); + + browser.goTo("/api/v2/graphql/"); + assertEquals(++requestCount, _gmsServer.getRequestCount()); } @Test diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3ba0cc1f747e3..b99f712034fe0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -821,6 +821,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) .dataFetcher("dataPlatform", getResolver(dataPlatformType)) + .dataFetcher("dataPlatformInstance", getResolver(dataPlatformInstanceType)) .dataFetcher("mlFeatureTable", getResolver(mlFeatureTableType)) .dataFetcher("mlFeature", getResolver(mlFeatureType)) .dataFetcher("mlPrimaryKey", getResolver(mlPrimaryKeyType)) @@ -1291,7 +1292,8 @@ private void configureCorpUserResolvers(final RuntimeWiring.Builder builder) { */ private void configureCorpGroupResolvers(final RuntimeWiring.Builder builder) { builder.type("CorpGroup", typeWiring -> typeWiring - .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))); + .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) + .dataFetcher("exists", new EntityExistsResolver(entityService))); builder.type("CorpGroupInfo", typeWiring -> typeWiring 
.dataFetcher("admins", new LoadableTypeBatchResolver<>(corpUserType, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 3089b8c8fc2db..03e63c7fb472f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -4,7 +4,7 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -90,7 +90,7 @@ public static boolean canManageTags(@Nonnull QueryContext context) { } public static boolean canDeleteEntity(@Nonnull Urn entityUrn, @Nonnull QueryContext context) { - return isAuthorized(context, Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); + return isAuthorized(context, Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); } public static boolean canManageUserCredentials(@Nonnull QueryContext context) { @@ -173,7 +173,7 @@ public static boolean canDeleteQuery(@Nonnull Urn entityUrn, @Nonnull List public static boolean isAuthorized( @Nonnull QueryContext context, - @Nonnull Optional resourceSpec, + @Nonnull Optional resourceSpec, @Nonnull PoliciesConfig.Privilege privilege) { final Authorizer authorizer = context.getAuthorizer(); final String actor = context.getActorUrn(); @@ -196,7 +196,7 @@ public static boolean isAuthorized( @Nonnull String resource, @Nonnull 
DisjunctivePrivilegeGroup privilegeGroup ) { - final ResourceSpec resourceSpec = new ResourceSpec(resourceType, resource); + final EntitySpec resourceSpec = new EntitySpec(resourceType, resource); return AuthUtil.isAuthorized(authorizer, actor, Optional.of(resourceSpec), privilegeGroup); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index 23be49c7e7140..2873866bb34f7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.linkedin.common.urn.Urn; @@ -104,7 +104,7 @@ private CorpUser createPartialUser(final Urn userUrn) { private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java index 20361830ad5a5..e4bec8e896fdf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -52,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environme private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java index 7db0b6f826a04..1140c031f1d35 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java @@ -5,6 +5,7 @@ import com.linkedin.datahub.graphql.generated.IngestionConfig; import com.linkedin.datahub.graphql.generated.IngestionSchedule; import com.linkedin.datahub.graphql.generated.IngestionSource; +import com.linkedin.datahub.graphql.generated.StringMapEntry; import com.linkedin.datahub.graphql.generated.StructuredReport; import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; import com.linkedin.entity.EntityResponse; @@ -21,6 +22,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; @@ -143,6 +145,14 @@ public static 
IngestionConfig mapIngestionSourceConfig(final DataHubIngestionSou result.setVersion(config.getVersion()); result.setExecutorId(config.getExecutorId()); result.setDebugMode(config.isDebugMode()); + if (config.getExtraArgs() != null) { + List extraArgs = config.getExtraArgs() + .keySet() + .stream() + .map(key -> new StringMapEntry(key, config.getExtraArgs().get(key))) + .collect(Collectors.toList()); + result.setExtraArgs(extraArgs); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java index e5064e6620526..ea20b837e0a1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java @@ -117,6 +117,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (ingestionSourceInfo.getConfig().hasDebugMode()) { debugMode = ingestionSourceInfo.getConfig().isDebugMode() ? 
"true" : "false"; } + if (ingestionSourceInfo.getConfig().hasExtraArgs()) { + arguments.putAll(ingestionSourceInfo.getConfig().getExtraArgs()); + } arguments.put(DEBUG_MODE_ARG_NAME, debugMode); execInput.setArgs(new StringMap(arguments)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java index 2ce394ad5ba84..68e334bd976f8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java @@ -1,10 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.ingest.source; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; +import com.linkedin.datahub.graphql.generated.StringMapEntryInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceConfigInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceScheduleInput; @@ -17,6 +19,8 @@ import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import java.net.URISyntaxException; @@ -108,6 +112,12 @@ private DataHubIngestionSourceConfig mapConfig(final UpdateIngestionSourceConfig if (input.getDebugMode() != null) { result.setDebugMode(input.getDebugMode()); } + if 
(input.getExtraArgs() != null) { + Map extraArgs = input.getExtraArgs() + .stream() + .collect(Collectors.toMap(StringMapEntryInput::getKey, StringMapEntryInput::getValue)); + result.setExtraArgs(new StringMap(extraArgs)); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 197ca8640559d..f13ebf8373e91 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.load; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.Entity; @@ -79,7 +79,7 @@ public TimeSeriesAspectResolver( private boolean isAuthorized(QueryContext context, String urn) { if (_entityName.equals(Constants.DATASET_ENTITY_NAME) && _aspectName.equals( Constants.DATASET_PROFILE_ASPECT_NAME)) { - return AuthorizationUtils.isAuthorized(context, Optional.of(new ResourceSpec(_entityName, urn)), + return AuthorizationUtils.isAuthorized(context, Optional.of(new EntitySpec(_entityName, urn)), PoliciesConfig.VIEW_DATASET_PROFILE_PRIVILEGE); } return true; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 5ca7007d98e43..3f2dab0a5ba71 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -2,14 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; - import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnerInput; -import com.linkedin.datahub.graphql.generated.OwnerEntityType; import com.linkedin.datahub.graphql.generated.OwnerInput; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; @@ -20,7 +17,6 @@ import lombok.extern.slf4j.Slf4j; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; @Slf4j @@ -32,30 +28,33 @@ public class AddOwnerResolver implements DataFetcher> @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final AddOwnerInput input = bindArgument(environment.getArgument("input"), AddOwnerInput.class); - Urn ownerUrn = Urn.createFromString(input.getOwnerUrn()); - OwnerEntityType ownerEntityType = input.getOwnerEntityType(); - OwnershipType type = input.getType() == null ? OwnershipType.NONE : input.getType(); - String ownershipUrn = input.getOwnershipTypeUrn() == null ? 
mapOwnershipTypeToEntity(type.name()) : input.getOwnershipTypeUrn(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); + OwnerInput.Builder ownerInputBuilder = OwnerInput.builder(); + ownerInputBuilder.setOwnerUrn(input.getOwnerUrn()); + ownerInputBuilder.setOwnerEntityType(input.getOwnerEntityType()); + if (input.getType() != null) { + ownerInputBuilder.setType(input.getType()); + } + if (input.getOwnershipTypeUrn() != null) { + ownerInputBuilder.setOwnershipTypeUrn(input.getOwnershipTypeUrn()); + } + OwnerInput ownerInput = ownerInputBuilder.build(); if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); } return CompletableFuture.supplyAsync(() -> { - OwnerUtils.validateAddInput( - ownerUrn, input.getOwnershipTypeUrn(), ownerEntityType, - targetUrn, - _entityService - ); + OwnerUtils.validateAddOwnerInput(ownerInput, ownerUrn, _entityService); + try { log.debug("Adding Owner. 
input: {}", input); Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); OwnerUtils.addOwnersToResources( - ImmutableList.of(new OwnerInput(input.getOwnerUrn(), ownerEntityType, type, ownershipUrn)), + ImmutableList.of(ownerInput), ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index 06424efa83819..4e5b5bdb2a651 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -39,7 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new AuthorizationException("Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - OwnerUtils.validateAddInput( + OwnerUtils.validateAddOwnerInput( owners, targetUrn, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 019c044d81ab3..5beaeecae673f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -53,8 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private void validateOwners(List owners) { for (OwnerInput ownerInput : owners) { - OwnerUtils.validateOwner(UrnUtils.getUrn(ownerInput.getOwnerUrn()), ownerInput.getOwnerEntityType(), - UrnUtils.getUrn(ownerInput.getOwnershipTypeUrn()), _entityService); + OwnerUtils.validateOwner(ownerInput, _entityService); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index d2f7f896e5953..7233995804423 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -50,7 +50,7 @@ public static void addOwnersToResources( ) { final List changes = new ArrayList<>(); for (ResourceRefInput resource : resources) { - changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); + changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), entityService)); } EntityUtils.ingestChangeProposals(changes, entityService, actor, false); } @@ -69,7 +69,7 @@ 
public static void removeOwnersFromResources( } - private static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, Urn actor, EntityService entityService) { + static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, EntityService entityService) { Ownership ownershipAspect = (Ownership) EntityUtils.getAspectFromEntity( resourceUrn.toString(), Constants.OWNERSHIP_ASPECT_NAME, entityService, @@ -181,18 +181,13 @@ public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, orPrivilegeGroups); } - public static Boolean validateAddInput( + public static Boolean validateAddOwnerInput( List owners, Urn resourceUrn, EntityService entityService ) { for (OwnerInput owner : owners) { - boolean result = validateAddInput( - UrnUtils.getUrn(owner.getOwnerUrn()), - owner.getOwnershipTypeUrn(), - owner.getOwnerEntityType(), - resourceUrn, - entityService); + boolean result = validateAddOwnerInput(owner, resourceUrn, entityService); if (!result) { return false; } @@ -200,44 +195,29 @@ public static Boolean validateAddInput( return true; } - public static Boolean validateAddInput( - Urn ownerUrn, - String ownershipEntityUrn, - OwnerEntityType ownerEntityType, + public static Boolean validateAddOwnerInput( + OwnerInput owner, Urn resourceUrn, EntityService entityService ) { - if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Expected a corp group urn.", resourceUrn)); - } - - if (OwnerEntityType.CORP_USER.equals(ownerEntityType) && !Constants.CORP_USER_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. 
Expected a corp user urn.", resourceUrn)); - } - if (!entityService.exists(resourceUrn)) { throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Resource does not exist.", resourceUrn)); } - if (!entityService.exists(ownerUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Owner %s does not exist.", resourceUrn, ownerUrn)); - } - - if (ownershipEntityUrn != null && !entityService.exists(UrnUtils.getUrn(ownershipEntityUrn))) { - throw new IllegalArgumentException(String.format("Failed to change ownership type for resource %s. Ownership Type " - + "%s does not exist.", resourceUrn, ownershipEntityUrn)); - } + validateOwner(owner, entityService); return true; } public static void validateOwner( - Urn ownerUrn, - OwnerEntityType ownerEntityType, - Urn ownershipEntityUrn, + OwnerInput owner, EntityService entityService ) { + + OwnerEntityType ownerEntityType = owner.getOwnerEntityType(); + Urn ownerUrn = UrnUtils.getUrn(owner.getOwnerUrn()); + if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { throw new IllegalArgumentException( String.format("Failed to change ownership for resource(s). Expected a corp group urn, found %s", ownerUrn)); @@ -252,9 +232,14 @@ public static void validateOwner( throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Owner with urn %s does not exist.", ownerUrn)); } - if (!entityService.exists(ownershipEntityUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Ownership type with " - + "urn %s does not exist.", ownershipEntityUrn)); + if (owner.getOwnershipTypeUrn() != null && !entityService.exists(UrnUtils.getUrn(owner.getOwnershipTypeUrn()))) { + throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). 
Custom Ownership type with " + + "urn %s does not exist.", owner.getOwnershipTypeUrn())); + } + + if (owner.getType() == null && owner.getOwnershipTypeUrn() == null) { + throw new IllegalArgumentException("Failed to change ownership for resource(s). Expected either " + + "type or ownershipTypeUrn to be specified."); } } @@ -269,11 +254,11 @@ public static Boolean validateRemoveInput( } public static void addCreatorAsOwner( - QueryContext context, - String urn, - OwnerEntityType ownerEntityType, - OwnershipType ownershipType, - EntityService entityService) { + QueryContext context, + String urn, + OwnerEntityType ownerEntityType, + OwnershipType ownershipType, + EntityService entityService) { try { Urn actorUrn = CorpuserUrn.createFromString(context.getActorUrn()); String ownershipTypeUrn = mapOwnershipTypeToEntity(ownershipType.name()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java index 2f20fdaf1e9b1..11f7793db82c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetGrantedPrivilegesInput; @@ -33,8 +33,8 @@ public CompletableFuture get(final DataFetchingEnvironment environme if (!isAuthorized(context, actor)) { throw new AuthorizationException("Unauthorized to get privileges for the given author."); } 
- final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) - .map(spec -> new ResourceSpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); + final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) + .map(spec -> new EntitySpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); if (context.getAuthorizer() instanceof AuthorizerChain) { DataHubAuthorizer dataHubAuthorizer = ((AuthorizerChain) context.getAuthorizer()).getDefaultAuthorizer(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java index 2423fc31ea52e..87614e1332528 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java @@ -4,16 +4,25 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.types.dataplatforminstance.mappers.DataPlatformInstanceMapper; +import com.linkedin.datahub.graphql.types.mappers.AutoCompleteResultsMapper; +import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import 
com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.filter.Filter; import graphql.execution.DataFetcherResult; +import org.apache.commons.lang3.NotImplementedException; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -22,7 +31,10 @@ import java.util.function.Function; import java.util.stream.Collectors; -public class DataPlatformInstanceType implements com.linkedin.datahub.graphql.types.EntityType { +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + +public class DataPlatformInstanceType implements SearchableEntityType, + com.linkedin.datahub.graphql.types.EntityType { static final Set ASPECTS_TO_FETCH = ImmutableSet.of( Constants.DATA_PLATFORM_INSTANCE_KEY_ASPECT_NAME, @@ -84,4 +96,24 @@ public List> batchLoad(@Nonnull List filters, + int start, + int count, + @Nonnull final QueryContext context) throws Exception { + throw new NotImplementedException("Searchable type (deprecated) not implemented on DataPlatformInstance entity type"); + } + + @Override + public AutoCompleteResults autoComplete(@Nonnull String query, + @Nullable String field, + @Nullable Filter filters, + int limit, + @Nonnull final QueryContext context) throws Exception { + final AutoCompleteResult result = _entityClient.autoComplete(DATA_PLATFORM_INSTANCE_ENTITY_NAME, query, + filters, limit, context.getAuthentication()); + return AutoCompleteResultsMapper.map(result); + } + } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 39f86948c77c4..b37a8f34fa056 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -226,6 +226,11 @@ type Query { listOwnershipTypes( "Input required for listing custom ownership types" input: ListOwnershipTypesInput!): ListOwnershipTypesResult! 
+ + """ + Fetch a Data Platform Instance by primary key (urn) + """ + dataPlatformInstance(urn: String!): DataPlatformInstance } """ @@ -3783,6 +3788,11 @@ type CorpGroup implements Entity { Additional read only info about the group """ info: CorpGroupInfo @deprecated + + """ + Whether or not this entity exists on DataHub + """ + exists: Boolean } """ diff --git a/datahub-graphql-core/src/main/resources/ingestion.graphql b/datahub-graphql-core/src/main/resources/ingestion.graphql index 69c8aff124583..21f9fb2633119 100644 --- a/datahub-graphql-core/src/main/resources/ingestion.graphql +++ b/datahub-graphql-core/src/main/resources/ingestion.graphql @@ -332,6 +332,11 @@ type IngestionConfig { Advanced: Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Advanced: Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntry!] } """ @@ -483,6 +488,11 @@ input UpdateIngestionSourceConfigInput { Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntryInput!] } """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 4cabdb04afe77..e0cde5a2db9f9 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -458,6 +458,26 @@ enum FilterOperator { Represents the relation: The field exists. If the field is an array, the field is either not present or empty. """ EXISTS + + """ + Represent the relation greater than, e.g. ownerCount > 5 + """ + GREATER_THAN + + """ + Represent the relation greater than or equal to, e.g. ownerCount >= 5 + """ + GREATER_THAN_OR_EQUAL_TO + + """ + Represent the relation less than, e.g. ownerCount < 3 + """ + LESS_THAN + + """ + Represent the relation less than or equal to, e.g. 
ownerCount <= 3 + """ + LESS_THAN_OR_EQUAL_TO } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java index ccaab44f60dd4..8bfc32e1999ae 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java @@ -5,7 +5,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.GlossaryNodeUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -89,17 +89,17 @@ private void setUpTests() throws Exception { Mockito.any(Authentication.class) )).thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode3Aspects))); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); 
mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); } - private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, ResourceSpec resourceSpec) { + private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, EntitySpec resourceSpec) { final AuthorizationRequest authorizationRequest = new AuthorizationRequest( userUrn, privilege, @@ -150,7 +150,7 @@ public void testCanManageChildrenEntitiesAuthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn, mockClient)); @@ -162,7 +162,7 @@ public void testCanManageChildrenEntitiesUnauthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); @@ -175,13 +175,13 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorized() throws Exceptio // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new 
ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -193,13 +193,13 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorized() throws Except // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final 
EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -211,10 +211,10 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorizedLevel2() throws Ex // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -226,10 +226,10 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorizedLevel2() throws // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new 
EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn2, mockClient)); @@ -241,7 +241,7 @@ public void testCanManageChildrenRecursivelyEntitiesNoLevel2() throws Exception // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn3, mockClient)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java index 2538accc694fb..16d8da9169a8f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java @@ -26,7 +26,7 @@ public class UpsertIngestionSourceResolverTest { "Test source", "mysql", "Test source description", new UpdateIngestionSourceScheduleInput("* * * * *", "UTC"), - new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false) + new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false, null) ); @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index efc0c5dfcf36d..329d71ec125db 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -2,6 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; +import com.linkedin.common.Owner; +import com.linkedin.common.OwnerArray; +import com.linkedin.common.Ownership; +import com.linkedin.common.OwnershipSource; +import com.linkedin.common.OwnershipSourceType; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -28,6 +33,7 @@ public class AddOwnersResolverTest { private static final String TEST_ENTITY_URN = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; private static final String TEST_OWNER_1_URN = "urn:li:corpuser:test-id-1"; private static final String TEST_OWNER_2_URN = "urn:li:corpuser:test-id-2"; + private static final String TEST_OWNER_3_URN = "urn:li:corpGroup:test-id-3"; @Test public void testGetSuccessNoExistingOwners() throws Exception { @@ -75,33 +81,41 @@ public void testGetSuccessNoExistingOwners() throws Exception { } @Test - public void testGetSuccessExistingOwners() throws Exception { + public void testGetSuccessExistingOwnerNewType() throws Exception { EntityService mockService = getMockEntityService(); + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + Mockito.when(mockService.getAspect( - Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), - Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), - 
Mockito.eq(0L))) - .thenReturn(null); + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); - Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString( - OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) - .thenReturn(true); + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); AddOwnersResolver resolver = new AddOwnersResolver(mockService); // Execute resolver QueryContext mockContext = getMockAllowContext(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + AddOwnersInput input = new AddOwnersInput(ImmutableList.of( - new OwnerInput(TEST_OWNER_1_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())), - new OwnerInput(TEST_OWNER_2_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() ), TEST_ENTITY_URN); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); @@ -111,11 +125,126 @@ public void testGetSuccessExistingOwners() throws Exception { verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( - 
Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) ); + } + + @Test + public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.TECHNICAL_OWNER) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, 
Mockito.times(1)).exists( - Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + } + + @Test + public void testGetSuccessMultipleOwnerTypes() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_3_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + 
.setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_2_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.BUSINESS_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_3_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_GROUP) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_3_URN)) ); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java index 196eb24b52bf8..9c04c67dd3a3b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import 
com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -201,7 +201,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -210,7 +210,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java index a6b4887b0e882..78c894f27cbc3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -134,7 +134,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { DeleteQueryResolverTest.TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( DeleteQueryResolverTest.TEST_DATASET_URN.getEntityType(), DeleteQueryResolverTest.TEST_DATASET_URN.toString())) ); @@ -143,7 +143,7 @@ private QueryContext 
getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java index 7a76b6d6be5a4..9b500b5fb3936 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -206,7 +206,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -215,7 +215,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -224,7 +224,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( 
TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); @@ -233,7 +233,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); diff --git a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx index d5e284af931df..53d2062277dec 100644 --- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx +++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx @@ -11,6 +11,7 @@ import { RoutedTabs } from '../../shared/RoutedTabs'; import GroupInfoSidebar from './GroupInfoSideBar'; import { GroupAssets } from './GroupAssets'; import { ErrorSection } from '../../shared/error/ErrorSection'; +import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage'; const messageStyle = { marginTop: '10%' }; @@ -110,6 +111,9 @@ export default function GroupProfile() { urn, }; + if (data?.corpGroup?.exists === false) { + return ; + } return ( <> {error && } diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx index d48ead2f5863e..9788d36af2c65 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx @@ -112,7 +112,11 @@ function CreateGlossaryEntityModal(props: Props) { - @@ -130,6 +134,7 @@ function CreateGlossaryEntityModal(props: Props) { > Name}> setIsMoveModalVisible(true)} > - +  Move diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx index 5352825708776..37a625f58100b 100644 --- 
a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx @@ -64,6 +64,7 @@ function MoveGlossaryEntityModal(props: Props) { return ( Cancel - + } > diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx index 579b8c9905da0..cb37c44a36caa 100644 --- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx @@ -41,7 +41,11 @@ export default function UpdateDescriptionModal({ title, description, original, o footer={ <> - diff --git a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx index 34d4f0cb3fe91..68a8cf4094362 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx @@ -57,7 +57,7 @@ export const AddLinkModal = ({ buttonProps, refetch }: AddLinkProps) => { return ( <> - { , - , ]} >
{ )} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx index de065d23e56e7..344c2aef87175 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx @@ -60,6 +60,7 @@ export const DocumentationTab = ({ properties }: { properties?: Props }) => {
- diff --git a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx index 11f54cb5078e6..a5262265fd23d 100644 --- a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx +++ b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx @@ -92,11 +92,12 @@ function BusinessGlossaryPage() { {(termsError || nodesError) && ( )} - + Business Glossary
diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 6c91a0f6f3f8f..13af19b0b6ac2 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -15,7 +15,7 @@ import { Message } from '../../shared/Message'; import TabToolbar from '../../entity/shared/components/styled/TabToolbar'; import { IngestionSourceBuilderModal } from './builder/IngestionSourceBuilderModal'; import { addToListIngestionSourcesCache, CLI_EXECUTOR_ID, removeFromListIngestionSourcesCache } from './utils'; -import { DEFAULT_EXECUTOR_ID, SourceBuilderState } from './builder/types'; +import { DEFAULT_EXECUTOR_ID, SourceBuilderState, StringMapEntryInput } from './builder/types'; import { IngestionSource, UpdateIngestionSourceInput } from '../../../types.generated'; import { SearchBar } from '../../search/SearchBar'; import { useEntityRegistry } from '../../useEntityRegistry'; @@ -173,6 +173,11 @@ export const IngestionSourceList = () => { setFocusSourceUrn(undefined); }; + const formatExtraArgs = (extraArgs): StringMapEntryInput[] => { + if (extraArgs === null || extraArgs === undefined) return []; + return extraArgs.map((entry) => ({ key: entry.key, value: entry.value })); + }; + const createOrUpdateIngestionSource = ( input: UpdateIngestionSourceInput, resetState: () => void, @@ -294,6 +299,7 @@ export const IngestionSourceList = () => { (recipeBuilderState.config?.executorId as string)) || DEFAULT_EXECUTOR_ID, debugMode: recipeBuilderState.config?.debugMode || false, + extraArgs: formatExtraArgs(recipeBuilderState.config?.extraArgs || []), }, schedule: recipeBuilderState.schedule && { interval: recipeBuilderState.schedule?.interval as string, @@ -358,7 +364,12 @@ export const IngestionSourceList = () => {
- )} diff --git a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx index dba9b25e14e99..7a14b6a794189 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx @@ -167,7 +167,11 @@ export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps
-
diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 913f8253ece5a..3092364bb8bdd 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -1,7 +1,7 @@ import { Button, Checkbox, Collapse, Form, Input, Typography } from 'antd'; import React from 'react'; import styled from 'styled-components'; -import { SourceBuilderState, StepProps } from './types'; +import { SourceBuilderState, StepProps, StringMapEntryInput } from './types'; const ControlsContainer = styled.div` display: flex; @@ -13,6 +13,10 @@ const SaveButton = styled(Button)` margin-right: 15px; `; +const ExtraEnvKey = 'extra_env_vars'; +const ExtraReqKey = 'extra_pip_requirements'; +const ExtraPluginKey = 'extra_pip_plugins'; + export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) => { const setName = (stagedName: string) => { const newState: SourceBuilderState = { @@ -55,6 +59,90 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) updateState(newState); }; + const retrieveExtraEnvs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraEnvs = (envs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? 
state.config?.extraArgs : []; + const indxOfEnvVars: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + const value = { key: ExtraEnvKey, value: envs }; + if (indxOfEnvVars > -1) { + extraArgs[indxOfEnvVars] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraDataHubPlugins = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraDataHubPlugins = (plugins: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfPlugins: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + const value = { key: ExtraPluginKey, value: plugins }; + if (indxOfPlugins > -1) { + extraArgs[indxOfPlugins] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraReqs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraReqs = (reqs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? 
state.config?.extraArgs : []; + const indxOfReqs: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + const value = { key: ExtraReqKey, value: reqs }; + if (indxOfReqs > -1) { + extraArgs[indxOfReqs] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + const onClickCreate = (shouldRun?: boolean) => { if (state.name !== undefined && state.name.length > 0) { submit(shouldRun); @@ -102,7 +190,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} /> @@ -116,6 +204,39 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) onChange={(event) => setDebugMode(event.target.checked)} /> + Extra Enviroment Variables}> + + Advanced: Set extra environment variables to an ingestion execution + + setExtraEnvs(event.target.value)} + /> + + Extra DataHub plugins}> + + Advanced: Set extra DataHub plugins for an ingestion execution + + setExtraDataHubPlugins(event.target.value)} + /> + + Extra Pip Libraries}> + + Advanced: Add extra pip libraries for an ingestion execution + + setExtraReqs(event.target.value)} + /> + @@ -123,6 +244,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
0)} onClick={() => onClickCreate(false)} > diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx index 4ddeb7b492595..bee9b04cee100 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx @@ -86,10 +86,20 @@ function RecipeBuilder(props: Props) { {sourceConfigs?.displayName} Recipe - switchViews(true)}> + switchViews(true)} + data-testid="recipe-builder-form-button" + > Form - switchViews(false)}> + switchViews(false)} + data-testid="recipe-builder-yaml-button" + > YAML @@ -114,7 +124,9 @@ function RecipeBuilder(props: Props) { - + )} diff --git a/datahub-web-react/src/app/ingest/source/builder/types.ts b/datahub-web-react/src/app/ingest/source/builder/types.ts index cfe0f27ae7dbe..2df467b7beba1 100644 --- a/datahub-web-react/src/app/ingest/source/builder/types.ts +++ b/datahub-web-react/src/app/ingest/source/builder/types.ts @@ -34,6 +34,18 @@ export type StepProps = { ingestionSources: SourceConfig[]; }; +export type StringMapEntryInput = { + /** + * The key of the map entry + */ + key: string; + + /** + * The value fo the map entry + */ + value: string; +}; + /** * The object represents the state of the Ingestion Source Builder form. */ @@ -91,5 +103,10 @@ export interface SourceBuilderState { * Advanced: Whether or not to run this ingestion source in debug mode */ debugMode?: boolean | null; + + /** + * Advanced: Extra arguments for the ingestion run. 
+ */ + extraArgs?: StringMapEntryInput[] | null; }; } diff --git a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx index 849efabdcde97..00fdc89964f88 100644 --- a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx +++ b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx @@ -2,6 +2,7 @@ import { DownloadOutlined } from '@ant-design/icons'; import { Button, message, Modal, Typography } from 'antd'; import React, { useEffect, useState } from 'react'; import styled from 'styled-components'; +import YAML from 'yamljs'; import { useGetIngestionExecutionRequestQuery } from '../../../../graphql/ingestion.generated'; import { ANTD_GRAY } from '../../../entity/shared/constants'; import { downloadFile } from '../../../search/utils/csvUtils'; @@ -65,6 +66,13 @@ const IngestedAssetsSection = styled.div` padding-right: 30px; `; +const RecipeSection = styled.div` + border-top: 1px solid ${ANTD_GRAY[4]}; + padding-top: 16px; + padding-left: 30px; + padding-right: 30px; +`; + const LogsSection = styled.div` padding-top: 16px; padding-left: 30px; @@ -91,6 +99,8 @@ type Props = { export const ExecutionDetailsModal = ({ urn, visible, onClose }: Props) => { const [showExpandedLogs, setShowExpandedLogs] = useState(false); + const [showExpandedRecipe, setShowExpandedRecipe] = useState(false); + const { data, loading, error, refetch } = useGetIngestionExecutionRequestQuery({ variables: { urn } }); const output = data?.executionRequest?.result?.report || 'No output found.'; @@ -120,7 +130,18 @@ export const ExecutionDetailsModal = ({ urn, visible, onClose }: Props) => { const resultSummaryText = (result && {getExecutionRequestSummaryText(result)}) || undefined; - const isOutputExpandable = output.length > 100; + + const recipeJson = data?.executionRequest?.input.arguments?.find((arg) => arg.key === 
'recipe')?.value; + let recipeYaml: string; + try { + recipeYaml = recipeJson && YAML.stringify(JSON.parse(recipeJson), 8, 2).trim(); + } catch (e) { + recipeYaml = ''; + } + const recipe = showExpandedRecipe ? recipeYaml : recipeYaml?.split('\n').slice(0, 1).join('\n'); + + const areLogsExpandable = output.length > 100; + const isRecipeExpandable = recipeYaml?.includes('\n'); return ( { -
{`${logs}${!showExpandedLogs && isOutputExpandable ? '...' : ''}`}
- {isOutputExpandable && ( +
{`${logs}${!showExpandedLogs && areLogsExpandable ? '...' : ''}`}
+ {areLogsExpandable && ( setShowExpandedLogs(!showExpandedLogs)}> {showExpandedLogs ? 'Hide' : 'Show More'} )}
+ {recipe && ( + + Recipe + + + The recipe used for this ingestion run. + + + +
{`${recipe}${!showExpandedRecipe && isRecipeExpandable ? '\n...' : ''}`}
+
+ {isRecipeExpandable && ( + setShowExpandedRecipe((v) => !v)}> + {showExpandedRecipe ? 'Hide' : 'Show More'} + + )} +
+ )}
); diff --git a/datahub-web-react/src/app/ingest/source/utils.ts b/datahub-web-react/src/app/ingest/source/utils.ts index c372388e958b7..f789ed8434721 100644 --- a/datahub-web-react/src/app/ingest/source/utils.ts +++ b/datahub-web-react/src/app/ingest/source/utils.ts @@ -1,17 +1,19 @@ -import YAML from 'yamljs'; import { CheckCircleOutlined, ClockCircleOutlined, CloseCircleOutlined, + ExclamationCircleOutlined, LoadingOutlined, + StopOutlined, WarningOutlined, } from '@ant-design/icons'; -import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import YAML from 'yamljs'; +import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; import { EntityType, FacetMetadata } from '../../../types.generated'; -import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import EntityRegistry from '../../entity/EntityRegistry'; +import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import { SourceConfig } from './builder/types'; -import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => { const sourceConfigs = ingestionSources.find((source) => source.name === sourceType); @@ -40,7 +42,9 @@ export function getPlaceholderRecipe(ingestionSources: SourceConfig[], type?: st export const RUNNING = 'RUNNING'; export const SUCCESS = 'SUCCESS'; +export const WARNING = 'WARNING'; export const FAILURE = 'FAILURE'; +export const CONNECTION_FAILURE = 'CONNECTION_FAILURE'; export const CANCELLED = 'CANCELLED'; export const UP_FOR_RETRY = 'UP_FOR_RETRY'; export const ROLLING_BACK = 'ROLLING_BACK'; @@ -56,8 +60,10 @@ export const getExecutionRequestStatusIcon = (status: string) => { return ( (status === RUNNING && LoadingOutlined) || (status === SUCCESS && 
CheckCircleOutlined) || + (status === WARNING && ExclamationCircleOutlined) || (status === FAILURE && CloseCircleOutlined) || - (status === CANCELLED && CloseCircleOutlined) || + (status === CONNECTION_FAILURE && CloseCircleOutlined) || + (status === CANCELLED && StopOutlined) || (status === UP_FOR_RETRY && ClockCircleOutlined) || (status === ROLLED_BACK && WarningOutlined) || (status === ROLLING_BACK && LoadingOutlined) || @@ -70,7 +76,9 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { return ( (status === RUNNING && 'Running') || (status === SUCCESS && 'Succeeded') || + (status === WARNING && 'Completed') || (status === FAILURE && 'Failed') || + (status === CONNECTION_FAILURE && 'Connection Failed') || (status === CANCELLED && 'Cancelled') || (status === UP_FOR_RETRY && 'Up for Retry') || (status === ROLLED_BACK && 'Rolled Back') || @@ -83,21 +91,25 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { export const getExecutionRequestSummaryText = (status: string) => { switch (status) { case RUNNING: - return 'Ingestion is running'; + return 'Ingestion is running...'; case SUCCESS: - return 'Ingestion successfully completed'; + return 'Ingestion succeeded with no errors or suspected missing data.'; + case WARNING: + return 'Ingestion completed with minor or intermittent errors.'; case FAILURE: - return 'Ingestion completed with errors'; + return 'Ingestion failed to complete, or completed with serious errors.'; + case CONNECTION_FAILURE: + return 'Ingestion failed due to network, authentication, or permission issues.'; case CANCELLED: - return 'Ingestion was cancelled'; + return 'Ingestion was cancelled.'; case ROLLED_BACK: - return 'Ingestion was rolled back'; + return 'Ingestion was rolled back.'; case ROLLING_BACK: - return 'Ingestion is in the process of rolling back'; + return 'Ingestion is in the process of rolling back.'; case ROLLBACK_FAILED: - return 'Ingestion rollback failed'; + return 'Ingestion 
rollback failed.'; default: - return 'Ingestion status not recognized'; + return 'Ingestion status not recognized.'; } }; @@ -105,7 +117,9 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => { return ( (status === RUNNING && REDESIGN_COLORS.BLUE) || (status === SUCCESS && 'green') || + (status === WARNING && 'orangered') || (status === FAILURE && 'red') || + (status === CONNECTION_FAILURE && 'crimson') || (status === UP_FOR_RETRY && 'orange') || (status === CANCELLED && ANTD_GRAY[9]) || (status === ROLLED_BACK && 'orange') || diff --git a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx index 68e91983babdb..d3e01df3a66e8 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx @@ -67,8 +67,8 @@ export default function PolicyDetailsModal({ policy, visible, onClose, privilege const isMetadataPolicy = policy?.type === PolicyType.Metadata; const resources = convertLegacyResourceFilter(policy?.resources); - const resourceTypes = getFieldValues(resources?.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources?.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources?.filter, 'URN') || []; const domains = getFieldValues(resources?.filter, 'DOMAIN') || []; const { diff --git a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx index 1520388a5033a..b8e1505fceaec 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx @@ -67,8 +67,8 @@ export default function PolicyPrivilegeForm({ } = useAppConfig(); const resources: ResourceFilter = 
convertLegacyResourceFilter(maybeResources) || EMPTY_POLICY.resources; - const resourceTypes = getFieldValues(resources.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources.filter, 'URN') || []; const getDisplayName = (entity) => { if (!entity) { @@ -145,10 +145,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_TYPE', [ - ...resourceTypes, - createCriterionValue(selectedResourceType), - ]), + filter: setFieldValues(filter, 'TYPE', [...resourceTypes, createCriterionValue(selectedResourceType)]), }); }; @@ -160,7 +157,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_TYPE', + 'TYPE', resourceTypes?.filter((criterionValue) => criterionValue.value !== deselectedResourceType), ), }); @@ -173,7 +170,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_URN', [ + filter: setFieldValues(filter, 'URN', [ ...resourceEntities, createCriterionValueWithEntity( resource, @@ -192,7 +189,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_URN', + 'URN', resourceEntities?.filter((criterionValue) => criterionValue.value !== resource), ), }); diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index c7af7342f6efa..2f178fcdeb5c3 100644 --- a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -99,10 +99,10 @@ export const convertLegacyResourceFilter = (resourceFilter: Maybe(); if (resourceFilter.type) { - criteria.push(createCriterion('RESOURCE_TYPE', 
[createCriterionValue(resourceFilter.type)])); + criteria.push(createCriterion('TYPE', [createCriterionValue(resourceFilter.type)])); } if (resourceFilter.resources && resourceFilter.resources.length > 0) { - criteria.push(createCriterion('RESOURCE_URN', resourceFilter.resources.map(createCriterionValue))); + criteria.push(createCriterion('URN', resourceFilter.resources.map(createCriterionValue))); } return { filter: { diff --git a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx index d5722429aaf6b..2bb76714d6119 100644 --- a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx +++ b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx @@ -39,6 +39,7 @@ export default function EntityPathsModal({ paths, resultEntityUrn, hideModal }: return ( Column path{paths.length > 1 && 's'} from{' '} diff --git a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts index 05419e5abed35..9a3af8fb8d56c 100644 --- a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts +++ b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts @@ -3,7 +3,7 @@ import { useLocation, useParams } from 'react-router'; import { useMemo } from 'react'; import { FacetFilterInput, EntityType } from '../../types.generated'; import { useEntityRegistry } from '../useEntityRegistry'; -import { ENTITY_FILTER_NAME, FILTER_DELIMITER, UnionType } from './utils/constants'; +import { ENTITY_FILTER_NAME, UnionType } from './utils/constants'; import { useUserContext } from '../context/useUserContext'; import useFilters from './utils/useFilters'; import { generateOrFilters } from './utils/generateOrFilters'; @@ -27,12 +27,6 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array = useFilters(params); - const nonNestedFilters = filters.filter( - (f) => !f.field.includes(FILTER_DELIMITER) && 
!excludedFilterFields?.includes(f.field), - ); - const nestedFilters = filters.filter( - (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), - ); const entityFilters: Array = useMemo( () => filters @@ -43,8 +37,8 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array generateOrFilters(unionType, nonNestedFilters, nestedFilters), - [nonNestedFilters, nestedFilters, unionType], + () => generateOrFilters(unionType, filters, excludedFilterFields), + [filters, excludedFilterFields, unionType], ); return { entityFilters, query, unionType, filters, orFilters, viewUrn, page, activeType, sortInput }; diff --git a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts index 505c50efb289f..fd5a5691b454e 100644 --- a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts +++ b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts @@ -1,7 +1,7 @@ import { DOMAINS_FILTER_NAME, ENTITY_SUB_TYPE_FILTER_NAME, - ENTITY_TYPE_FILTER_NAME, + ENTITY_FILTER_NAME, TAGS_FILTER_NAME, UnionType, } from '../constants'; @@ -10,7 +10,7 @@ import { generateOrFilters } from '../generateOrFilters'; describe('generateOrFilters', () => { it('should generate orFilters with UnionType.AND', () => { const filters = [ - { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, ]; const orFilters = generateOrFilters(UnionType.AND, filters); @@ -24,7 +24,7 @@ describe('generateOrFilters', () => { it('should generate orFilters with UnionType.OR', () => { const filters = [ - { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, ]; const 
orFilters = generateOrFilters(UnionType.OR, filters); @@ -43,17 +43,23 @@ describe('generateOrFilters', () => { const filters = [ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }, ]; - const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; - const orFilters = generateOrFilters(UnionType.AND, filters, nestedFilters); + // const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; + const orFilters = generateOrFilters(UnionType.AND, filters); expect(orFilters).toMatchObject([ { - and: [...filters, { field: '_entityType', values: ['CONTAINER'] }], + and: [ + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: '_entityType', values: ['CONTAINER'] }, + ], }, { and: [ - ...filters, + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, { field: '_entityType', values: ['DATASET'] }, { field: 'typeNames', values: ['table'] }, ], @@ -65,9 +71,9 @@ describe('generateOrFilters', () => { const filters = [ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }, ]; - const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; - const orFilters = generateOrFilters(UnionType.OR, filters, nestedFilters); + const orFilters = generateOrFilters(UnionType.OR, filters); expect(orFilters).toMatchObject([ { @@ -87,4 +93,18 @@ describe('generateOrFilters', () => { }, ]); }); + + it('should generate orFilters and exclude filters with a provided exclude field', () => { + const filters = [ + { 
field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + ]; + const orFilters = generateOrFilters(UnionType.AND, filters, [ENTITY_FILTER_NAME]); + + expect(orFilters).toMatchObject([ + { + and: [{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }], + }, + ]); + }); }); diff --git a/datahub-web-react/src/app/search/utils/generateOrFilters.ts b/datahub-web-react/src/app/search/utils/generateOrFilters.ts index b665a2e0f0495..fa2939b3436f5 100644 --- a/datahub-web-react/src/app/search/utils/generateOrFilters.ts +++ b/datahub-web-react/src/app/search/utils/generateOrFilters.ts @@ -26,20 +26,26 @@ function generateInputWithNestedFilters(filters: FacetFilterInput[], nestedFilte export function generateOrFilters( unionType: UnionType, filters: FacetFilterInput[], - nestedFilters: FacetFilterInput[] = [], + excludedFilterFields: string[] = [], ): AndFilterInput[] { - if ((filters?.length || 0) === 0 && nestedFilters.length === 0) { + if ((filters?.length || 0) === 0) { return []; } + const nonNestedFilters = filters.filter( + (f) => !f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), + ); + const nestedFilters = filters.filter( + (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), + ); if (unionType === UnionType.OR) { const orFiltersWithNestedFilters = generateInputWithNestedFilters([], nestedFilters); - const orFilters = filters.map((filter) => ({ + const orFilters = nonNestedFilters.map((filter) => ({ and: [filter], })); return [...orFilters, ...orFiltersWithNestedFilters]; } - const andFiltersWithNestedFilters = generateInputWithNestedFilters(filters, nestedFilters); + const andFiltersWithNestedFilters = generateInputWithNestedFilters(nonNestedFilters, nestedFilters); if (andFiltersWithNestedFilters.length) { return andFiltersWithNestedFilters; @@ -47,7 +53,7 @@ export function generateOrFilters( return [ { - and: filters, + and: 
nonNestedFilters, }, ]; } diff --git a/datahub-web-react/src/graphql/group.graphql b/datahub-web-react/src/graphql/group.graphql index 9aa6e2b005f16..1007721e51a4e 100644 --- a/datahub-web-react/src/graphql/group.graphql +++ b/datahub-web-react/src/graphql/group.graphql @@ -3,6 +3,7 @@ query getGroup($urn: String!, $membersCount: Int!) { urn type name + exists origin { type externalType diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index 80f66642fe11f..1767fe34bfef0 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -12,6 +12,10 @@ query listIngestionSources($input: ListIngestionSourcesInput!) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval @@ -51,6 +55,10 @@ query getIngestionSource($urn: String!, $runStart: Int, $runCount: Int) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval @@ -90,6 +98,10 @@ query getIngestionExecutionRequest($urn: String!) 
{ source { type } + arguments { + key + value + } } result { status diff --git a/datahub-web-react/src/graphql/scroll.graphql b/datahub-web-react/src/graphql/scroll.graphql index 18274c50c2166..1031fed7b9e13 100644 --- a/datahub-web-react/src/graphql/scroll.graphql +++ b/datahub-web-react/src/graphql/scroll.graphql @@ -408,6 +408,7 @@ fragment downloadScrollAcrossLineageResult on ScrollAcrossLineageResults { count total searchResults { + degree entity { ...downloadSearchResults } diff --git a/datahub-web-react/src/images/verticalogo.png b/datahub-web-react/src/images/verticalogo.png index a81047fd43edb..5da38f4e67c7d 100644 Binary files a/datahub-web-react/src/images/verticalogo.png and b/datahub-web-react/src/images/verticalogo.png differ diff --git a/docker/build.gradle b/docker/build.gradle index 0faea626e982d..56634a5fe0c67 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -35,7 +35,7 @@ task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') { environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" - // environment "ACTIONS_VERSION", 'alpine3.17-slim' + // environment "ACTIONS_VERSION", 'alpine3.18-slim' // environment "DATAHUB_ACTIONS_IMAGE", 'nginx' // Elastic @@ -97,10 +97,20 @@ task quickstartDebug(type: Exec, dependsOn: ':metadata-ingestion:install') { dependsOn(debug_modules.collect { it + ':dockerTagDebug' }) shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' - environment "DATAHUB_PRECREATE_TOPICS", "true" environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + // Elastic + // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch' + // environment "DATAHUB_SEARCH_TAG", '7.10.1' + + // OpenSearch + environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch' + environment "DATAHUB_SEARCH_TAG", '2.9.0' + environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true' + environment 
"USE_AWS_ELASTICSEARCH", 'true' + + def cmd = [ 'source ../metadata-ingestion/venv/bin/activate && ', 'datahub docker quickstart', diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index 2d74a288b8c99..f5428f7480403 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=prod -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 564cc19cc9a5f..25afe9b8b3dce 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -1,7 +1,7 @@ ARG APP_ENV=full ARG BASE_IMAGE=base -FROM golang:1-alpine3.17 AS dockerize-binary +FROM golang:1-alpine3.18 AS dockerize-binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 82d9a93a9a2c3..eb082d50b3020 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -2,62 +2,58 @@ # pyspark==3.0.3 # pydeequ==1.0.1 -acryl-datahub-classify==0.0.6 -acryl-iceberg-legacy==0.0.4 -acryl-PyHive==0.6.13 -aenum==3.1.12 -aiohttp==3.8.4 +acryl-datahub-classify==0.0.8 +acryl-PyHive==0.6.14 +acryl-sqlglot==18.5.2.dev45 +aenum==3.1.15 +aiohttp==3.8.6 aiosignal==1.3.1 -alembic==1.11.1 +alembic==1.12.0 altair==4.2.0 -anyio==3.7.0 -apache-airflow==2.6.1 -apache-airflow-providers-common-sql==1.5.1 -apache-airflow-providers-ftp==3.4.1 -apache-airflow-providers-http==4.4.1 -apache-airflow-providers-imap==3.2.1 -apache-airflow-providers-sqlite==3.4.1 -apispec==5.2.2 +anyio==3.7.1 +apache-airflow==2.7.2 +apache-airflow-providers-common-sql==1.7.2 +apache-airflow-providers-ftp==3.5.2 +apache-airflow-providers-http==4.5.2 
+apache-airflow-providers-imap==3.3.2 +apache-airflow-providers-sqlite==3.4.3 +apispec==6.3.0 appdirs==1.4.4 appnope==0.1.3 -argcomplete==3.0.8 -argon2-cffi==21.3.0 +argcomplete==3.1.2 +argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 asgiref==3.7.2 asn1crypto==1.5.1 -asttokens==2.2.1 -async-timeout==4.0.2 +asttokens==2.4.0 +async-timeout==4.0.3 asynch==0.2.2 attrs==23.1.0 avro==1.10.2 -avro-gen3==0.7.10 -azure-core==1.26.4 -azure-identity==1.10.0 -azure-storage-blob==12.16.0 -azure-storage-file-datalake==12.11.0 -Babel==2.12.1 +avro-gen3==0.7.11 +Babel==2.13.0 backcall==0.2.0 backoff==2.2.1 beautifulsoup4==4.12.2 -bleach==6.0.0 -blinker==1.6.2 -blis==0.7.9 -boto3==1.26.142 -botocore==1.29.142 +bleach==6.1.0 +blinker==1.6.3 +blis==0.7.11 +boto3==1.28.62 +botocore==1.31.62 bowler==0.9.0 -bracex==2.3.post1 +bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 cachetools==5.3.1 -catalogue==2.0.8 -cattrs==22.2.0 -certifi==2023.5.7 -cffi==1.15.1 -chardet==5.1.0 -charset-normalizer==2.1.1 +catalogue==2.0.10 +cattrs==23.1.2 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.0 ciso8601==2.3.0 -click==8.1.3 -click-default-group==1.2.2 +click==8.1.7 +click-default-group==1.2.4 click-spinner==0.1.10 clickclick==20.10.2 clickhouse-cityhash==1.0.2.4 @@ -66,205 +62,217 @@ clickhouse-sqlalchemy==0.2.4 cloudpickle==2.2.1 colorama==0.4.6 colorlog==4.8.0 -confection==0.0.4 +comm==0.1.4 +confection==0.1.3 ConfigUpdater==3.1.1 confluent-kafka==1.8.2 connexion==2.14.2 cron-descriptor==1.4.0 -croniter==1.3.15 -cryptography==37.0.4 +croniter==2.0.1 +cryptography==41.0.4 cx-Oracle==8.3.0 -cymem==2.0.7 -dask==2023.5.1 -databricks-cli==0.17.7 +cymem==2.0.8 +dask==2023.9.3 +databricks-cli==0.18.0 databricks-dbapi==0.6.0 -databricks-sdk==0.1.8 -debugpy==1.6.7 +databricks-sdk==0.10.0 +debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.9.0 +deltalake==0.11.0 Deprecated==1.2.14 -dill==0.3.6 -dnspython==2.3.0 -docker==6.1.2 +dill==0.3.7 +dnspython==2.4.2 
+docker==6.1.3 docutils==0.20.1 ecdsa==0.18.0 elasticsearch==7.13.4 email-validator==1.3.1 entrypoints==0.4 et-xmlfile==1.1.0 -exceptiongroup==1.1.1 -executing==1.2.0 -expandvars==0.9.0 -fastapi==0.95.2 -fastavro==1.7.4 -fastjsonschema==2.17.1 -feast==0.29.0 -filelock==3.12.0 +exceptiongroup==1.1.3 +executing==2.0.0 +expandvars==0.11.0 +fastapi==0.103.2 +fastavro==1.8.4 +fastjsonschema==2.18.1 +feast==0.31.1 +filelock==3.12.4 fissix==21.11.13 Flask==2.2.5 flatdict==4.0.1 -frozenlist==1.3.3 -fsspec==2023.5.0 +frozenlist==1.4.0 +fsspec==2023.9.2 future==0.18.3 -GeoAlchemy2==0.13.3 +GeoAlchemy2==0.14.1 gitdb==4.0.10 -GitPython==3.1.31 -google-api-core==2.11.0 -google-auth==2.19.0 -google-cloud-appengine-logging==1.3.0 +GitPython==3.1.37 +google-api-core==2.12.0 +google-auth==2.23.3 +google-cloud-appengine-logging==1.3.2 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 -google-cloud-core==2.3.2 +google-cloud-bigquery==3.12.0 +google-cloud-core==2.3.3 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 -google-resumable-media==2.5.0 -googleapis-common-protos==1.59.0 +google-re2==1.1 +google-resumable-media==2.6.0 +googleapis-common-protos==1.60.0 gql==3.4.1 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 -greenlet==2.0.2 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.54.2 -grpcio-reflection==1.54.2 -grpcio-status==1.54.2 -grpcio-tools==1.54.2 -gssapi==1.8.2 -gunicorn==20.1.0 +grpcio==1.59.0 +grpcio-reflection==1.59.0 +grpcio-status==1.59.0 +grpcio-tools==1.59.0 +gssapi==1.8.3 +gunicorn==21.2.0 h11==0.14.0 -hmsclient==0.1.1 -httpcore==0.17.2 -httptools==0.5.0 -httpx==0.24.1 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 humanfriendly==10.0 idna==3.4 -ijson==3.2.0.post0 -importlib-metadata==6.6.0 -importlib-resources==5.12.0 +ijson==3.2.3 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 
-ipython==8.13.2 +ipython==8.16.1 ipython-genutils==0.2.0 -ipywidgets==8.0.6 +ipywidgets==8.1.1 iso3166==2.1.1 isodate==0.6.1 itsdangerous==2.1.2 -jedi==0.18.2 +jedi==0.19.1 Jinja2==3.1.2 jmespath==1.0.1 JPype1==1.4.1 -jsonlines==3.1.0 -jsonpatch==1.32 -jsonpointer==2.3 +jsonlines==4.0.0 +jsonpatch==1.33 +jsonpointer==2.4 jsonref==1.1.0 -jsonschema==4.17.3 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 jupyter-server==1.24.0 jupyter_client==7.4.9 jupyter_core==4.12.0 jupyterlab-pygments==0.2.2 -jupyterlab-widgets==3.0.7 +jupyterlab-widgets==3.0.9 langcodes==3.3.0 lark==1.1.4 lazy-object-proxy==1.9.0 leb128==1.0.5 -limits==3.5.0 +limits==3.6.0 linear-tsv==1.1.0 linkify-it-py==2.0.2 lkml==1.3.1 locket==1.0.0 lockfile==0.12.2 looker-sdk==23.0.0 -lxml==4.9.2 +lxml==4.9.3 lz4==4.3.2 makefun==1.15.1 Mako==1.2.4 -Markdown==3.4.3 -markdown-it-py==2.2.0 -MarkupSafe==2.1.2 -marshmallow==3.19.0 -marshmallow-enum==1.5.1 +Markdown==3.5 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 marshmallow-oneofschema==3.0.1 marshmallow-sqlalchemy==0.26.1 matplotlib-inline==0.1.6 -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.0 mdurl==0.1.2 -mistune==2.0.5 +mistune==3.0.2 mixpanel==4.10.0 -mmh3==4.0.0 -more-itertools==9.1.0 +mlflow-skinny==2.7.1 +mmh3==4.0.1 +mmhash3==3.0.1 +more-itertools==10.1.0 moreorless==0.4.0 -moto==4.1.10 -msal==1.16.0 -msal-extensions==1.0.0 +moto==4.2.5 +msal==1.22.0 multidict==6.0.4 -murmurhash==1.0.9 -mypy==1.3.0 +murmurhash==1.0.10 +mypy==1.6.0 mypy-extensions==1.0.0 nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.4.0 -nbformat==5.8.0 -nest-asyncio==1.5.6 +nbconvert==7.9.2 +nbformat==5.9.1 +nest-asyncio==1.5.8 networkx==3.1 -notebook==6.5.4 +notebook==6.5.6 notebook_shim==0.2.3 -numpy==1.24.3 +numpy==1.26.0 oauthlib==3.2.2 okta==1.7.0 +openlineage-airflow==1.2.0 +openlineage-integration-common==1.2.0 +openlineage-python==1.2.0 +openlineage_sql==1.2.0 openpyxl==3.1.2 +opentelemetry-api==1.20.0 +opentelemetry-exporter-otlp==1.20.0 
+opentelemetry-exporter-otlp-proto-common==1.20.0 +opentelemetry-exporter-otlp-proto-grpc==1.20.0 +opentelemetry-exporter-otlp-proto-http==1.20.0 +opentelemetry-proto==1.20.0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 ordered-set==4.1.0 oscrypto==1.3.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 pandavro==1.5.2 pandocfilters==1.5.0 -parse==1.19.0 +parse==1.19.1 parso==0.8.3 -partd==1.4.0 -pathspec==0.9.0 -pathy==0.10.1 +partd==1.4.1 +pathspec==0.11.2 +pathy==0.10.2 pendulum==2.1.2 pexpect==4.8.0 phonenumbers==8.13.0 pickleshare==0.7.5 -platformdirs==3.5.1 -pluggy==1.0.0 -portalocker==2.7.0 -preshed==3.0.8 +platformdirs==3.11.0 +pluggy==1.3.0 +preshed==3.0.9 prison==0.2.1 progressbar2==4.2.0 -prometheus-client==0.17.0 -prompt-toolkit==3.0.38 -proto-plus==1.22.2 -protobuf==4.23.2 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +proto-plus==1.22.3 +protobuf==4.24.4 psutil==5.9.5 -psycopg2-binary==2.9.6 +psycopg2-binary==2.9.9 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 -py-partiql-parser==0.3.0 -pyarrow==8.0.0 +py-partiql-parser==0.3.7 +pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 pyathena==2.4.1 pycountry==22.3.5 pycparser==2.21 -pycryptodome==3.18.0 -pycryptodomex==3.18.0 -pydantic==1.10.8 -pydash==7.0.3 +pycryptodome==3.19.0 +pycryptodomex==3.19.0 +pydantic==1.10.13 +pydash==7.0.6 pydruid==0.6.5 -Pygments==2.15.1 -pymongo==4.3.3 -PyMySQL==1.0.3 -pyOpenSSL==22.0.0 +Pygments==2.16.1 +pyiceberg==0.4.0 +pymongo==4.5.0 +PyMySQL==1.1.0 +pyOpenSSL==23.2.0 pyparsing==3.0.9 -pyrsistent==0.19.3 -pyspnego==0.9.0 +pyspnego==0.10.2 python-daemon==3.0.1 python-dateutil==2.8.2 python-dotenv==1.0.0 @@ -272,111 +280,115 @@ python-jose==3.3.0 python-ldap==3.4.3 python-nvd3==0.15.0 python-slugify==8.0.1 -python-stdnum==1.18 -python-tds==1.12.0 -python-utils==3.6.0 +python-stdnum==1.19 +python-tds==1.13.0 +python-utils==3.8.1 python3-openid==3.2.0 -pytz==2023.3 +pytz==2023.3.post1 pytzdata==2020.1 -PyYAML==6.0 -pyzmq==25.1.0 
+PyYAML==6.0.1 +pyzmq==24.0.1 ratelimiter==1.2.0.post0 redash-toolbelt==0.1.9 -redshift-connector==2.0.910 -regex==2023.5.5 -requests==2.28.2 +redshift-connector==2.0.914 +referencing==0.30.2 +regex==2023.10.3 +requests==2.31.0 requests-file==1.5.1 requests-gssapi==1.2.3 requests-ntlm==1.2.0 requests-toolbelt==0.10.1 -responses==0.23.1 -retrying==1.3.4 +responses==0.23.3 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich==13.3.5 -rich_argparse==1.1.0 +rich==13.6.0 +rich-argparse==1.3.0 +rpds-py==0.10.6 rsa==4.9 ruamel.yaml==0.17.17 -s3transfer==0.6.1 -sasl3==0.2.11 -schwifty==2023.3.0 -scipy==1.10.1 +ruamel.yaml.clib==0.2.8 +s3transfer==0.7.0 +schwifty==2023.9.0 +scipy==1.11.3 scramp==1.4.4 Send2Trash==1.8.2 -setproctitle==1.3.2 -simple-salesforce==1.12.4 +sentry-sdk==1.32.0 +setproctitle==1.3.3 +simple-salesforce==1.12.5 six==1.16.0 -smart-open==6.3.0 -smmap==5.0.0 +smart-open==6.4.0 +smmap==5.0.1 sniffio==1.3.0 -snowflake-connector-python==2.9.0 -snowflake-sqlalchemy==1.4.7 -soupsieve==2.4.1 +snowflake-connector-python==3.2.1 +snowflake-sqlalchemy==1.5.0 +sortedcontainers==2.4.0 +soupsieve==2.5 spacy==3.4.3 spacy-legacy==3.0.12 -spacy-loggers==1.0.4 +spacy-loggers==1.0.5 sql-metadata==2.2.2 -SQLAlchemy==1.4.41 -sqlalchemy-bigquery==1.6.1 +SQLAlchemy==1.4.44 +sqlalchemy-bigquery==1.8.0 SQLAlchemy-JSONField==1.0.1.post0 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.1 -sqlalchemy2-stubs==0.0.2a34 -sqllineage==1.3.6 -sqlparse==0.4.3 -srsly==2.4.6 -stack-data==0.6.2 +sqlalchemy2-stubs==0.0.2a35 +sqllineage==1.3.8 +sqlparse==0.4.4 +srsly==2.4.8 +stack-data==0.6.3 starlette==0.27.0 +strictyaml==1.7.3 tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 -tenacity==8.2.2 +tenacity==8.2.3 termcolor==2.3.0 terminado==0.17.1 text-unidecode==1.3 -thinc==8.1.10 -thrift==0.16.0 +thinc==8.1.12 +thrift==0.13.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 tomli==2.0.1 +tomlkit==0.12.1 toolz==0.12.0 -tornado==6.3.2 -tqdm==4.65.0 
+tornado==6.3.3 +tqdm==4.66.1 traitlets==5.2.1.post0 -trino==0.324.0 +trino==0.327.0 typeguard==2.13.3 typer==0.7.0 -types-PyYAML==6.0.12.10 +types-PyYAML==6.0.12.12 typing-inspect==0.9.0 -typing_extensions==4.5.0 -tzlocal==5.0.1 +typing_extensions==4.8.0 +tzlocal==5.1 uc-micro-py==1.0.2 -ujson==5.7.0 +ujson==5.8.0 unicodecsv==0.14.1 -urllib3==1.26.16 -uvicorn==0.22.0 +urllib3==1.26.17 +uvicorn==0.23.2 uvloop==0.17.0 -vertica-python==1.3.2 -vertica-sqlalchemy-dialect==0.0.1 +vertica-python==1.3.5 +vertica-sqlalchemy-dialect==0.0.8 vininfo==1.7.0 volatile==2.1.0 wasabi==0.10.1 -watchfiles==0.19.0 -wcmatch==8.4.1 -wcwidth==0.2.6 +watchfiles==0.20.0 +wcmatch==8.5 +wcwidth==0.2.8 webencodings==0.5.1 -websocket-client==1.5.2 +websocket-client==1.6.4 websockets==11.0.3 Werkzeug==2.2.3 -widgetsnbextension==4.0.7 +widgetsnbextension==4.0.9 wrapt==1.15.0 -WTForms==3.0.1 +WTForms==3.1.0 xlrd==2.0.1 xmltodict==0.13.0 yarl==1.9.2 zeep==4.2.1 -zipp==3.15.0 -zstd==1.5.5.1 +zstd==1.5.5.1 \ No newline at end of file diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 734f8ba452f3e..4b321b1639c1b 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=prod -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index ee5d927fb1ddb..4d38ee6daa235 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=prod -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 4e1521cc0561e..945be54678a24 100644 --- 
a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=prod -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/elasticsearch-setup/Dockerfile b/docker/elasticsearch-setup/Dockerfile index af3c8c9df762a..c8fb2eba911b8 100644 --- a/docker/elasticsearch-setup/Dockerfile +++ b/docker/elasticsearch-setup/Dockerfile @@ -3,7 +3,7 @@ # Defining environment ARG APP_ENV=prod -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh index 629e9bc9484ee..b5024e49e59f1 100755 --- a/docker/kafka-setup/kafka-setup.sh +++ b/docker/kafka-setup/kafka-setup.sh @@ -36,7 +36,9 @@ if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SSL" ]]; then fi if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION ]]; then echo "ssl.truststore.location=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH - echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH + if [[ $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE != "PEM" ]]; then + echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH + fi if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE ]]; then echo "ssl.truststore.type=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE" >> $CONNECTION_PROPERTIES_PATH fi diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql index c4132575cf442..95c8cabbc5ca4 100644 --- a/docker/mariadb/init.sql +++ b/docker/mariadb/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql-setup/Dockerfile b/docker/mysql-setup/Dockerfile index 
732b860a58f07..56bab61180489 100644 --- a/docker/mysql-setup/Dockerfile +++ b/docker/mysql-setup/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql index 2370a971941d2..b789329ddfd17 100644 --- a/docker/mysql-setup/init.sql +++ b/docker/mysql-setup/init.sql @@ -39,3 +39,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql/init.sql b/docker/mysql/init.sql index b4b4e4617806c..aca57d7cd444c 100644 --- a/docker/mysql/init.sql +++ b/docker/mysql/init.sql @@ -27,3 +27,5 @@ INSERT INTO metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 313615ac3465b..7f4d53ae044d4 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.18 AS binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder diff --git a/docker/postgres-setup/init.sql b/docker/postgres-setup/init.sql index 12fff7aec7fe6..72b2f73192e00 100644 --- a/docker/postgres-setup/init.sql +++ b/docker/postgres-setup/init.sql @@ -35,3 +35,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql index cf477c135422e..87c8dd3337fac 100644 --- a/docker/postgres/init.sql +++ b/docker/postgres/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, 
version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 68ea1ebffa6c9..506e263933394 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,13 @@ module.exports = { projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + scripts: [ + { + src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", + async: true, + defer: true, + }, + ], noIndex: isSaas, customFields: { isSaas: isSaas, @@ -50,44 +57,41 @@ module.exports = { position: "right", }, { - to: "https://demo.datahubproject.io/", - label: "Demo", - position: "right", - }, - { - href: "https://blog.datahubproject.io/", - label: "Blog", - position: "right", - }, - { - href: "https://feature-requests.datahubproject.io/roadmap", - label: "Roadmap", + type: "dropdown", + label: "Resources", position: "right", + items: [ + { + href: "https://demo.datahubproject.io/", + label: "Demo", + }, + { + href: "https://blog.datahubproject.io/", + label: "Blog", + }, + { + href: "https://feature-requests.datahubproject.io/roadmap", + label: "Roadmap", + }, + { + href: "https://slack.datahubproject.io", + label: "Slack", + }, + { + href: "https://github.com/datahub-project/datahub", + label: "GitHub", + }, + { + href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", + label: "YouTube", + }, + ], }, { type: "docsVersionDropdown", - position: "right", + position: "left", dropdownActiveClassDisabled: true, }, - { - href: "https://slack.datahubproject.io", - "aria-label": "Slack", - position: "right", - className: "item__icon item__slack", - }, - { - href: "https://github.com/datahub-project/datahub", - "aria-label": "GitHub", - position: "right", - className: "item__icon 
item__github", - }, - - { - href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", - "aria-label": "YouTube", - position: "right", - className: "item__icon item__youtube", - }, ], }, footer: { diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index bdf3926c17e0d..39eaea57444ed 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -140,6 +140,7 @@ module.exports = { "metadata-ingestion/docs/dev_guides/classification", "metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source", "metadata-ingestion/docs/dev_guides/sql_profiles", + "metadata-ingestion/docs/dev_guides/profiling_ingestions", ], }, ], @@ -157,6 +158,7 @@ module.exports = { // The purpose of this section is to provide the minimum steps required to deploy DataHub to the vendor of your choosing "docs/deploy/aws", "docs/deploy/gcp", + "docs/deploy/azure", "docker/README", "docs/deploy/kubernetes", "docs/deploy/environment-vars", @@ -440,10 +442,29 @@ module.exports = { }, "docs/act-on-metadata/impact-analysis", { - Observability: [ - "docs/managed-datahub/observe/freshness-assertions", - "docs/managed-datahub/observe/volume-assertions", - "docs/managed-datahub/observe/custom-sql-assertions", + label: "Observability", + type: "category", + items: [ + { + type: "doc", + id: "docs/managed-datahub/observe/freshness-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/volume-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/custom-sql-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/column-assertions", + className: "saasOnly", + }, ], }, { @@ -603,10 +624,10 @@ module.exports = { { type: "doc", id: "docs/managed-datahub/chrome-extension", - className: "saasOnly", }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_2_12", "docs/managed-datahub/release-notes/v_0_2_11", 
"docs/managed-datahub/release-notes/v_0_2_10", "docs/managed-datahub/release-notes/v_0_2_9", diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 55a54876b41ac..16e3893ed08b7 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -144,20 +144,29 @@ div[class^="announcementBar"] { /** Navbar */ -@media only screen and (max-width: 1050px) { - .navbar__toggle { - display: inherit; - } - .navbar__item { - display: none; - } -} - .navbar { .navbar__logo { height: 3rem; } + + .navbar__link { + align-items: center; + margin: 0 1rem 0; + padding: 0; + border-bottom: 2px solid transparent; + } + + .dropdown > .navbar__link:after { + top: -1px; + border-width: 0.3em 0.3em 0; + margin-left: 0.4em; + } + + .navbar__link--active { + border-bottom-color: var(--ifm-navbar-link-hover-color); + } .navbar__item { + padding: 0.25rem 0; svg[class*="iconExternalLink"] { display: none; } diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index cc04ab23d3cf3..661d64392e67f 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -6,6 +6,9 @@ import { translate } from "@docusaurus/Translate"; import { useLocation } from "@docusaurus/router"; import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem"; import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; + +import styles from "./styles.module.scss"; + const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); export default function DocsVersionDropdownNavbarItem({ mobile, @@ -60,6 +63,7 @@ export default function DocsVersionDropdownNavbarItem({ return ( /resourceGroups/myResourceGroup", + "location": "eastus", + "managedBy": null, + "name": "myResourceGroup", + "properties": { + "provisioningState": 
"Succeeded" + }, + "tags": null +} +``` +- Create an AKS Cluster. For this project, it is best to increase node count to at least 3. Change cluster name, node count, and addons to values of your choosing. + +``` +az aks create -g myResourceGroup -n myAKSCluster --enable-managed-identity --node-count 3 --enable-addons monitoring --generate-ssh-keys +``` + +After a few minutes, the command completes and returns JSON-formatted information about the cluster. + +- Connect to the cluster + +Configure kubectl to connect to your Kubernetes cluster using the `az aks get-credentials` command. + +``` +az aks get-credentials --resource-group myResourceGroup --name myAKSCluster +``` + +Verify the connection to your cluster using the `kubectl get` command. This command returns a list of the cluster nodes. + +``` +kubectl get nodes +``` + +You should get results like below. Make sure node status is Ready. + +``` +NAME STATUS ROLES AGE VERSION +aks-nodepool1-37660971-vmss000000 Ready agent 24h v1.25.6 +aks-nodepool1-37660971-vmss000001 Ready agent 24h v1.25.6 +aks-nodepool1-37660971-vmss000002 Ready agent 24h v1.25.6 +``` + +## Setup DataHub using Helm + +Once the Kubernetes cluster has been set up, you can deploy DataHub and its prerequisites using helm. Please follow the +steps in this [guide](kubernetes.md). + + +Notes: +Since we are using PostgreSQL as the storage layer, change postgresql enabled to true and mysql to false in the values.yaml file of prerequisites. +Additionally, create a postgresql secret. Make sure to include 3 passwords for the postgresql secret: postgres-password, replication-password, and password. + +## Expose endpoints using a load balancer + +Now that all the pods are up and running, you need to expose the datahub-frontend endpoint by setting +up [ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/). To do this, you need to first set up an +ingress controller.
+ + +There are many [ingress controllers](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) to choose +from, but here, we will follow this [guide](https://learn.microsoft.com/en-us/azure/application-gateway/tutorial-ingress-controller-add-on-existing) to set up the Azure +Application Gateway Ingress Controller. + +- Deploy a New Application Gateway. + +First, you need to create a WAF policy + +``` +az network application-gateway waf-policy create -g myResourceGroup -n myWAFPolicy +``` + +- Before the application gateway can be deployed, you'll also need to create a public IP resource, a new virtual network with address space 10.0.0.0/16, and a subnet with address space 10.0.0.0/24. +Then, you can deploy your application gateway in the subnet using the publicIP. + +Caution: When you use an AKS cluster and application gateway in separate virtual networks, the address spaces of the two virtual networks must not overlap. The default address space that an AKS cluster deploys in is 10.224.0.0/12. + + +``` +az network public-ip create -n myPublicIp -g myResourceGroup --allocation-method Static --sku Standard +az network vnet create -n myVnet -g myResourceGroup --address-prefix 10.0.0.0/16 --subnet-name mySubnet --subnet-prefix 10.0.0.0/24 +az network application-gateway create -n myApplicationGateway -l eastus -g myResourceGroup --sku WAF_v2 --public-ip-address myPublicIp --vnet-name myVnet --subnet mySubnet --priority 100 --waf-policy /subscriptions/{subscription_id}/resourceGroups/myResourceGroup/providers/Microsoft.Network/ApplicationGatewayWebApplicationFirewallPolicies/myWAFPolicy +``` +Change myPublicIp, myResourceGroup, myVnet, mySubnet, and myApplicationGateway to names of your choosing. 
+ + +- Enable the AGIC Add-On in Existing AKS Cluster Through Azure CLI + +``` +appgwId=$(az network application-gateway show -n myApplicationGateway -g myResourceGroup -o tsv --query "id") +az aks enable-addons -n myAKSCluster -g myResourceGroup -a ingress-appgw --appgw-id $appgwId +``` + +- Peer the Two Virtual Networks Together + +Since you deployed the AKS cluster in its own virtual network and the Application gateway in another virtual network, you'll need to peer the two virtual networks together in order for traffic to flow from the Application gateway to the pods in the cluster. + +``` +nodeResourceGroup=$(az aks show -n myAKSCluster -g myResourceGroup -o tsv --query "nodeResourceGroup") +aksVnetName=$(az network vnet list -g $nodeResourceGroup -o tsv --query "[0].name") + +aksVnetId=$(az network vnet show -n $aksVnetName -g $nodeResourceGroup -o tsv --query "id") +az network vnet peering create -n AppGWtoAKSVnetPeering -g myResourceGroup --vnet-name myVnet --remote-vnet $aksVnetId --allow-vnet-access + +appGWVnetId=$(az network vnet show -n myVnet -g myResourceGroup -o tsv --query "id") +az network vnet peering create -n AKStoAppGWVnetPeering -g $nodeResourceGroup --vnet-name $aksVnetName --remote-vnet $appGWVnetId --allow-vnet-access +``` + +- Deploy the Ingress on the Frontend Pod + +In order to use the ingress controller to expose the frontend pod, we need to update the datahub-frontend section of the values.yaml file that was used to deploy DataHub. 
Here is a sample configuration: + +``` +datahub-frontend: + enabled: true + image: + repository: linkedin/datahub-frontend-react + # tag: "v0.10.0" # defaults to .global.datahub.version + + # Set up ingress to expose react front-end + ingress: + enabled: true + annotations: + kubernetes.io/ingress.class: azure/application-gateway + appgw.ingress.kubernetes.io/backend-protocol: "http" + + hosts: + - paths: + - /* + defaultUserCredentials: {} +``` + +You can then apply the updates: + +``` +helm upgrade --install datahub datahub/datahub --values values.yaml +``` + +You can now verify that the ingress was created correctly + +``` +kubectl get ingress +``` + +You should see a result like this: + +![frontend-image](https://github.com/Saketh-Mahesh/azure-docs-images/blob/main/frontend-status.png?raw=true) + +## Use PostgreSQL for the storage layer +Configure a PostgreSQL database in the same virtual network as the Kubernetes cluster or implement virtual network peering to connect both networks. Once the database is provisioned, you should be able to see the following page under the Connect tab on the left side. + + +Note: PostgreSQL Database MUST be deployed in the same location as AKS/resource group (eastus, centralus, etc.) +Take a note of the connection details: + +![postgres-info](https://github.com/Saketh-Mahesh/azure-docs-images/blob/main/postgres-info.png?raw=true) + + + + + +- Update the postgresql settings under global in the values.yaml as follows. 
+ +``` +global: + sql: + datasource: + host: "${POSTGRES_HOST}.postgres.database.azure.com:5432" + hostForpostgresqlClient: "${POSTGRES_HOST}.postgres.database.azure.com" + port: "5432" + url: "jdbc:postgresql://${POSTGRES_HOST}.postgres.database.azure.com:5432/datahub?user=${POSTGRES_ADMIN_LOGIN}&password=${POSTGRES_ADMIN_PASSWORD}&sslmode=require" + driver: "org.postgresql.Driver" + username: "${POSTGRES_ADMIN_LOGIN}" + password: + value: "${POSTGRES_ADMIN_PASSWORD}" +``` +Run this command helm command to update datahub configuration + +``` +helm upgrade --install datahub datahub/datahub --values values.yaml +``` + +And there you go! You have now installed DataHub on an Azure Kubernetes Cluster with an ingress controller set up to expose the frontend. Additionally you have utilized PostgreSQL as the storage layer of DataHub. \ No newline at end of file diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 0689db9b17331..779c3d3d7c432 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -79,9 +79,10 @@ Simply replace the dot, `.`, with an underscore, `_`, and convert to uppercase. ## Frontend -| Variable | Default | Unit/Type | Components | Description | -|------------------------------------|----------|-----------|--------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `AUTH_VERBOSE_LOGGING` | `false` | boolean | [`Frontend`] | Enable verbose authentication logging. Enabling this will leak sensisitve information in the logs. Disable when finished debugging. | -| `AUTH_OIDC_GROUPS_CLAIM` | `groups` | string | [`Frontend`] | Claim to use as the user's group. | -| `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` | `false` | boolean | [`Frontend`] | Auto-provision the group from the user's group claim. 
| -| `AUTH_SESSION_TTL_HOURS` | `24` | string | [`Frontend`] | The number of hours a user session is valid. [User session tokens are stateless and will become invalid after this time](https://www.playframework.com/documentation/2.8.x/SettingsSession#Session-Timeout-/-Expiration) requiring a user to login again. | \ No newline at end of file +| Variable | Default | Unit/Type | Components | Description | +|------------------------------------|----------|-----------|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `AUTH_VERBOSE_LOGGING` | `false` | boolean | [`Frontend`] | Enable verbose authentication logging. Enabling this will leak sensisitve information in the logs. Disable when finished debugging. | +| `AUTH_OIDC_GROUPS_CLAIM` | `groups` | string | [`Frontend`] | Claim to use as the user's group. | +| `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` | `false` | boolean | [`Frontend`] | Auto-provision the group from the user's group claim. | +| `AUTH_SESSION_TTL_HOURS` | `24` | string | [`Frontend`] | The number of hours a user session is valid. After this many hours the actor cookie will be expired by the browser and the user will be prompted to login again. | +| `MAX_SESSION_TOKEN_AGE` | `24h` | string | [`Frontend`] | The maximum age of the session token. [User session tokens are stateless and will become invalid after this time](https://www.playframework.com/documentation/2.8.x/SettingsSession#Session-Timeout-/-Expiration) requiring a user to login again. 
| \ No newline at end of file diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 5d0ad5eaf8f7e..4d1535f28fa0a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -4,22 +4,67 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next +- #9010 - In Redshift source's config `incremental_lineage` is set default to off. + ### Breaking Changes - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted + by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. -- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +- #8943 - The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #8846 - Changed enum values in resource filters used by policies. `RESOURCE_TYPE` became `TYPE` and `RESOURCE_URN` became `URN`. +Any existing policies using these filters (i.e. 
defined for particular `urns` or `types` such as `dataset`) need to be upgraded +manually, for example by retrieving their respective `dataHubPolicyInfo` aspect and changing part using filter i.e. +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "RESOURCE_TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +into +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +for example, using `datahub put` command. Policies can be also removed and re-created via UI. +- #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. +This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully +qualified dataset name, i.e. `.`. We attempt to support the old +pattern format by prepending `.*\\.` to dataset patterns lacking a period, so in most cases this +should not cause any issues. However, if you have a complex dataset pattern, we recommend you +manually convert it to the fully qualified format to avoid any potential issues. ### Potential Downtime ### Deprecations ### Other Notable Changes +- Session token configuration has changed, all previously created session tokens will be invalid and users will be prompted to log in. Expiration time has also been shortened which may result in more login prompts with the default settings. + There should be no other interruption due to this change. ## 0.11.0 diff --git a/docs/managed-datahub/chrome-extension.md b/docs/managed-datahub/chrome-extension.md index 0aa0860d03b67..a4560bc8cc09b 100644 --- a/docs/managed-datahub/chrome-extension.md +++ b/docs/managed-datahub/chrome-extension.md @@ -1,10 +1,8 @@ --- description: Learn how to upload and use the Acryl DataHub Chrome extension (beta) locally before it's available on the Chrome store. 
--- -import FeatureAvailability from '@site/src/components/FeatureAvailability'; # Acryl DataHub Chrome Extension - ## Installing the Extension diff --git a/docs/managed-datahub/observe/column-assertions.md b/docs/managed-datahub/observe/column-assertions.md new file mode 100644 index 0000000000000..99a764f771676 --- /dev/null +++ b/docs/managed-datahub/observe/column-assertions.md @@ -0,0 +1,358 @@ +--- +description: This page provides an overview of working with DataHub Column Assertions +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + + +# Column Assertions + + + +> ⚠️ The **Column Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only +> be available to a limited set of design partners. +> +> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success +> representative. + +## Introduction + +Can you remember a time when an important warehouse table column changed dramatically, with little or no notice? Perhaps the number of null values suddenly spiked, or a new value was added to a fixed set of possible values. If the answer is yes, how did you initially find out? We'll take a guess - someone looking at an internal reporting dashboard or, worse, a user using your product, sounded an alarm when a number looked a bit out of the ordinary. + +There are many reasons why important columns in your Snowflake, Redshift, or BigQuery tables may change - application code bugs, new feature rollouts, etc. Oftentimes, these changes break important assumptions made about the data used in building key downstream data products like reporting dashboards or data-driven product features. + +What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data issues before anyone else? With Acryl DataHub Column Assertions, you can. 
+ +With Acryl DataHub, you can define **Column Value** assertions to ensure each value in a column matches specific constraints, and **Column Metric** assertions to ensure that computed metrics from columns align with your expectations. As soon as things go wrong, your team will be the first to know, before the data issue becomes a larger data incident. + +In this guide, we'll cover the basics of Column Assertions - what they are, how to configure them, and more - so that you and your team can start building trust in your most important data assets. + +Let's dive in! + +## Support + +Column Assertions are currently supported for: + +1. Snowflake +2. Redshift +3. BigQuery + +Note that an Ingestion Source _must_ be configured with the data platform of your choice in +Acryl DataHub's **Ingestion** tab. + +> Note that Column Assertions are not yet supported if you are connecting to your warehouse +> using the DataHub CLI or a Remote Ingestion Executor. + +## What is a Column Assertion? + +A **Column Assertion** is a highly configurable Data Quality rule used to monitor specific columns of a Data Warehouse table for unexpected changes. + +Column Assertions are defined to validate a specific column, and can be used to + +1. Validate that the values of the column match some constraints (regex, allowed values, max, min, etc) across rows OR +2. Validate that specific column aggregation metrics match some expectations across rows. + +Column Assertions can be particularly useful for documenting and enforcing column-level "contracts", i.e. formal specifications about the expected contents of a particular column that can be used for coordinating among producers and consumers of the data. + +### Anatomy of Column Assertion + +Column Assertions can be divided into two main types: **Column Value** and **Column Metric** Assertions. 
+ +A **Column Value Assertion** is used to monitor the value of a specific column in a table, and ensure that every row +adheres to a specific condition. In comparison, a **Column Metric Assertion** is used to compute a metric for that column, +and ensure that the value of that metric adheres to a specific condition. + +At the most basic level, both types consist of a few important parts: + +1. An **Evaluation Schedule** +2. A **Column Selection** +3. An **Evaluation Criteria** +4. A **Row Selection Set** + +In this section, we'll give an overview of each. + +#### 1. Evaluation Schedule + +The **Evaluation Schedule**: This defines how often to evaluate the Column Assertion against the given warehouse table. +This should usually be configured to match the expected change frequency of the table, although it can also be less +frequent depending on your requirements. You can also specify specific days of the week, hours in the day, or even +minutes in an hour. + +#### 2. Column Selection + +The **Column Selection**: This defines the column that should be monitored by the Column Assertion. You can choose from +any of the columns from the table listed in the dropdown. Note that columns of struct / object type are not currently supported. + +#### 3. Evaluation Criteria + +The **Evaluation Criteria**: This defines the condition that must be satisfied in order for the Column +Assertion to pass. + +For **Column Value Assertions**, you will be able to choose from a set of operators that can be applied to the column +value. The options presented will vary based on the data type of the selected column. For example, if you've selected a numeric column, you +can verify that the column value is greater than a particular value. For string types, you can check that the column value +matches a particular regex pattern. Additionally, you are able to control the behavior of the check in the presence of NULL values. 
If the +**Allow Nulls** option is _disabled_, then any null values encountered will be reported as a failure when evaluating the +assertion. If **Allow Nulls** is enabled, then nulls will be ignored; the condition will be evaluated for rows where the column value is non-null. + +For **Column Metric Assertions**, you will be able to choose from a list of common column metrics - MAX, MIN, MEAN, NULL COUNT, etc - and then compare these metric values to an expected value. The list of metrics will vary based on the type of the selected column. For example +if you've selected a numeric column, you can choose to compute the MEAN value of the column, and then assert that it is greater than a +specific number. For string types, you can choose to compute the MAX LENGTH of the string across all column values, and then assert that it +is less than a specific number. + +#### 4. Row Selection Set + +The **Row Selection Set**: This defines which rows in the table the Column Assertion will be evaluated across. You can choose +from the following options: + +- **All Table Rows**: Evaluate the Column Assertion across all rows in the table. This is the default option. Note that +this may not be desirable for large tables. + +- **Only Rows That Have Changed**: Evaluate the Column Assertion only against rows that have changed since the last +evaluation of the assertion. If you choose this option, you will need to specify a **High Watermark Column** to help determine which rows +have changed. A **High Watermark Column** is a column that contains a constantly incrementing value - a date, a time, or +another always-increasing number - that can be used to find the "new rows" that were added since previous evaluation. When selected, a query will be issued to the table to find only the rows that have changed since the previous assertion evaluation. + +## Creating a Column Assertion + +### Prerequisites + +1. 
**Permissions**: To create or delete Column Assertions for a specific entity on DataHub, you'll need to be granted the + `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + +2. **Data Platform Connection**: In order to create a Column Assertion, you'll need to have an **Ingestion Source** + configured to your Data Platform: Snowflake, BigQuery, or Redshift under the **Ingestion** tab. + +Once these are in place, you're ready to create your Column Assertions! + +### Steps + +1. Navigate to the Table that you want to monitor +2. Click the **Validations** tab + +

+ +

+ +3. Click **+ Create Assertion** + +

+ +

+ +4. Choose **Column** + +5. Configure the evaluation **schedule**. This is the frequency at which the assertion will be evaluated to produce a + pass or fail result, and the times when the column values will be checked. + +6. Configure the **column assertion type**. You can choose from **Column Value** or **Column Metric**. + **Column Value** assertions are used to monitor the value of a specific column in a table, and ensure that every row + adheres to a specific condition. **Column Metric** assertions are used to compute a metric for that column, and then compare the value of that metric to your expectations. + +

+ +

+ +7. Configure the **column selection**. This defines the column that should be monitored by the Column Assertion. + You can choose from any of the columns from the table listed in the dropdown. + +

+ +

+ +8. Configure the **evaluation criteria**. This step varies based on the type of assertion you chose in the previous step. + + - **Column Value Assertions**: You will be able to choose from a set of operators that can be applied to the column + value. The options presented will vary based on the data type of the selected column. For example with numeric types, you + can check that the column value is greater than a specific value. For string types, you can check that the column value + matches a particular regex pattern. You will also be able to control the behavior of null values in the column. If the + **Allow Nulls** option is _disabled_, any null values encountered will be reported as a failure when evaluating the + assertion. + + - **Column Metric Assertions**: You will be able to choose from a list of common metrics and then specify the operator + and value to compare against. The list of metrics will vary based on the data type of the selected column. For example + with numeric types, you can choose to compute the average value of the column, and then assert that it is greater than a + specific number. For string types, you can choose to compute the max length of all column values, and then assert that it + is less than a specific number. + +9. Configure the **row evaluation type**. This defines which rows in the table the Column Assertion should evaluate. You can choose + from the following options: + + - **All Table Rows**: Evaluate the Column Assertion against all rows in the table. This is the default option. Note that + this may not be desirable for large tables. + + - **Only Rows That Have Changed**: Evaluate the Column Assertion only against rows that have changed since the last + evaluation. If you choose this option, you will need to specify a **High Watermark Column** to help determine which rows + have changed. 
A **High Watermark Column** is a column that contains a constantly-incrementing value - a date, a time, or + another always-increasing number. When selected, a query will be issued to the table to find only the rows which have changed since the last assertion run. + +

+ +

+ +10. (Optional) Click **Advanced** to further customize the Column Assertion. The options listed here will vary based on the + type of assertion you chose in the previous step. + + - **Invalid Values Threshold**: For **Column Value** assertions, you can configure the number of invalid values + (i.e. rows) that are allowed to fail before the assertion is marked as failing. This is useful if you want to allow a limited number + of invalid values in the column. By default this is 0, meaning the assertion will fail if any rows have an invalid column value. + + - **Source**: For **Column Metric** assertions, you can choose the mechanism that will be used to obtain the column + metric. **Query** will issue a query to the dataset to compute the metric. **DataHub Dataset Profile** will use the + DataHub Dataset Profile metadata to compute the metric. Note that this option requires that dataset profiling + statistics are up-to-date as of the assertion run time. + + - **Additional Filters**: You can choose to add additional filters to the query that will be used to evaluate the + assertion. This is useful if you want to limit the assertion to a subset of rows in the table. Note this option will not + be available if you choose **DataHub Dataset Profile** as the **source**. + +11. Click **Next** +12. Configure actions that should be taken when the Column Assertion passes or fails + +

+ +

+ +- **Raise incident**: Automatically raise a new DataHub `Column` Incident for the Table whenever the Column Assertion is failing. This + may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when + an incident is created due to an Assertion failure. +- **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Column Assertion. Note that + any other incidents will not be impacted. + +13. Click **Save**. + +And that's it! DataHub will now begin to monitor your Column Assertion for the table. + +To view the time of the next Column Assertion evaluation, simply click **Column** and then click on your +new Assertion: + +

+ +

+ +Once your assertion has run, you will begin to see Success or Failure status for the Table + +

+ +

+ +## Stopping a Column Assertion + +In order to temporarily stop the evaluation of a Column Assertion: + +1. Navigate to the **Validations** tab of the table with the assertion +2. Click **Column** to open the Column Assertions list +3. Click the three-dot menu on the right side of the assertion you want to disable +4. Click **Stop** + +

+ +

+ +To resume the Column Assertion, simply click **Turn On**. + +

+ +

+ +## Creating Column Assertions via API + +Under the hood, Acryl DataHub implements Column Assertion Monitoring using two "entity" concepts: + +- **Assertion**: The specific expectation for the column metric. e.g. "The value of an integer column is greater than 10 for all rows in the table." This is the "what". + +- **Monitor**: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific + mechanisms. This is the "how". + +Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the +`Edit Assertions` and `Edit Monitors` privileges for it. + +#### GraphQL + +In order to create a Column Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 +GraphQL mutation queries to create a Column Assertion entity and create an Assertion Monitor entity responsible for evaluating it. + +Start by creating the Column Assertion entity using the `createFieldAssertion` mutation and hang on to the 'urn' field of the Assertion entity +you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor` mutation. 
+ +##### Examples + +To create a Column Assertion Entity that checks that the value of an integer column is greater than 10: + +```json +mutation createFieldAssertion { + createFieldAssertion( + input: { + entityUrn: "", + type: FIELD_VALUES, + fieldValuesAssertion: { + field: { + path: "", + type: "NUMBER", + nativeType: "NUMBER(38,0)" + }, + operator: GREATER_THAN, + parameters: { + value: { + type: NUMBER, + value: "10" + } + }, + failThreshold: { + type: COUNT, + value: 0 + }, + excludeNulls: true + } + } + ) { + urn +} +} +``` + +To create an Assertion Monitor Entity that evaluates the column assertion every 8 hours using all rows in the table: + +```json +mutation createAssertionMonitor { + createAssertionMonitor( + input: { + entityUrn: "", + assertionUrn: "", + schedule: { + cron: "0 */8 * * *", + timezone: "America/Los_Angeles" + }, + parameters: { + type: DATASET_FIELD, + datasetFieldParameters: { + sourceType: ALL_ROWS_QUERY + } + } + } + ) { + urn + } +} +``` + +This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using a query against all rows of the table). + +After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. + +You can delete assertions along with their monitors using GraphQL mutations: `deleteAssertion` and `deleteMonitor`. + +### Tips + +:::info +**Authorization** + +Remember to always provide a DataHub Personal Access Token when calling the GraphQL API. 
To do so, just add the 'Authorization' header as follows: + +``` +Authorization: Bearer +``` + +**Exploring GraphQL API** + +Also, remember that you can play with an interactive version of the Acryl GraphQL API at `https://your-account-id.acryl.io/api/graphiql` +::: diff --git a/docs/managed-datahub/release-notes/v_0_2_11.md b/docs/managed-datahub/release-notes/v_0_2_11.md index 1f42090848712..c99d10201e097 100644 --- a/docs/managed-datahub/release-notes/v_0_2_11.md +++ b/docs/managed-datahub/release-notes/v_0_2_11.md @@ -7,7 +7,7 @@ Release Availability Date Recommended CLI/SDK --- -- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.10.5.5 +- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0 - [Deprecation] In LDAP ingestor, the manager_pagination_enabled changed to general pagination_enabled If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. diff --git a/docs/managed-datahub/release-notes/v_0_2_12.md b/docs/managed-datahub/release-notes/v_0_2_12.md new file mode 100644 index 0000000000000..b13f471d9bf63 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_2_12.md @@ -0,0 +1,30 @@ +# v0.2.12 +--- + +Release Availability Date +--- +13-Oct-2023 + +Recommended CLI/SDK +--- +- `v0.11.0.4` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0.4 +- [breaking] Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- [breaking] Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted by Looker and LookML source connectors. +- [breaking] The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. 
+- [breaking] Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. +- [breaking] The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. +If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. +Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: +`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. + + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SDK etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + + +## Release Changelog +--- +- Since `v0.2.11` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/75252a3d9f6a576904be5a0790d644b9ae2df6ac...10a190470e8c932b6d34cba49de7dbcba687a088 have been pulled in. 
+ +## Some notable features in this SaaS release +- Nested Domains available in this release diff --git a/docs/ui-ingestion.md b/docs/ui-ingestion.md index db2007e1e19a9..438ddd8823b7e 100644 --- a/docs/ui-ingestion.md +++ b/docs/ui-ingestion.md @@ -1,5 +1,12 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Ingestion + + ## Introduction Starting in version `0.8.25`, DataHub supports creating, configuring, scheduling, & executing batch metadata ingestion using the DataHub user interface. This makes @@ -173,28 +180,29 @@ Finally, give your Ingestion Source a name. Once you're happy with your configurations, click 'Done' to save your changes. -##### Advanced: Running with a specific CLI version +##### Advanced ingestion configs: -DataHub comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible +DataHub's Managed Ingestion UI comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible with the server. However, you can override the default package version using the 'Advanced' source configurations. To do so, simply click 'Advanced', then change the 'CLI Version' text box to contain the exact version of the DataHub CLI you'd like to use. -

_Pinning the CLI version to version `0.8.23.2`_ +Other advanced options include specifying **environment variables**, **DataHub plugins** or **python packages at runtime**. + Once you're happy with your changes, simply click 'Done' to save. You can upload and even update recipes using the cli as mentioned in the [cli documentation for uploading ingestion recipes](./cli.md#ingest-deploy). -An example execution would look something like: +An example execution for a given `recipe.yaml` file, would look something like: ```bash datahub ingest deploy --name "My Test Ingestion Source" --schedule "5 * * * *" --time-zone "UTC" -c recipe.yaml @@ -330,8 +338,8 @@ for the `datahub-actions` container and running `docker logs `. There are valid cases for ingesting metadata without the UI-based ingestion scheduler. For example, - You have written a custom ingestion Source -- Your data sources are not reachable on the network where DataHub is deployed -- Your ingestion source requires context from a local filesystem (e.g. input files, environment variables, etc) +- Your data sources are not reachable on the network where DataHub is deployed. Managed DataHub users can use a [remote executor](managed-datahub/operator-guide/setting-up-remote-ingestion-executor-on-aws.md) for remote UI-based ingestion. +- Your ingestion source requires context from a local filesystem (e.g. input files) - You want to distribute metadata ingestion among multiple producers / environments ### How do I attach policies to the actions pod to give it permissions to pull metadata from various sources? 
diff --git a/gradle/versioning/versioning.gradle b/gradle/versioning/versioning.gradle index 1fac894d165a8..39a8a3faf8011 100644 --- a/gradle/versioning/versioning.gradle +++ b/gradle/versioning/versioning.gradle @@ -21,7 +21,7 @@ Produces the following variables and supports token replacement import org.apache.tools.ant.filters.ReplaceTokens def detailedVersionString = "0.0.0-unknown-SNAPSHOT" -def cliMajorVersion = "0.10.5" // base default cli major version +def cliMajorVersion = "0.12.0" // base default cli major version def snapshotVersion = false if (project.hasProperty("releaseVersion")) { version = releaseVersion diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java index dfb936c61ee0c..e159993a8a243 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java @@ -11,7 +11,7 @@ public class AuthUtil { public static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull Optional maybeResourceSpec, + @Nonnull Optional maybeResourceSpec, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -27,7 +27,7 @@ public static boolean isAuthorized( public static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull List> resourceSpecs, + @Nonnull List> resourceSpecs, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -44,7 +44,7 @@ private static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull Optional resourceSpec) { + @Nonnull Optional resourceSpec) { // Each privilege 
in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. @@ -62,11 +62,11 @@ private static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull List> resourceSpecs) { + @Nonnull List> resourceSpecs) { // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. - for (Optional resourceSpec : resourceSpecs) { + for (Optional resourceSpec : resourceSpecs) { final AuthorizationRequest request = new AuthorizationRequest(actor, privilege, resourceSpec); final AuthorizationResult result = authorizer.authorize(request); if (AuthorizationResult.Type.DENY.equals(result.getType())) { diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java index 084a455495551..9e75de3cbf44d 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java @@ -21,5 +21,5 @@ public class AuthorizationRequest { * The resource that the user is requesting for, if applicable. If the privilege is a platform privilege * this optional will be empty. 
*/ - Optional resourceSpec; + Optional resourceSpec; } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java index f9940d171d5d4..b79a4fa20c7ea 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java @@ -18,9 +18,9 @@ public class AuthorizerContext { private final Map contextMap; /** - * A utility for resolving a {@link ResourceSpec} to resolved resource field values. + * A utility for resolving an {@link EntitySpec} to resolved entity field values. */ - private ResourceSpecResolver resourceSpecResolver; + private EntitySpecResolver entitySpecResolver; /** * diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java new file mode 100644 index 0000000000000..1258d958f2092 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java @@ -0,0 +1,44 @@ +package com.datahub.authorization; + +/** + * List of entity field types to fetch for a given entity + */ +public enum EntityFieldType { + + /** + * Urn of the entity + * @deprecated + */ + @Deprecated + RESOURCE_URN, + /** + * Type of the entity (e.g. dataset, chart) + * @deprecated + */ + @Deprecated + RESOURCE_TYPE, + /** + * Type of the entity (e.g. 
dataset, chart) + */ + TYPE, + /** + * Urn of the entity + */ + URN, + /** + * Owners of the entity + */ + OWNER, + /** + * Domains of the entity + */ + DOMAIN, + /** + * Groups of which the entity (only applies to corpUser) is a member + */ + GROUP_MEMBERSHIP, + /** + * Data platform instance of resource + */ + DATA_PLATFORM_INSTANCE +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java new file mode 100644 index 0000000000000..656bec0f44fc2 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java @@ -0,0 +1,23 @@ +package com.datahub.authorization; + +import javax.annotation.Nonnull; +import lombok.Value; + + +/** + * Details about the entities involved in the authorization process. It models the actor and the resource being acted + * upon. Resource types currently supported can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} + */ +@Value +public class EntitySpec { + /** + * The entity type. (dataset, chart, dashboard, corpGroup, etc). + */ + @Nonnull + String type; + /** + * The entity identity. Most often, this corresponds to the raw entity urn. (urn:li:corpGroup:groupId) + */ + @Nonnull + String entity; +} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java new file mode 100644 index 0000000000000..67347fbf87a87 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java @@ -0,0 +1,11 @@ +package com.datahub.authorization; + +/** + * An Entity Spec Resolver is responsible for resolving a {@link EntitySpec} to a {@link ResolvedEntitySpec}. + */ +public interface EntitySpecResolver { + /** + Resolve a {@link EntitySpec} to a resolved entity spec. 
+ **/ + ResolvedEntitySpec resolve(EntitySpec entitySpec); +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java index 9318f5f8e7b96..955a06fd54cb9 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java @@ -33,9 +33,9 @@ public static FieldResolver getResolverFromValues(Set values) { /** * Helper function that returns FieldResolver given a fetchFieldValue function */ - public static FieldResolver getResolverFromFunction(ResourceSpec resourceSpec, - Function fetchFieldValue) { - return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(resourceSpec))); + public static FieldResolver getResolverFromFunction(EntitySpec entitySpec, + Function fetchFieldValue) { + return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(entitySpec))); } public static FieldValue emptyFieldValue() { diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java new file mode 100644 index 0000000000000..7948766df5715 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java @@ -0,0 +1,66 @@ +package com.datahub.authorization; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + + +/** + * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type + */ +@RequiredArgsConstructor +@ToString +public class ResolvedEntitySpec { + @Getter + private final EntitySpec spec; + private final Map fieldResolvers; + + 
public Set getFieldValues(EntityFieldType entityFieldType) { + if (!fieldResolvers.containsKey(entityFieldType)) { + return Collections.emptySet(); + } + return fieldResolvers.get(entityFieldType).getFieldValuesFuture().join().getValues(); + } + + /** + * Fetch the owners for an entity. + * @return a set of owner urns, or empty set if none exist. + */ + public Set getOwners() { + if (!fieldResolvers.containsKey(EntityFieldType.OWNER)) { + return Collections.emptySet(); + } + return fieldResolvers.get(EntityFieldType.OWNER).getFieldValuesFuture().join().getValues(); + } + + /** + * Fetch the platform instance for a resolved entity spec. + * @return a Platform Instance or null if one does not exist. + */ + @Nullable + public String getDataPlatformInstance() { + if (!fieldResolvers.containsKey(EntityFieldType.DATA_PLATFORM_INSTANCE)) { + return null; + } + Set dataPlatformInstance = fieldResolvers.get(EntityFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); + if (dataPlatformInstance.size() > 0) { + return dataPlatformInstance.stream().findFirst().get(); + } + return null; + } + + /** + * Fetch the group membership for an entity. + * @return a set of group urns, or empty set if none exist. 
+ */ + public Set getGroupMembership() { + if (!fieldResolvers.containsKey(EntityFieldType.GROUP_MEMBERSHIP)) { + return Collections.emptySet(); + } + return fieldResolvers.get(EntityFieldType.GROUP_MEMBERSHIP).getFieldValuesFuture().join().getValues(); + } +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java deleted file mode 100644 index 53dd0be44f963..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.datahub.authorization; - -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; - - -/** - * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type - */ -@RequiredArgsConstructor -@ToString -public class ResolvedResourceSpec { - @Getter - private final ResourceSpec spec; - private final Map fieldResolvers; - - public Set getFieldValues(ResourceFieldType resourceFieldType) { - if (!fieldResolvers.containsKey(resourceFieldType)) { - return Collections.emptySet(); - } - return fieldResolvers.get(resourceFieldType).getFieldValuesFuture().join().getValues(); - } - - /** - * Fetch the owners for a resource. - * @return a set of owner urns, or empty set if none exist. 
- */ - public Set getOwners() { - if (!fieldResolvers.containsKey(ResourceFieldType.OWNER)) { - return Collections.emptySet(); - } - return fieldResolvers.get(ResourceFieldType.OWNER).getFieldValuesFuture().join().getValues(); - } -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java deleted file mode 100644 index ee54d2bfbba1d..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.datahub.authorization; - -/** - * List of resource field types to fetch for a given resource - */ -public enum ResourceFieldType { - /** - * Type of resource (e.g. dataset, chart) - */ - RESOURCE_TYPE, - /** - * Urn of resource - */ - RESOURCE_URN, - /** - * Owners of resource - */ - OWNER, - /** - * Domains of resource - */ - DOMAIN -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java deleted file mode 100644 index c1bd53e31fe29..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.datahub.authorization; - -import javax.annotation.Nonnull; -import lombok.Value; - - -/** - * Details about a specific resource being acted upon. Resource types currently supported - * can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} - */ -@Value -public class ResourceSpec { - /** - * The resource type. Most often, this corresponds to the entity type. (dataset, chart, dashboard, corpGroup, etc). - */ - @Nonnull - String type; - /** - * The resource identity. Most often, this corresponds to the raw entity urn. 
(urn:li:corpGroup:groupId) - */ - @Nonnull - String resource; -} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java deleted file mode 100644 index 05c35f377b9a9..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datahub.authorization; - -/** - * A Resource Spec Resolver is responsible for resolving a {@link ResourceSpec} to a {@link ResolvedResourceSpec}. - */ -public interface ResourceSpecResolver { - /** - Resolve a {@link ResourceSpec} to a resolved resource spec. - **/ - ResolvedResourceSpec resolve(ResourceSpec resourceSpec); -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java index ce7a3f22b3147..c731a3ec987c1 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.Plugin; import java.util.Map; import java.util.Optional; @@ -32,5 +32,5 @@ public interface Authorizer extends Plugin { * Retrieves the current list of actors authorized to for a particular privilege against * an optional resource */ - AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); + AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); } diff --git 
a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle index 393b10b0e9d24..bc3415b2ccc8c 100644 --- a/metadata-dao-impl/kafka-producer/build.gradle +++ b/metadata-dao-impl/kafka-producer/build.gradle @@ -1,9 +1,9 @@ apply plugin: 'java' dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':entity-registry') implementation project(':metadata-io') diff --git a/metadata-events/mxe-avro-1.7/.gitignore b/metadata-events/mxe-avro/.gitignore similarity index 100% rename from metadata-events/mxe-avro-1.7/.gitignore rename to metadata-events/mxe-avro/.gitignore diff --git a/metadata-events/mxe-avro-1.7/build.gradle b/metadata-events/mxe-avro/build.gradle similarity index 81% rename from metadata-events/mxe-avro-1.7/build.gradle rename to metadata-events/mxe-avro/build.gradle index 8c0a26d22dc7d..9d11eeb160ff0 100644 --- a/metadata-events/mxe-avro-1.7/build.gradle +++ b/metadata-events/mxe-avro/build.gradle @@ -6,8 +6,8 @@ apply plugin: 'io.acryl.gradle.plugin.avro' apply plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 - implementation(externalDependency.avroCompiler_1_7) { + api externalDependency.avro + implementation(externalDependency.avroCompiler) { exclude group: 'org.apache.velocity', module: 'velocity' } constraints { @@ -21,7 +21,7 @@ dependencies { def genDir = file("src/generated/java") -task avroCodeGen(type: com.commercehub.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { +task avroCodeGen(type: com.github.davidmc24.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { source("$rootDir/metadata-events/mxe-schemas/src/renamed/avro") outputDir = genDir 
dependsOn(':metadata-events:mxe-schemas:renameNamespace') diff --git a/metadata-events/mxe-registration/build.gradle b/metadata-events/mxe-registration/build.gradle index 60e0da59616d9..032870d93329f 100644 --- a/metadata-events/mxe-registration/build.gradle +++ b/metadata-events/mxe-registration/build.gradle @@ -5,7 +5,7 @@ configurations { } dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-models') implementation spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-schemas/build.gradle b/metadata-events/mxe-schemas/build.gradle index fe46601fb68b7..8dc8b71bd1cd8 100644 --- a/metadata-events/mxe-schemas/build.gradle +++ b/metadata-events/mxe-schemas/build.gradle @@ -1,4 +1,4 @@ -apply plugin: 'java' +apply plugin: 'java-library' apply plugin: 'pegasus' dependencies { diff --git a/metadata-events/mxe-utils-avro-1.7/.gitignore b/metadata-events/mxe-utils-avro/.gitignore similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/.gitignore rename to metadata-events/mxe-utils-avro/.gitignore diff --git a/metadata-events/mxe-utils-avro-1.7/build.gradle b/metadata-events/mxe-utils-avro/build.gradle similarity index 95% rename from metadata-events/mxe-utils-avro-1.7/build.gradle rename to metadata-events/mxe-utils-avro/build.gradle index 3b137965d6c19..a7bf287ab224d 100644 --- a/metadata-events/mxe-utils-avro-1.7/build.gradle +++ b/metadata-events/mxe-utils-avro/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java-library' dependencies { - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-models') api spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java similarity index 100% rename from 
metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java rename to metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java b/metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java rename to metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json similarity index 100% rename from 
metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index 16585f70e820b..e1d53be7bae6b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -98,7 +98,7 @@ def _get_dependencies( # It is possible to tie an external sensor to DAG if external_task_id is omitted but currently we can't tie # jobflow to anothet jobflow. 
external_task_upstreams = [] - if task.task_type == "ExternalTaskSensor": + if isinstance(task, ExternalTaskSensor): task = cast(ExternalTaskSensor, task) if hasattr(task, "external_task_id") and task.external_task_id is not None: external_task_upstreams = [ @@ -155,6 +155,8 @@ def generate_dataflow( "_concurrency", # "_default_view", "catchup", + "description", + "doc_md", "fileloc", "is_paused_upon_creation", "start_date", @@ -431,6 +433,9 @@ def run_datajob( job_property_bag["operator"] = str(ti.operator) job_property_bag["priority_weight"] = str(ti.priority_weight) job_property_bag["log_url"] = ti.log_url + job_property_bag["orchestrator"] = "airflow" + job_property_bag["dag_id"] = str(dag.dag_id) + job_property_bag["task_id"] = str(ti.task_id) dpi.properties.update(job_property_bag) dpi.url = ti.log_url diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index 046fbb5efaa03..f9a2119f51e32 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -23,6 +23,7 @@ TASK_ON_FAILURE_CALLBACK = "on_failure_callback" TASK_ON_SUCCESS_CALLBACK = "on_success_callback" +TASK_ON_RETRY_CALLBACK = "on_retry_callback" def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: @@ -259,6 +260,28 @@ def custom_on_success_callback(context): return custom_on_success_callback +def _wrap_on_retry_callback(on_retry_callback): + def custom_on_retry_callback(context): + config = get_lineage_config() + if config.enabled: + context["_datahub_config"] = config + try: + datahub_task_status_callback( + context, status=InstanceRunResult.UP_FOR_RETRY + ) + except Exception as e: + if not config.graceful_exceptions: + raise e + else: + print(f"Exception: {traceback.format_exc()}") + + # 
Call original policy + if on_retry_callback: + on_retry_callback(context) + + return custom_on_retry_callback + + def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}") # task.add_inlets(["auto"]) @@ -274,7 +297,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: on_success_callback_prop: property = getattr( MappedOperator, TASK_ON_SUCCESS_CALLBACK ) - if not on_failure_callback_prop.fset or not on_success_callback_prop.fset: + on_retry_callback_prop: property = getattr( + MappedOperator, TASK_ON_RETRY_CALLBACK + ) + if ( + not on_failure_callback_prop.fset + or not on_success_callback_prop.fset + or not on_retry_callback_prop.fset + ): task.log.debug( "Using MappedOperator's partial_kwargs instead of callback properties" ) @@ -284,10 +314,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback( task.on_success_callback ) + task.partial_kwargs[TASK_ON_RETRY_CALLBACK] = _wrap_on_retry_callback( + task.on_retry_callback + ) return task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback) # type: ignore task.on_success_callback = _wrap_on_success_callback(task.on_success_callback) # type: ignore + task.on_retry_callback = _wrap_on_retry_callback(task.on_retry_callback) # type: ignore # task.pre_execute = _wrap_pre_execution(task.pre_execute) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json index 26aa2afaa831a..a4c17c73e9c7e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", 
+ "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -373,7 +375,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json index b2e3a1fe47da7..a0a95716a0993 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -302,7 +304,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "2", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag" + "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag", "name": "simple_dag_task_1_manual_run_test", @@ -433,6 +438,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -654,7 +661,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json index 2e733c2ad40a9..1974f1f085df0 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + 
"doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -224,7 +226,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json index 44b288efda954..d02951bc9e82d 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -224,7 +226,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json index 454c509279e11..9acc47ec1321e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -189,7 +191,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "2", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", "name": "simple_dag_task_1_manual_run_test", @@ -523,7 +528,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json index 73b5765e96b7d..03299c483f57f 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -189,7 +191,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "2", - "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", "name": "simple_dag_task_1_manual_run_test", @@ -435,6 +440,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -579,7 +586,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json index affc395d421da..11a0b17b45b95 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -234,7 +236,10 @@ "state": "running", "operator": "SnowflakeOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "snowflake_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1", "name": "snowflake_operator_transform_cost_table_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 1a32b38ce055d..19e4aac9fb95e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": 
"'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -74,9 +76,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT 
NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +98,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +194,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:24.632190+00:00", + "start_date": "2023-10-15 20:29:10.262813+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -166,13 +203,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "5", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + 
"orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "create_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056984632, + "time": 1697401750262, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +261,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984632, + "timestampMillis": 1697401750262, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +291,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": 
\"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +313,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": 
"FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +442,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984947, + "timestampMillis": 1697401750651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -447,7 +558,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:28.605901+00:00", + "start_date": "2023-10-15 20:29:15.013834+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -456,13 +567,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "4", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "populate_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056988605, + 
"time": 1697401755013, "actor": "urn:li:corpuser:datahub" } } @@ -511,7 +625,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056988605, + "timestampMillis": 1697401755013, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -621,7 +735,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056989098, + "timestampMillis": 1697401755600, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -807,7 +921,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:32.888165+00:00", + "start_date": "2023-10-15 20:29:20.216818+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -816,13 +930,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "3", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056992888, + "time": 1697401760216, "actor": "urn:li:corpuser:datahub" } } @@ -895,7 +1012,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056992888, + "timestampMillis": 1697401760216, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1131,7 +1248,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { 
"json": { - "timestampMillis": 1696056993744, + "timestampMillis": 1697401761237, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1249,7 +1366,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:37.745717+00:00", + "start_date": "2023-10-15 20:29:26.243934+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1258,13 +1375,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056997745, + "time": 1697401766243, "actor": "urn:li:corpuser:datahub" } } @@ -1313,7 +1433,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056997745, + "timestampMillis": 1697401766243, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1425,7 +1545,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056998672, + "timestampMillis": 1697401767373, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1543,7 +1663,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:42.645806+00:00", + "start_date": "2023-10-15 20:29:32.075613+00:00", "end_date": "None", "execution_date": "2023-09-27 
21:34:38+00:00", "try_number": "0", @@ -1552,13 +1672,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_processed_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057002645, + "time": 1697401772075, "actor": "urn:li:corpuser:datahub" } } @@ -1607,7 +1730,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057002645, + "timestampMillis": 1697401772075, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1719,7 +1842,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057003759, + "timestampMillis": 1697401773454, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index c082be693e30c..b67464b385335 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": 
"None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -74,9 +76,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT 
NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +98,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +194,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:45.832554+00:00", + "start_date": "2023-10-15 20:27:26.883178+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -166,13 +203,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "5", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "create_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057245832, + "time": 1697401646883, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +261,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057245832, + "timestampMillis": 1697401646883, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +291,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": 
\"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +313,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + 
"upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +442,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057246734, + "timestampMillis": 1697401647826, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -354,6 +465,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -502,7 +615,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:49.653938+00:00", + "start_date": "2023-10-15 20:27:31.398799+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -511,13 +624,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "4", - "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "populate_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057249653, + "time": 1697401651398, "actor": "urn:li:corpuser:datahub" } } @@ -566,7 +682,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057249653, + "timestampMillis": 1697401651398, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -676,7 +792,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057250831, + "timestampMillis": 1697401652651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -699,6 +815,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -917,7 +1035,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:53.989264+00:00", + "start_date": "2023-10-15 20:27:37.697995+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -926,13 +1044,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "3", - "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057253989, + "time": 1697401657697, "actor": "urn:li:corpuser:datahub" } } @@ -1005,7 +1126,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057253989, + "timestampMillis": 1697401657697, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1241,7 +1362,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057255628, + "timestampMillis": 1697401659496, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1264,6 +1385,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -1414,7 +1537,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:00.421177+00:00", + "start_date": "2023-10-15 20:27:45.670215+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1423,13 +1546,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - 
"log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057260421, + "time": 1697401665670, "actor": "urn:li:corpuser:datahub" } } @@ -1478,7 +1604,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057260421, + "timestampMillis": 1697401665670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1590,7 +1716,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057262258, + "timestampMillis": 1697401667670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1613,6 +1739,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -1763,7 +1891,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:05.540192+00:00", + "start_date": "2023-10-15 20:27:51.559194+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1772,13 +1900,16 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_processed_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057265540, + "time": 1697401671559, "actor": "urn:li:corpuser:datahub" } } @@ -1827,7 +1958,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057265540, + "timestampMillis": 1697401671559, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1939,7 +2070,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057267631, + "timestampMillis": 1697401673788, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md index e4fc950a7cdbd..a0930102c6827 100644 --- a/metadata-ingestion/adding-source.md +++ b/metadata-ingestion/adding-source.md @@ -62,7 +62,7 @@ Some sources use the default `SourceReport` class, but others inherit and extend ### 3. Implement the source itself -The core for the source is the `get_workunits` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. +The core for the source is the `get_workunits_internal` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. 
The [file source](./src/datahub/ingestion/source/file.py) is a good and simple example. The MetadataChangeEventClass is defined in the metadata models which are generated diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md new file mode 100644 index 0000000000000..77cc2f456aa2d --- /dev/null +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -0,0 +1,94 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Profiling ingestions + + + +**🤝 Version compatibility** +> Open Source DataHub: **0.11.1** | Acryl: **0.2.12** + +This page documents how to perform memory profiles of ingestion runs. +It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources. + +## How to use + + + + +Create an ingestion as specified in the [Ingestion guide](../../../docs/ui-ingestion.md). + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +In the final panel, under the advanced section, add the `debug` datahub package under the **Extra DataHub Plugins** section. +As seen below: + +

+ +

+ +Finally, save and run the ingestion process. + +
+ +Install the `debug` plugin for DataHub's CLI wherever the ingestion runs: + +```bash +pip install 'acryl-datahub[debug]' +``` + +This will install [memray](https://github.com/bloomberg/memray) in your python environment. + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +Finally run the ingestion recipe + +```bash +$ datahub ingest -c recipe.yaml +``` + + +
+ + +Once the ingestion run starts, a binary file will be created and appended to during the execution of the ingestion. + +These files follow the pattern `file-<pipeline-name>-<pipeline-run-id>.bin` for a unique identification. +Once the ingestion has finished you can use `memray` to analyze the memory dump in a flamegraph view using: + +```$ memray flamegraph file-None-file-2023_09_18-21_38_43.bin``` + +This will generate an interactive HTML file for analysis: + +

+ +

+ + +`memray` has an extensive set of features for memory investigation. Take a look at their [documentation](https://bloomberg.github.io/memray/overview.html) to see the full feature set. + + +## Questions + +If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md new file mode 100644 index 0000000000000..7263a59f5ea3d --- /dev/null +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -0,0 +1,28 @@ +### Prerequisites +1. Create a user that has access to the database you want to ingest. + ```sql + CREATE USER datahub FROM <database> AS PASSWORD = <password> PERM = 20000000; + ``` +2. Grant that user the following privileges: + ```sql + GRANT SELECT ON dbc.columns TO datahub; + GRANT SELECT ON dbc.databases TO datahub; + GRANT SELECT ON dbc.tables TO datahub; + GRANT SELECT ON DBC.All_RI_ChildrenV TO datahub; + GRANT SELECT ON DBC.ColumnsV TO datahub; + GRANT SELECT ON DBC.IndicesV TO datahub; + GRANT SELECT ON dbc.TableTextV TO datahub; + GRANT SELECT ON dbc.TablesV TO datahub; + GRANT SELECT ON dbc.dbqlogtbl TO datahub; -- if lineage or usage extraction is enabled + ``` + + If you want to run profiling, you need to grant select permission on all the tables you want to profile. + +3. 
If lineage or usage extraction is enabled, check that query logging is enabled and that the logged query text size is large enough +to fit your queries (by default, Teradata captures at most 200 characters of query text). + An example of how you can set it for all users: + ```sql + REPLACE QUERY LOGGING LIMIT SQLTEXT=2000 ON ALL; + ``` + See more here about query logging: + [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs](https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs) diff --git a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml new file mode 100644 index 0000000000000..cc94de20110fe --- /dev/null +++ b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml @@ -0,0 +1,16 @@ +pipeline_name: my-teradata-ingestion-pipeline +source: + type: teradata + config: + host_port: "myteradatainstance.teradata.com:1025" + username: myuser + password: mypassword + #database_pattern: + # allow: + # - "my_database" + # ignoreCase: true + include_table_lineage: true + include_usage_statistics: true + stateful_ingestion: + enabled: true +sink: diff --git a/metadata-ingestion/examples/library/create_dataproduct.py b/metadata-ingestion/examples/library/create_dataproduct.py new file mode 100644 index 0000000000000..245395b602480 --- /dev/null +++ b/metadata-ingestion/examples/library/create_dataproduct.py @@ -0,0 +1,25 @@ +from datahub.api.entities.dataproduct.dataproduct import DataProduct +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +data_product = DataProduct( + id="pet_of_the_week", + display_name="Pet of the Week Campagin", + domain="urn:li:domain:ef39e99a-9d61-406d-b4a8-c70b16380206", + description="This campaign includes Pet of the Week data.", + assets=[ + 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)", + "urn:li:dashboard:(looker,baz)", + "urn:li:dataFlow:(airflow,dag_abc,PROD)", + ], + owners=[{"id": "urn:li:corpuser:jdoe", "type": "BUSINESS_OWNER"}], + terms=["urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance"], + tags=["urn:li:tag:adoption"], + properties={"lifecycle": "production", "sla": "7am every day"}, + external_url="https://en.wikipedia.org/wiki/Sloth", +) + +for mcp in data_product.generate_mcp(upsert=False): + graph.emit(mcp) diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index a9b9b4b20f5ac..021ebd4a31eb3 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -152,7 +152,8 @@ def add_name(self, name_attr, space_attr, new_schema): return encoded -autogen_header = """# flake8: noqa +autogen_header = """# mypy: ignore-errors +# flake8: noqa # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py # Do not modify manually! 
diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index ffa80515dbafd..81b26145e620c 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -351,8 +351,8 @@ def strip_types(field_path: str) -> str: field_objects = [] for f in entity_fields: field = avro.schema.Field( - type=f["type"], - name=f["name"], + f["type"], + f["name"], has_default=False, ) field_objects.append(field) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index fe8e3be4632c4..7f7826abe2095 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -32,13 +32,12 @@ "expandvars>=0.6.5", "avro-gen3==0.7.11", # "avro-gen3 @ git+https://github.com/acryldata/avro_gen@master#egg=avro-gen3", - "avro>=1.10.2,<1.11", + "avro>=1.11.3,<1.12", "python-dateutil>=2.8.0", "tabulate", "progressbar2", "termcolor>=1.0.0", "psutil>=5.8.0", - "ratelimiter", "Deprecated", "humanfriendly", "packaging", @@ -102,22 +101,36 @@ "grpcio-tools>=1.44.0,<2", } -sql_common = { - # Required for all SQL sources. - # This is temporary lower bound that we're open to loosening/tightening as requirements show up - "sqlalchemy>=1.4.39, <2", - # Required for SQL profiling. - "great-expectations>=0.15.12, <=0.15.50", - # scipy version restricted to reduce backtracking, used by great-expectations, - "scipy>=1.7.2", - # GE added handling for higher version of jinja2 - # https://github.com/great-expectations/great_expectations/pull/5382/files - # datahub does not depend on traitlets directly but great expectations does. - # https://github.com/ipython/traitlets/issues/741 - "traitlets<5.2.2", - "greenlet", +usage_common = { + "sqlparse", +} + +sqlglot_lib = { + # Using an Acryl fork of sqlglot. + # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 + "acryl-sqlglot==18.5.2.dev45", } +sql_common = ( + { + # Required for all SQL sources. 
+ # This is temporary lower bound that we're open to loosening/tightening as requirements show up + "sqlalchemy>=1.4.39, <2", + # Required for SQL profiling. + "great-expectations>=0.15.12, <=0.15.50", + # scipy version restricted to reduce backtracking, used by great-expectations, + "scipy>=1.7.2", + # GE added handling for higher version of jinja2 + # https://github.com/great-expectations/great_expectations/pull/5382/files + # datahub does not depend on traitlets directly but great expectations does. + # https://github.com/ipython/traitlets/issues/741 + "traitlets<5.2.2", + "greenlet", + } + | usage_common + | sqlglot_lib +) + sqllineage_lib = { "sqllineage==1.3.8", # We don't have a direct dependency on sqlparse but it is a dependency of sqllineage. @@ -126,12 +139,6 @@ "sqlparse==0.4.4", } -sqlglot_lib = { - # Using an Acryl fork of sqlglot. - # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==18.5.2.dev45", -} - aws_common = { # AWS Python SDK "boto3", @@ -244,10 +251,6 @@ powerbi_report_server = {"requests", "requests_ntlm"} -usage_common = { - "sqlparse", -} - databricks = { # 0.1.11 appears to have authentication issues with azure databricks "databricks-sdk>=0.9.0", @@ -281,8 +284,10 @@ # Misc plugins. 
"sql-parser": sqlglot_lib, # Source plugins - # PyAthena is pinned with exact version because we use private method in PyAthena - "athena": sql_common | {"PyAthena[SQLAlchemy]==2.4.1"}, + # sqlalchemy-bigquery is included here since it provides an implementation of + # a SQLalchemy-conform STRUCT type definition + "athena": sql_common + | {"PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", "sqlalchemy-bigquery>=1.4.1"}, "azure-ad": set(), "bigquery": sql_common | bigquery_common @@ -354,9 +359,13 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"}, - "redshift-legacy": sql_common | redshift_common, - "redshift-usage-legacy": sql_common | usage_common | redshift_common, + "redshift": sql_common + | redshift_common + | usage_common + | {"redshift-connector"} + | sqlglot_lib, + "redshift-legacy": sql_common | redshift_common | sqlglot_lib, + "redshift-usage-legacy": sql_common | redshift_common | sqlglot_lib | usage_common, "s3": {*s3_base, *data_lake_profiling}, "gcs": {*s3_base, *data_lake_profiling}, "sagemaker": aws_common, @@ -373,12 +382,16 @@ # FIXME: I don't think tableau uses sqllineage anymore so we should be able # to remove that dependency. 
"tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib, + "teradata": sql_common + | usage_common + | sqlglot_lib + | {"teradatasqlalchemy>=17.20.0.0"}, "trino": sql_common | trino, "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging", "requests-gssapi"}, "powerbi": microsoft_common | {"lark[regex]==1.1.4", "sqlparse"} | sqlglot_lib, "powerbi-report-server": powerbi_report_server, - "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8"}, + "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.1"}, "unity-catalog": databricks | sqllineage_lib, } @@ -431,6 +444,10 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} +debug_requirements = { + "memray", +} + base_dev_requirements = { *base_requirements, *framework_common, @@ -495,6 +512,7 @@ "s3", "snowflake", "tableau", + "teradata", "trino", "hive", "starburst-trino-usage", @@ -593,6 +611,7 @@ "tableau = datahub.ingestion.source.tableau:TableauSource", "openapi = datahub.ingestion.source.openapi:OpenApiSource", "metabase = datahub.ingestion.source.metabase:MetabaseSource", + "teradata = datahub.ingestion.source.sql.teradata:TeradataSource", "trino = datahub.ingestion.source.sql.trino:TrinoSource", "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", "nifi = datahub.ingestion.source.nifi:NifiSource", @@ -660,6 +679,7 @@ "Documentation": "https://datahubproject.io/docs/", "Source": "https://github.com/datahub-project/datahub", "Changelog": "https://github.com/datahub-project/datahub/releases", + "Releases": "https://github.com/acryldata/datahub/releases", }, license="Apache License 2.0", description="A CLI to work with DataHub metadata", @@ -723,5 +743,6 @@ "dev": list(dev_requirements), "testing-utils": list(test_api_requirements), # To import `datahub.testing` "integration-tests": list(full_test_dev_requirements), + "debug": 
list(debug_requirements), }, ) diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py new file mode 100644 index 0000000000000..c45d4ddc92458 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py @@ -0,0 +1,7 @@ +from typing import Optional + +from datahub.configuration import ConfigModel + + +class BaseAssertion(ConfigModel): + description: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py new file mode 100644 index 0000000000000..a41b0f7aafd9f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py @@ -0,0 +1,162 @@ +from typing import Optional, Union + +from typing_extensions import Literal, Protocol + +from datahub.configuration import ConfigModel +from datahub.metadata.schema_classes import ( + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, +) + + +class Operator(Protocol): + """Specification for an assertion operator. + + This class exists only for documentation (not used in typing checking). + """ + + operator: str + + def id(self) -> str: + ... + + def generate_parameters(self) -> AssertionStdParametersClass: + ... 
+ + +def _generate_assertion_std_parameter( + value: Union[str, int, float] +) -> AssertionStdParameterClass: + if isinstance(value, str): + return AssertionStdParameterClass( + value=value, type=AssertionStdParameterTypeClass.STRING + ) + elif isinstance(value, (int, float)): + return AssertionStdParameterClass( + value=str(value), type=AssertionStdParameterTypeClass.NUMBER + ) + else: + raise ValueError( + f"Unsupported assertion parameter {value} of type {type(value)}" + ) + + +Param = Union[str, int, float] + + +def _generate_assertion_std_parameters( + value: Optional[Param] = None, + min_value: Optional[Param] = None, + max_value: Optional[Param] = None, +) -> AssertionStdParametersClass: + return AssertionStdParametersClass(  # keep falsy-but-valid values such as 0 + value=_generate_assertion_std_parameter(value) if value is not None else None, + minValue=_generate_assertion_std_parameter(min_value) if min_value is not None else None, + maxValue=_generate_assertion_std_parameter(max_value) if max_value is not None else None, + ) + + +class EqualToOperator(ConfigModel): + type: Literal["equal_to"] + value: Union[str, int, float] + + operator: str = AssertionStdOperatorClass.EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class BetweenOperator(ConfigModel): + type: Literal["between"] + min: Union[int, float] + max: Union[int, float] + + operator: str = AssertionStdOperatorClass.BETWEEN + + def id(self) -> str: + return f"{self.type}-{self.min}-{self.max}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters( + min_value=self.min, max_value=self.max + ) + + +class LessThanOperator(ConfigModel): + type: Literal["less_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> 
AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOperator(ConfigModel): + type: Literal["greater_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class LessThanOrEqualToOperator(ConfigModel): + type: Literal["less_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOrEqualToOperator(ConfigModel): + type: Literal["greater_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class NotNullOperator(ConfigModel): + type: Literal["not_null"] + + operator: str = AssertionStdOperatorClass.NOT_NULL + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +Operators = Union[ + EqualToOperator, + BetweenOperator, + LessThanOperator, + LessThanOrEqualToOperator, + GreaterThanOperator, + GreaterThanOrEqualToOperator, + NotNullOperator, +] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py index a665e95e93c43..6a3944ba36baf 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py +++ 
b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py @@ -4,6 +4,8 @@ from typing_extensions import Literal import datahub.emitter.mce_builder as builder +from datahub.api.entities.datacontract.assertion import BaseAssertion +from datahub.api.entities.datacontract.assertion_operator import Operators from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -14,12 +16,15 @@ AssertionStdParametersClass, AssertionStdParameterTypeClass, AssertionTypeClass, + AssertionValueChangeTypeClass, DatasetAssertionInfoClass, DatasetAssertionScopeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, ) -class IdConfigMixin(ConfigModel): +class IdConfigMixin(BaseAssertion): id_raw: Optional[str] = pydantic.Field( default=None, alias="id", @@ -30,25 +35,32 @@ def generate_default_id(self) -> str: raise NotImplementedError -class CustomSQLAssertion(IdConfigMixin, ConfigModel): +class CustomSQLAssertion(IdConfigMixin, BaseAssertion): type: Literal["custom_sql"] - sql: str + operator: Operators = pydantic.Field(discriminator="type") - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( - dataset=entity_urn, - scope=DatasetAssertionScopeClass.UNKNOWN, - fields=[], - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass._NATIVE_, - logic=self.sql, + def generate_default_id(self) -> str: + return f"{self.type}-{self.sql}-{self.operator.id()}" + + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + sql_assertion_info = SqlAssertionInfoClass( + entity=entity_urn, + statement=self.sql, + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + # TODO: Support other types of assertions + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + ) + return 
AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=sql_assertion_info, + description=self.description, ) -class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): +class ColumnUniqueAssertion(IdConfigMixin, BaseAssertion): type: Literal["unique"] # TODO: support multiple columns? @@ -57,10 +69,8 @@ class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): def generate_default_id(self) -> str: return f"{self.type}-{self.column}" - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + dataset_assertion_info = DatasetAssertionInfoClass( dataset=entity_urn, scope=DatasetAssertionScopeClass.DATASET_COLUMN, fields=[builder.make_schema_field_urn(entity_urn, self.column)], @@ -72,6 +82,11 @@ def generate_dataset_assertion_info( ) ), ) + return AssertionInfoClass( + type=AssertionTypeClass.DATASET, + datasetAssertion=dataset_assertion_info, + description=self.description, + ) class DataQualityAssertion(ConfigModel): @@ -92,16 +107,9 @@ def id(self) -> str: def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - dataset_assertion_info = self.__root__.generate_dataset_assertion_info( - entity_urn - ) - return [ MetadataChangeProposalWrapper( entityUrn=assertion_urn, - aspect=AssertionInfoClass( - type=AssertionTypeClass.DATASET, - datasetAssertion=dataset_assertion_info, - ), + aspect=self.__root__.generate_assertion_info(entity_urn), ) ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py index 2df446623a9d6..f3c6be55e5fea 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -54,7 +54,7 @@ class DataContract(ConfigModel): freshness: 
Optional[FreshnessAssertion] = pydantic.Field(default=None) # TODO: Add a validator to ensure that ids are unique - data_quality: Optional[List[DataQualityAssertion]] = None + data_quality: Optional[List[DataQualityAssertion]] = pydantic.Field(default=None) _original_yaml_dict: Optional[dict] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py index ee8fa1181e614..71741d76b22fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -21,7 +22,7 @@ ) -class CronFreshnessAssertion(ConfigModel): +class CronFreshnessAssertion(BaseAssertion): type: Literal["cron"] cron: str = pydantic.Field( @@ -32,12 +33,30 @@ class CronFreshnessAssertion(ConfigModel): description="The timezone to use for the cron schedule. 
Defaults to UTC.", ) + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.CRON, + cron=FreshnessCronScheduleClass( + cron=self.cron, + timezone=self.timezone, + ), + ) + -class FixedIntervalFreshnessAssertion(ConfigModel): +class FixedIntervalFreshnessAssertion(BaseAssertion): type: Literal["interval"] interval: timedelta + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, + fixedInterval=FixedIntervalScheduleClass( + unit=CalendarIntervalClass.SECOND, + multiple=int(self.interval.total_seconds()), + ), + ) + class FreshnessAssertion(ConfigModel): __root__: Union[ @@ -51,36 +70,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - freshness = self.__root__ - - if isinstance(freshness, CronFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.CRON, - cron=FreshnessCronScheduleClass( - cron=freshness.cron, - timezone=freshness.timezone, - ), - ) - elif isinstance(freshness, FixedIntervalFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, - fixedInterval=FixedIntervalScheduleClass( - unit=CalendarIntervalClass.SECOND, - multiple=int(freshness.interval.total_seconds()), - ), - ) - else: - raise ValueError(f"Unknown freshness type {freshness}") - - assertionInfo = AssertionInfoClass( + aspect = AssertionInfoClass( type=AssertionTypeClass.FRESHNESS, freshnessAssertion=FreshnessAssertionInfoClass( entity=entity_urn, type=FreshnessAssertionTypeClass.DATASET_CHANGE, - schedule=schedule, + schedule=self.__root__.generate_freshness_assertion_schedule(), ), + description=self.__root__.description, ) - - return [ - 
MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py index b5b592e01f58f..b62f94e0592fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.extractor.json_schema_util import get_schema_metadata @@ -19,7 +20,7 @@ ) -class JsonSchemaContract(ConfigModel): +class JsonSchemaContract(BaseAssertion): type: Literal["json-schema"] json_schema: dict = pydantic.Field(alias="json-schema") @@ -36,7 +37,7 @@ def _init_private_attributes(self) -> None: ) -class FieldListSchemaContract(ConfigModel, arbitrary_types_allowed=True): +class FieldListSchemaContract(BaseAssertion, arbitrary_types_allowed=True): type: Literal["field-list"] fields: List[SchemaFieldClass] @@ -67,15 +68,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - schema_metadata = self.__root__._schema_metadata - - assertionInfo = AssertionInfoClass( + aspect = AssertionInfoClass( type=AssertionTypeClass.DATA_SCHEMA, schemaAssertion=SchemaAssertionInfoClass( - entity=entity_urn, schema=schema_metadata + entity=entity_urn, + schema=self.__root__._schema_metadata, ), + description=self.__root__.description, ) - return [ - MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] 
diff --git a/metadata-ingestion/src/datahub/cli/specific/group_cli.py b/metadata-ingestion/src/datahub/cli/specific/group_cli.py index 9baa8ee68d975..e313fce33d4d5 100644 --- a/metadata-ingestion/src/datahub/cli/specific/group_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/group_cli.py @@ -43,7 +43,7 @@ def upsert(file: Path, override_editable: bool) -> None: with get_default_graph() as emitter: for group_config in group_configs: try: - datahub_group = CorpGroup.parse_obj(config_dict) + datahub_group = CorpGroup.parse_obj(group_config) for mcp in datahub_group.generate_mcp( generation_config=CorpGroupGenerationConfig( override_editable=override_editable, datahub_graph=emitter diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index c909b89eb0c2d..73ac4baac48c0 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -283,7 +283,7 @@ class VersionedConfig(ConfigModel): class LineageConfig(ConfigModel): incremental_lineage: bool = Field( - default=True, + default=False, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. 
When disabled, re-states lineage on each run.", ) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index a9f891ddb7b1e..80b6ceb576c1c 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -54,6 +54,13 @@ class DatasetSourceConfigMixin(PlatformInstanceConfigMixin, EnvConfigMixin): """ +class LowerCaseDatasetUrnConfigMixin(ConfigModel): + convert_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert dataset urns to lowercase.", + ) + + class DatasetLineageProviderConfigBase(EnvConfigMixin): """ Any non-Dataset source that produces lineage to Datasets should inherit this class. diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py index 071d590f270f8..cedaa4fbbd7f6 100644 --- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py +++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py @@ -106,6 +106,7 @@ def process_sql_parsing_result( user: Optional[UserUrn] = None, custom_operation_type: Optional[str] = None, include_urns: Optional[Set[DatasetUrn]] = None, + include_column_lineage: bool = True, ) -> Iterable[MetadataWorkUnit]: """Process a single query and yield any generated workunits. 
@@ -130,7 +131,9 @@ def process_sql_parsing_result( _merge_lineage_data( downstream_urn=downstream_urn, upstream_urns=result.in_tables, - column_lineage=result.column_lineage, + column_lineage=result.column_lineage + if include_column_lineage + else None, upstream_edges=self._lineage_map[downstream_urn], query_timestamp=query_timestamp, is_view_ddl=is_view_ddl, @@ -179,15 +182,16 @@ def add_lineage( def gen_workunits(self) -> Iterable[MetadataWorkUnit]: if self.generate_lineage: - yield from self._gen_lineage_workunits() + for mcp in self._gen_lineage_mcps(): + yield mcp.as_workunit() if self.generate_usage_statistics: yield from self._gen_usage_statistics_workunits() - def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: + def _gen_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: for downstream_urn in self._lineage_map: upstreams: List[UpstreamClass] = [] fine_upstreams: List[FineGrainedLineageClass] = [] - for upstream_urn, edge in self._lineage_map[downstream_urn].items(): + for edge in self._lineage_map[downstream_urn].values(): upstreams.append(edge.gen_upstream_aspect()) fine_upstreams.extend(edge.gen_fine_grained_lineage_aspects()) @@ -201,7 +205,7 @@ def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: ) yield MetadataChangeProposalWrapper( entityUrn=downstream_urn, aspect=upstream_lineage - ).as_workunit() + ) def _gen_usage_statistics_workunits(self) -> Iterable[MetadataWorkUnit]: yield from self._usage_aggregator.generate_workunits( diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index 778bd119615e2..a6761a3c77d5e 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Generic, Iterable, Optional, Tuple, TypeVar +from datahub.configuration.common import ConfigurationError from 
datahub.emitter.mce_builder import set_dataset_urn_to_lower from datahub.ingestion.api.committable import Committable from datahub.ingestion.graph.client import DataHubGraph @@ -75,3 +76,11 @@ def register_checkpointer(self, committable: Committable) -> None: def get_committables(self) -> Iterable[Tuple[str, Committable]]: yield from self.checkpointers.items() + + def require_graph(self, operation: Optional[str] = None) -> DataHubGraph: + if not self.graph: + raise ConfigurationError( + f"{operation or 'This operation'} requires a graph, but none was provided. " + "To provide one, either use the datahub-rest sink or set the top-level datahub_api config in the recipe." + ) + return self.graph diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py new file mode 100644 index 0000000000000..945b201ca5758 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -0,0 +1,142 @@ +import copy +from typing import Dict, Iterable, Optional + +from datahub.emitter.mce_builder import datahub_guid, set_aspect +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.schema_classes import ( + FineGrainedLineageClass, + MetadataChangeEventClass, + SystemMetadataClass, + UpstreamClass, + UpstreamLineageClass, +) +from datahub.specific.dataset import DatasetPatchBuilder + + +def _convert_upstream_lineage_to_patch( + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + patch_builder = DatasetPatchBuilder(urn, system_metadata) + for upstream in aspect.upstreams: + patch_builder.add_upstream_lineage(upstream) + mcp = next(iter(patch_builder.build())) + return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) + + +def 
get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: + return datahub_guid( + { + "upstreams": sorted(fine_upstream.upstreams or []), + "downstreams": sorted(fine_upstream.downstreams or []), + "transformOperation": fine_upstream.transformOperation, + } + ) + + +def _merge_upstream_lineage( + new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass +) -> UpstreamLineageClass: + merged_aspect = copy.deepcopy(gms_aspect) + + upstreams_map: Dict[str, UpstreamClass] = { + upstream.dataset: upstream for upstream in merged_aspect.upstreams + } + + upstreams_updated = False + fine_upstreams_updated = False + + for table_upstream in new_aspect.upstreams: + if table_upstream.dataset not in upstreams_map or ( + table_upstream.auditStamp.time + > upstreams_map[table_upstream.dataset].auditStamp.time + ): + upstreams_map[table_upstream.dataset] = table_upstream + upstreams_updated = True + + if upstreams_updated: + merged_aspect.upstreams = list(upstreams_map.values()) + + if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: + fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { + get_fine_grained_lineage_key(fine_upstream): fine_upstream + for fine_upstream in merged_aspect.fineGrainedLineages + } + for column_upstream in new_aspect.fineGrainedLineages: + column_upstream_key = get_fine_grained_lineage_key(column_upstream) + + if column_upstream_key not in fine_upstreams_map or ( + column_upstream.confidenceScore + > fine_upstreams_map[column_upstream_key].confidenceScore + ): + fine_upstreams_map[column_upstream_key] = column_upstream + fine_upstreams_updated = True + + if fine_upstreams_updated: + merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) + else: + merged_aspect.fineGrainedLineages = ( + new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages + ) + + return merged_aspect + + +def _lineage_wu_via_read_modify_write( + graph: Optional[DataHubGraph], + urn: str, + aspect: 
UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. " + ) + gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) + if gms_aspect: + new_aspect = _merge_upstream_lineage(aspect, gms_aspect) + else: + new_aspect = aspect + + return MetadataChangeProposalWrapper( + entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata + ).as_workunit() + + +def auto_incremental_lineage( + graph: Optional[DataHubGraph], + incremental_lineage: bool, + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + if not incremental_lineage: + yield from stream + return # early exit + + for wu in stream: + lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( + UpstreamLineageClass + ) + urn = wu.get_urn() + + if lineage_aspect: + if isinstance(wu.metadata, MetadataChangeEventClass): + set_aspect( + wu.metadata, None, UpstreamLineageClass + ) # we'll emit upstreamLineage separately below + if len(wu.metadata.proposedSnapshot.aspects) > 0: + yield wu + + if lineage_aspect.fineGrainedLineages: + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) + elif lineage_aspect.upstreams: + yield _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + else: + yield wu diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 0bcc220cad49b..8940642f7008a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -29,6 +29,7 @@ from datahub.ingestion.api.report import Report from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, + auto_lowercase_urns, auto_materialize_referenced_tags, 
auto_status_aspect, auto_workunit_reporter, @@ -192,7 +193,31 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: self.ctx.pipeline_config.flags.generate_browse_path_v2_dry_run ) + auto_lowercase_dataset_urns: Optional[MetadataWorkUnitProcessor] = None + if ( + self.ctx.pipeline_config + and self.ctx.pipeline_config.source + and self.ctx.pipeline_config.source.config + and ( + ( + hasattr( + self.ctx.pipeline_config.source.config, + "convert_urns_to_lowercase", + ) + and self.ctx.pipeline_config.source.config.convert_urns_to_lowercase + ) + or ( + hasattr(self.ctx.pipeline_config.source.config, "get") + and self.ctx.pipeline_config.source.config.get( + "convert_urns_to_lowercase" + ) + ) + ) + ): + auto_lowercase_dataset_urns = auto_lowercase_urns + return [ + auto_lowercase_dataset_urns, auto_status_aspect, auto_materialize_referenced_tags, browse_path_processor, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7fc15cf829678..2ce9e07bc57bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -35,7 +35,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn from datahub.utilities.urns.urn import guess_entity_type -from datahub.utilities.urns.urn_iter import list_urns +from datahub.utilities.urns.urn_iter import list_urns, lowercase_dataset_urns if TYPE_CHECKING: from datahub.ingestion.api.source import SourceReport @@ -70,7 +70,6 @@ def auto_status_aspect( for wu in stream: urn = wu.get_urn() all_urns.add(urn) - if not wu.is_primary_source: # If this is a non-primary source, we pretend like we've seen the status # aspect so that we don't try to emit a removal for it. 
@@ -173,6 +172,23 @@ def auto_materialize_referenced_tags( ).as_workunit() +def auto_lowercase_urns( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """Lowercase all dataset urns""" + + for wu in stream: + try: + old_urn = wu.get_urn() + lowercase_dataset_urns(wu.metadata) + wu.id = wu.id.replace(old_urn, wu.get_urn()) + + yield wu + except Exception as e: + logger.warning(f"Failed to lowercase urns for {wu}: {e}", exc_info=True) + yield wu + + def auto_browse_path_v2( stream: Iterable[MetadataWorkUnit], *, diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index 4acf99a50e50e..df0b732833fbe 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -1,6 +1,18 @@ import json import logging -from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Type, + Union, + cast, + overload, +) import avro.schema @@ -54,6 +66,8 @@ avro.schema.PrimitiveSchema, ] +SchemaOrField = Union[avro.schema.Schema, avro.schema.Field] + FieldStack = List[avro.schema.Field] # The latest avro code contains this type definition in a compatibility module, @@ -124,16 +138,22 @@ def __init__( self._meta_mapping_processor = meta_mapping_processor self._schema_tags_field = schema_tags_field self._tag_prefix = tag_prefix + # Map of avro schema type to the conversion handler - self._avro_type_to_mce_converter_map: Dict[ - avro.schema.Schema, - Callable[[ExtendedAvroNestedSchemas], Generator[SchemaField, None, None]], + # TODO: Clean up this type... 
perhaps refactor + self._avro_type_to_mce_converter_map: Mapping[ + Union[ + Type[avro.schema.Schema], + Type[avro.schema.Field], + Type[avro.schema.LogicalSchema], + ], + Callable[[SchemaOrField], Iterable[SchemaField]], ] = { avro.schema.RecordSchema: self._gen_from_non_field_nested_schemas, avro.schema.UnionSchema: self._gen_from_non_field_nested_schemas, avro.schema.ArraySchema: self._gen_from_non_field_nested_schemas, avro.schema.MapSchema: self._gen_from_non_field_nested_schemas, - avro.schema.Field: self._gen_nested_schema_from_field, + avro.schema.Field: self._gen_nested_schema_from_field, # type: ignore avro.schema.PrimitiveSchema: self._gen_non_nested_to_mce_fields, avro.schema.FixedSchema: self._gen_non_nested_to_mce_fields, avro.schema.EnumSchema: self._gen_non_nested_to_mce_fields, @@ -142,20 +162,22 @@ def __init__( @staticmethod def _get_type_name( - avro_schema: avro.schema.Schema, logical_if_present: bool = False + avro_schema: SchemaOrField, logical_if_present: bool = False ) -> str: logical_type_name: Optional[str] = None if logical_if_present: - logical_type_name = getattr( - avro_schema, "logical_type", None - ) or avro_schema.props.get("logicalType") + logical_type_name = cast( + Optional[str], + getattr(avro_schema, "logical_type", None) + or avro_schema.props.get("logicalType"), + ) return logical_type_name or str( getattr(avro_schema.type, "type", avro_schema.type) ) @staticmethod def _get_column_type( - avro_schema: avro.schema.Schema, logical_type: Optional[str] + avro_schema: SchemaOrField, logical_type: Optional[str] ) -> SchemaFieldDataType: type_name: str = AvroToMceSchemaConverter._get_type_name(avro_schema) TypeClass: Optional[Type] = AvroToMceSchemaConverter.field_type_mapping.get( @@ -186,7 +208,7 @@ def _get_column_type( ) return dt - def _is_nullable(self, schema: avro.schema.Schema) -> bool: + def _is_nullable(self, schema: SchemaOrField) -> bool: if isinstance(schema, avro.schema.Field): return self._is_nullable(schema.type) 
if isinstance(schema, avro.schema.UnionSchema): @@ -208,7 +230,7 @@ def _strip_namespace(name_or_fullname: str) -> str: return name_or_fullname.rsplit(".", maxsplit=1)[-1] @staticmethod - def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str: + def _get_simple_native_type(schema: SchemaOrField) -> str: if isinstance(schema, (avro.schema.RecordSchema, avro.schema.Field)): # For Records, fields, always return the name. return AvroToMceSchemaConverter._strip_namespace(schema.name) @@ -226,7 +248,7 @@ def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str: return schema.type @staticmethod - def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str: + def _get_type_annotation(schema: SchemaOrField) -> str: simple_native_type = AvroToMceSchemaConverter._get_simple_native_type(schema) if simple_native_type.startswith("__struct_"): simple_native_type = "struct" @@ -237,10 +259,24 @@ def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str: else: return f"[type={simple_native_type}]" + @staticmethod + @overload + def _get_underlying_type_if_option_as_union( + schema: SchemaOrField, default: SchemaOrField + ) -> SchemaOrField: + ... + + @staticmethod + @overload + def _get_underlying_type_if_option_as_union( + schema: SchemaOrField, default: Optional[SchemaOrField] = None + ) -> Optional[SchemaOrField]: + ... 
+ @staticmethod def _get_underlying_type_if_option_as_union( - schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None - ) -> AvroNestedSchemas: + schema: SchemaOrField, default: Optional[SchemaOrField] = None + ) -> Optional[SchemaOrField]: if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2: (first, second) = schema.schemas if first.type == AVRO_TYPE_NULL: @@ -258,8 +294,8 @@ class SchemaFieldEmissionContextManager: def __init__( self, - schema: avro.schema.Schema, - actual_schema: avro.schema.Schema, + schema: SchemaOrField, + actual_schema: SchemaOrField, converter: "AvroToMceSchemaConverter", description: Optional[str] = None, default_value: Optional[str] = None, @@ -275,7 +311,7 @@ def __enter__(self): self._converter._prefix_name_stack.append(type_annotation) return self - def emit(self) -> Generator[SchemaField, None, None]: + def emit(self) -> Iterable[SchemaField]: if ( not isinstance( self._actual_schema, @@ -307,7 +343,7 @@ def emit(self) -> Generator[SchemaField, None, None]: description = self._description if not description and actual_schema.props.get("doc"): - description = actual_schema.props.get("doc") + description = cast(Optional[str], actual_schema.props.get("doc")) if self._default_value is not None: description = f"{description if description else ''}\nField default value: {self._default_value}" @@ -320,12 +356,12 @@ def emit(self) -> Generator[SchemaField, None, None]: native_data_type = native_data_type[ slice(len(type_prefix), len(native_data_type) - 1) ] - native_data_type = actual_schema.props.get( - "native_data_type", native_data_type + native_data_type = cast( + str, actual_schema.props.get("native_data_type", native_data_type) ) field_path = self._converter._get_cur_field_path() - merged_props = {} + merged_props: Dict[str, Any] = {} merged_props.update(self._schema.other_props) merged_props.update(schema.other_props) @@ -363,12 +399,13 @@ def emit(self) -> Generator[SchemaField, None, None]: 
meta_terms_aspect = meta_aspects.get(Constants.ADD_TERM_OPERATION) - logical_type_name: Optional[str] = ( + logical_type_name: Optional[str] = cast( + Optional[str], # logicalType nested inside type getattr(actual_schema, "logical_type", None) or actual_schema.props.get("logicalType") # bare logicalType - or self._actual_schema.props.get("logicalType") + or self._actual_schema.props.get("logicalType"), ) field = SchemaField( @@ -392,14 +429,12 @@ def emit(self) -> Generator[SchemaField, None, None]: def __exit__(self, exc_type, exc_val, exc_tb): self._converter._prefix_name_stack.pop() - def _get_sub_schemas( - self, schema: ExtendedAvroNestedSchemas - ) -> Generator[avro.schema.Schema, None, None]: + def _get_sub_schemas(self, schema: SchemaOrField) -> Iterable[SchemaOrField]: """Responsible for generation for appropriate sub-schemas for every nested AVRO type.""" def gen_items_from_list_tuple_or_scalar( val: Any, - ) -> Generator[avro.schema.Schema, None, None]: + ) -> Iterable[avro.schema.Schema]: if isinstance(val, (list, tuple)): for i in val: yield i @@ -433,7 +468,7 @@ def gen_items_from_list_tuple_or_scalar( def _gen_nested_schema_from_field( self, field: avro.schema.Field, - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for an AVRO Field type.""" # NOTE: Here we only manage the field stack and trigger MCE Field generation from this field's type. # The actual emitting of a field happens when @@ -447,7 +482,7 @@ def _gen_nested_schema_from_field( def _gen_from_last_field( self, schema_to_recurse: Optional[AvroNestedSchemas] = None - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """Emits the field most-recent field, optionally triggering sub-schema generation under the field.""" last_field_schema = self._fields_stack[-1] # Generate the custom-description for the field. 
@@ -467,8 +502,8 @@ def _gen_from_last_field( yield from self._to_mce_fields(sub_schema) def _gen_from_non_field_nested_schemas( - self, schema: AvroNestedSchemas - ) -> Generator[SchemaField, None, None]: + self, schema: SchemaOrField + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for all standard AVRO nested types.""" # Handle recursive record definitions recurse: bool = True @@ -511,8 +546,8 @@ def _gen_from_non_field_nested_schemas( yield from self._to_mce_fields(sub_schema) def _gen_non_nested_to_mce_fields( - self, schema: AvroNonNestedSchemas - ) -> Generator[SchemaField, None, None]: + self, schema: SchemaOrField + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for non-nested AVRO types.""" with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager( schema, @@ -521,9 +556,7 @@ def _gen_non_nested_to_mce_fields( ) as non_nested_emitter: yield from non_nested_emitter.emit() - def _to_mce_fields( - self, avro_schema: avro.schema.Schema - ) -> Generator[SchemaField, None, None]: + def _to_mce_fields(self, avro_schema: SchemaOrField) -> Iterable[SchemaField]: # Invoke the relevant conversion handler for the schema element type. schema_type = ( type(avro_schema) @@ -541,7 +574,7 @@ def to_mce_fields( meta_mapping_processor: Optional[OperationProcessor] = None, schema_tags_field: Optional[str] = None, tag_prefix: Optional[str] = None, - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """ Converts a key or value type AVRO schema string to appropriate MCE SchemaFields. :param avro_schema_string: String representation of the AVRO schema. 
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 79d959965e0dd..f2735c24ca19d 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -353,77 +353,97 @@ def _time_to_print(self) -> bool: return False def run(self) -> None: - self.final_status = "unknown" - self._notify_reporters_on_ingestion_start() - callback = None - try: - callback = ( - LoggingCallback() - if not self.config.failure_log.enabled - else DeadLetterQueueCallback( - self.ctx, self.config.failure_log.log_config - ) - ) - for wu in itertools.islice( - self.source.get_workunits(), - self.preview_workunits if self.preview_mode else None, - ): - try: - if self._time_to_print(): - self.pretty_print_summary(currently_running=True) - except Exception as e: - logger.warning(f"Failed to print summary {e}") - - if not self.dry_run: - self.sink.handle_work_unit_start(wu) - try: - record_envelopes = self.extractor.get_records(wu) - for record_envelope in self.transform(record_envelopes): - if not self.dry_run: - self.sink.write_record_async(record_envelope, callback) - - except RuntimeError: - raise - except SystemExit: - raise - except Exception as e: - logger.error( - "Failed to process some records. Continuing.", exc_info=e + with contextlib.ExitStack() as stack: + if self.config.flags.generate_memory_profiles: + import memray + + stack.enter_context( + memray.Tracker( + f"{self.config.flags.generate_memory_profiles}/{self.config.run_id}.bin" ) - # TODO: Transformer errors should cause the pipeline to fail. 
- - self.extractor.close() - if not self.dry_run: - self.sink.handle_work_unit_end(wu) - self.source.close() - # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state - for record_envelope in self.transform( - [ - RecordEnvelope( - record=EndOfStream(), metadata={"workunit_id": "end-of-stream"} + ) + + self.final_status = "unknown" + self._notify_reporters_on_ingestion_start() + callback = None + try: + callback = ( + LoggingCallback() + if not self.config.failure_log.enabled + else DeadLetterQueueCallback( + self.ctx, self.config.failure_log.log_config ) - ] - ): - if not self.dry_run and not isinstance( - record_envelope.record, EndOfStream + ) + for wu in itertools.islice( + self.source.get_workunits(), + self.preview_workunits if self.preview_mode else None, + ): + try: + if self._time_to_print(): + self.pretty_print_summary(currently_running=True) + except Exception as e: + logger.warning(f"Failed to print summary {e}") + + if not self.dry_run: + self.sink.handle_work_unit_start(wu) + try: + record_envelopes = self.extractor.get_records(wu) + for record_envelope in self.transform(record_envelopes): + if not self.dry_run: + try: + self.sink.write_record_async( + record_envelope, callback + ) + except Exception as e: + # In case the sink's error handling is bad, we still want to report the error. + self.sink.report.report_failure( + f"Failed to write record: {e}" + ) + + except RuntimeError: + raise + except SystemExit: + raise + except Exception as e: + logger.error( + "Failed to process some records. Continuing.", + exc_info=e, + ) + # TODO: Transformer errors should cause the pipeline to fail. 
+ + self.extractor.close() + if not self.dry_run: + self.sink.handle_work_unit_end(wu) + self.source.close() + # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state + for record_envelope in self.transform( + [ + RecordEnvelope( + record=EndOfStream(), + metadata={"workunit_id": "end-of-stream"}, + ) + ] ): - # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. - self.sink.write_record_async(record_envelope, callback) - - self.sink.close() - self.process_commits() - self.final_status = "completed" - except (SystemExit, RuntimeError, KeyboardInterrupt) as e: - self.final_status = "cancelled" - logger.error("Caught error", exc_info=e) - raise - finally: - clear_global_warnings() - - if callback and hasattr(callback, "close"): - callback.close() # type: ignore - - self._notify_reporters_on_ingestion_completion() + if not self.dry_run and not isinstance( + record_envelope.record, EndOfStream + ): + # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. 
+ self.sink.write_record_async(record_envelope, callback) + + self.sink.close() + self.process_commits() + self.final_status = "completed" + except (SystemExit, RuntimeError, KeyboardInterrupt) as e: + self.final_status = "cancelled" + logger.error("Caught error", exc_info=e) + raise + finally: + clear_global_warnings() + + if callback and hasattr(callback, "close"): + callback.close() # type: ignore + + self._notify_reporters_on_ingestion_completion() def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]: """ diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index ff9a7a6f3d146..da3cee8ad9c1b 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -57,6 +57,13 @@ class FlagsConfig(ConfigModel): ), ) + generate_memory_profiles: Optional[str] = Field( + default=None, + description=( + "Generate memray memory dumps for ingestion process by providing a path to write the dump file in." 
+ ), + ) + class PipelineConfig(ConfigModel): # Once support for discriminated unions gets merged into Pydantic, we can diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py index 39054c256a7fd..38ddadaafc862 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py @@ -9,7 +9,6 @@ MetadataChangeEvent, MetadataChangeProposal, ) -from datahub.metadata.schema_classes import MetadataChangeProposalClass class KafkaSinkConfig(KafkaEmitterConfig): @@ -58,27 +57,21 @@ def write_record_async( ], write_callback: WriteCallback, ) -> None: - record = record_envelope.record - if isinstance(record, MetadataChangeEvent): - self.emitter.emit_mce_async( + callback = _KafkaCallback( + self.report, record_envelope, write_callback + ).kafka_callback + try: + record = record_envelope.record + self.emitter.emit( record, - callback=_KafkaCallback( - self.report, record_envelope, write_callback - ).kafka_callback, - ) - elif isinstance( - record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass) - ): - self.emitter.emit_mcp_async( - record, - callback=_KafkaCallback( - self.report, record_envelope, write_callback - ).kafka_callback, - ) - else: - raise ValueError( - f"The datahub-kafka sink only supports MetadataChangeEvent/MetadataChangeProposal[Wrapper] classes, not {type(record)}" + callback=callback, ) + except Exception as err: + # In case we throw an exception while trying to emit the record, + # catch it and report the failure. This might happen if the schema + # registry is down or otherwise misconfigured, in which case we'd + # fail when serializing the record. 
+ callback(err, f"Failed to write record: {err}") def close(self) -> None: self.emitter.flush() diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index e5dff786b71d1..aa7e5aa352a3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -221,6 +221,7 @@ def report_table_dropped(self, table: str) -> None: SourceCapability.DELETION_DETECTION, "Enabled by default when stateful ingestion is turned on.", ) +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class GlueSource(StatefulIngestionSourceBase): """ Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py index 501162455cc45..878b8dd1bb9a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py @@ -34,21 +34,26 @@ def get_bucket_relative_path(s3_uri: str) -> str: return "/".join(strip_s3_prefix(s3_uri).split("/")[1:]) -def make_s3_urn(s3_uri: str, env: str) -> str: +def make_s3_urn(s3_uri: str, env: str, remove_extension: bool = True) -> str: s3_name = strip_s3_prefix(s3_uri) if s3_name.endswith("/"): s3_name = s3_name[:-1] name, extension = os.path.splitext(s3_name) - - if extension != "": + if remove_extension and extension != "": extension = extension[1:] # remove the dot return f"urn:li:dataset:(urn:li:dataPlatform:s3,{name}_{extension},{env})" return f"urn:li:dataset:(urn:li:dataPlatform:s3,{s3_name},{env})" +def make_s3_urn_for_lineage(s3_uri: str, env: str) -> str: + # Ideally this is the 
implementation for all S3 URNs + # Don't feel comfortable changing `make_s3_urn` for glue, sagemaker, and athena + return make_s3_urn(s3_uri, env, remove_extension=False) + + def get_bucket_name(s3_uri: str) -> str: if not is_s3_uri(s3_uri): raise ValueError( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index fee181864a2d6..6959a48313010 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -16,7 +16,6 @@ make_dataplatform_instance_urn, make_dataset_urn, make_tag_urn, - set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey @@ -154,6 +153,7 @@ def cleanup(config: BigQueryV2Config) -> None: ) @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", @@ -218,8 +218,6 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): if self.config.enable_legacy_sharded_table_support: BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "" - set_dataset_urn_to_lower(self.config.convert_urns_to_lowercase) - self.bigquery_data_dictionary = BigQuerySchemaApi( self.report.schema_api_perf, self.config.get_bigquery_client() ) @@ -461,7 +459,8 @@ def _init_schema_resolver(self) -> SchemaResolver: ) else: logger.warning( - "Failed to load schema info from DataHub as DataHubGraph is missing.", + "Failed to load schema info from DataHub as DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
", ) return SchemaResolver(platform=self.platform, env=self.config.env) @@ -1051,11 +1050,18 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: for idx, field in enumerate(schema_fields): # Remove all the [version=2.0].[type=struct]. tags to get the field path if ( - re.sub(r"\[.*?\]\.", "", field.fieldPath, 0, re.MULTILINE) - == col.field_path + re.sub( + r"\[.*?\]\.", + "", + field.fieldPath.lower(), + 0, + re.MULTILINE, + ) + == col.field_path.lower() ): field.description = col.comment schema_fields[idx] = field + break else: tags = [] if col.is_partition_column: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index b0ac77201b415..55366d6c57cf8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass, field from datetime import datetime -from typing import Any, ClassVar, Dict, List, Optional, Pattern, Set, Tuple, Union +from typing import Any, ClassVar, Dict, List, Optional, Pattern, Tuple, Union from dateutil import parser @@ -20,7 +20,13 @@ logger: logging.Logger = logging.getLogger(__name__) -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. 
+_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) @dataclass(frozen=True, order=True) @@ -29,8 +35,6 @@ class BigqueryTableIdentifier: dataset: str table: str - invalid_chars: ClassVar[Set[str]] = {"$", "@"} - # Note: this regex may get overwritten by the sharded_table_pattern config. # The class-level constant, however, will not be overwritten. _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[ @@ -40,7 +44,7 @@ class BigqueryTableIdentifier: _BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd" @staticmethod - def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: + def get_table_and_shard(table_name: str) -> Tuple[Optional[str], Optional[str]]: """ Args: table_name: @@ -53,16 +57,25 @@ def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: In case of non-sharded tables, returns (, None) In case of sharded tables, returns (, shard) """ + new_table_name = table_name match = re.match( BigqueryTableIdentifier._BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX, table_name, re.IGNORECASE, ) if match: - table_name = match.group(2) - shard = match.group(3) - return table_name, shard - return table_name, None + shard: str = match[3] + if shard: + if table_name.endswith(shard): + new_table_name = table_name[: -len(shard)] + + new_table_name = ( + new_table_name.rstrip("_") if new_table_name else new_table_name + ) + if new_table_name.endswith("."): + new_table_name = table_name + return (new_table_name, shard) if new_table_name else (None, shard) + return new_table_name, None @classmethod def from_string_name(cls, table: str) -> "BigqueryTableIdentifier": @@ -90,18 +103,7 @@ def get_table_display_name(self) -> str: ) table_name, _ = self.get_table_and_shard(shortened_table_name) - if not table_name: - table_name = self.dataset - - # Handle exceptions - invalid_chars_in_table_name: List[str] = [ - c for c in self.invalid_chars if c in table_name - ] - if invalid_chars_in_table_name: - raise 
ValueError( - f"Cannot handle {self.raw_table_name()} - poorly formatted table name, contains {invalid_chars_in_table_name}" - ) - return table_name + return table_name or self.dataset def get_table_name(self) -> str: """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py index 03b12c61ee5c6..db552c09cd0a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py @@ -4,7 +4,6 @@ from google.cloud import bigquery from google.cloud.logging_v2.client import Client as GCPLoggingClient -from ratelimiter import RateLimiter from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( AuditLogEntry, @@ -17,6 +16,7 @@ BQ_DATE_SHARD_FORMAT, BQ_DATETIME_FORMAT, ) +from datahub.utilities.ratelimiter import RateLimiter logger: logging.Logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 483355a85ac05..f762d451849ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -119,8 +119,8 @@ class BigQueryV2Config( ) match_fully_qualified_names: bool = Field( - default=False, - description="Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", + default=True, + description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", ) include_external_url: bool = Field( @@ -206,11 +206,6 @@ def validate_column_lineage(cls, v: bool, values: Dict[str, Any]) -> bool: description="This flag enables the data lineage extraction from Data Lineage API exposed by Google Data 
Catalog. NOTE: This extractor can't build views lineage. It's recommended to enable the view's DDL parsing. Read the docs to have more information about: https://cloud.google.com/data-catalog/docs/concepts/about-data-lineage", ) - convert_urns_to_lowercase: bool = Field( - default=False, - description="Convert urns to lowercase.", - ) - enable_legacy_sharded_table_support: bool = Field( default=True, description="Use the legacy sharded table urn suffix added.", @@ -304,7 +299,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: "use project_id_pattern whenever possible. project_id will be deprecated, please use project_id_pattern only if possible." ) - dataset_pattern = values.get("dataset_pattern") + dataset_pattern: Optional[AllowDenyPattern] = values.get("dataset_pattern") schema_pattern = values.get("schema_pattern") if ( dataset_pattern == AllowDenyPattern.allow_all() @@ -314,6 +309,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: "dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern instead." ) values["dataset_pattern"] = schema_pattern + dataset_pattern = schema_pattern elif ( dataset_pattern != AllowDenyPattern.allow_all() and schema_pattern != AllowDenyPattern.allow_all() @@ -332,9 +328,24 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: ): logger.warning( "Please update `dataset_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." - "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. " - "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`." + "The config option `match_fully_qualified_names` is deprecated and will be removed in a future release." 
) + elif match_fully_qualified_names and dataset_pattern is not None: + adjusted = False + for lst in [dataset_pattern.allow, dataset_pattern.deny]: + for i, pattern in enumerate(lst): + if "." not in pattern: + if pattern.startswith("^"): + lst[i] = r"^.*\." + pattern[1:] + else: + lst[i] = r".*\." + pattern + adjusted = True + if adjusted: + logger.warning( + "`dataset_pattern` was adjusted to match against fully qualified schema names," + " of the form `.`." + ) + return values def get_table_pattern(self, pattern: List[str]) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 98c8cbaf85eec..e9acf5ea86044 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -20,6 +20,7 @@ from google.cloud.datacatalog import lineage_v1 from google.cloud.logging_v2.client import Client as GCPLoggingClient +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -548,7 +549,7 @@ def _get_parsed_audit_log_events(self, project_id: str) -> Iterable[QueryEvent]: # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. 
corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.log_entry_start_time = corrected_start_time self.report.log_entry_end_time = corrected_end_time @@ -683,8 +684,11 @@ def _create_lineage_map( self.report.num_skipped_lineage_entries_missing_data[e.project_id] += 1 continue - if not self.config.dataset_pattern.allowed( - destination_table.table_identifier.dataset + if not is_schema_allowed( + self.config.dataset_pattern, + destination_table.table_identifier.dataset, + destination_table.table_identifier.project_id, + self.config.match_fully_qualified_names, ) or not self.config.table_pattern.allowed( destination_table.table_identifier.get_table_name() ): diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index a87cb8c1cbfa5..67fcc33cdf218 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -51,8 +51,8 @@ class BigqueryQuery: p.max_partition_id, p.active_billable_bytes, p.long_term_billable_bytes, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t @@ -92,8 +92,8 @@ class BigqueryQuery: tos.OPTION_VALUE as comment, t.is_insertable_into, t.ddl, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, 
r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 201567e104a51..65b559550ffc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -21,6 +21,7 @@ import humanfriendly +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.configuration.time_window_config import ( BaseTimeWindowConfig, get_time_bucket, @@ -335,8 +336,13 @@ def get_time_window(self) -> Tuple[datetime, datetime]: def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool: return ( table_ref is not None - and self.config.dataset_pattern.allowed(table_ref.table_identifier.dataset) - and self.config.table_pattern.allowed(table_ref.table_identifier.table) + and is_schema_allowed( + self.config.dataset_pattern, + table_ref.table_identifier.dataset, + table_ref.table_identifier.project_id, + self.config.match_fully_qualified_names, + ) + and self.config.table_pattern.allowed(str(table_ref.table_identifier)) ) def _should_ingest_usage(self) -> bool: @@ -844,7 +850,7 @@ def _get_parsed_bigquery_log_events( # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. 
corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.audit_start_time = corrected_start_time self.report.audit_end_time = corrected_end_time diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 7cb487a86d931..611f0c5c52cc6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -129,11 +129,9 @@ def __init__(self, config: CSVEnricherConfig, ctx: PipelineContext): # Map from entity urn to a list of SubResourceRow. self.editable_schema_metadata_map: Dict[str, List[SubResourceRow]] = {} self.should_overwrite: bool = self.config.write_semantics == "OVERRIDE" - if not self.should_overwrite and not self.ctx.graph: - raise ConfigurationError( - "With PATCH semantics, the csv-enricher source requires a datahub_api to connect to. " - "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe." - ) + + if not self.should_overwrite: + self.ctx.require_graph(operation="The csv-enricher's PATCH semantics flag") def get_resource_glossary_terms_work_unit( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py index d1c949f48e2cd..a35fb94614f72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py @@ -18,7 +18,14 @@ logger: logging.Logger = logging.getLogger(__name__) SUPPORTED_FILE_TYPES: List[str] = ["csv", "tsv", "json", "parquet", "avro"] -SUPPORTED_COMPRESSIONS: List[str] = ["gz", "bz2"] + +# These come from the smart_open library. 
+SUPPORTED_COMPRESSIONS: List[str] = [ + "gz", + "bz2", + # We have a monkeypatch on smart_open that aliases .gzip to .gz. + "gzip", +] class PathSpec(ConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index 053d136305527..83958dc76754f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -1,3 +1,4 @@ +import os from typing import Optional from pydantic import Field, root_validator @@ -67,9 +68,25 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): ), ) + pull_from_datahub_api: bool = Field( + default=False, + description="Use the DataHub API to fetch versioned aspects.", + hidden_from_docs=True, + ) + + max_workers: int = Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for datahub api ingestion.", + hidden_from_docs=True, + ) + @root_validator def check_ingesting_data(cls, values): - if not values.get("database_connection") and not values.get("kafka_connection"): + if ( + not values.get("database_connection") + and not values.get("kafka_connection") + and not values.get("pull_from_datahub_api") + ): raise ValueError( "Your current config will not ingest any data." " Please specify at least one of `database_connection` or `kafka_connection`, ideally both." 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py new file mode 100644 index 0000000000000..7ee36736723b2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py @@ -0,0 +1,49 @@ +import logging +from concurrent import futures +from typing import Dict, Iterable, List + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.filters import RemovedStatusFilter +from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.report import DataHubSourceReport +from datahub.metadata._schema_classes import _Aspect + +logger = logging.getLogger(__name__) + +# Should work for at least mysql, mariadb, postgres +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f" + + +class DataHubApiReader: + def __init__( + self, + config: DataHubSourceConfig, + report: DataHubSourceReport, + graph: DataHubGraph, + ): + self.config = config + self.report = report + self.graph = graph + + def get_aspects(self) -> Iterable[MetadataChangeProposalWrapper]: + urns = self.graph.get_urns_by_filter( + status=RemovedStatusFilter.ALL, + batch_size=self.config.database_query_batch_size, + ) + tasks: List[futures.Future[Iterable[MetadataChangeProposalWrapper]]] = [] + with futures.ThreadPoolExecutor( + max_workers=self.config.max_workers + ) as executor: + for urn in urns: + tasks.append(executor.submit(self._get_aspects_for_urn, urn)) + for task in futures.as_completed(tasks): + yield from task.result() + + def _get_aspects_for_urn(self, urn: str) -> Iterable[MetadataChangeProposalWrapper]: + aspects: Dict[str, _Aspect] = self.graph.get_entity_semityped(urn) # type: ignore + for aspect in aspects.values(): + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 2368febe1ff57..a2f43b8cc62cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -15,6 +15,7 @@ from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader from datahub.ingestion.source.datahub.datahub_database_reader import ( DataHubDatabaseReader, ) @@ -58,6 +59,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting DataHub metadata up until {self.report.stop_time}") state = self.stateful_ingestion_handler.get_last_run_state() + if self.config.pull_from_datahub_api: + yield from self._get_api_workunits() + if self.config.database_connection is not None: yield from self._get_database_workunits( from_createdon=state.database_createdon_datetime @@ -139,6 +143,18 @@ def _get_kafka_workunits( ) self._commit_progress(i) + def _get_api_workunits(self) -> Iterable[MetadataWorkUnit]: + if self.ctx.graph is None: + self.report.report_failure( + "datahub_api", + "Specify datahub_api on your ingestion recipe to ingest from the DataHub API", + ) + return + + reader = DataHubApiReader(self.config, self.report, self.ctx.graph) + for mcp in reader.get_aspects(): + yield mcp.as_workunit() + def _commit_progress(self, i: Optional[int] = None) -> None: """Commit progress to stateful storage, if there have been no errors. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index af9769bc9d94c..da1ea8ecb4678 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -20,9 +20,8 @@ DBTCommonConfig, DBTNode, DBTSourceBase, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0f5c08eb6ac54..c4de24bf192f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1,11 +1,10 @@ -import json import logging import re from abc import abstractmethod from dataclasses import dataclass, field from datetime import datetime from enum import auto -from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple import pydantic from pydantic import root_validator, validator @@ -34,6 +33,12 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import DatasetSubTypes +from datahub.ingestion.source.dbt.dbt_tests import ( + DBTTest, + DBTTestResult, + make_assertion_from_test, + make_assertion_result_from_test, +) from datahub.ingestion.source.sql.sql_types import ( ATHENA_SQL_TYPES_MAP, BIGQUERY_TYPES_MAP, @@ -81,20 +86,7 @@ TimeTypeClass, ) from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdAggregationClass, - AssertionStdOperatorClass, - 
AssertionStdParameterClass, - AssertionStdParametersClass, - AssertionStdParameterTypeClass, - AssertionTypeClass, DataPlatformInstanceClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, DatasetPropertiesClass, GlobalTagsClass, GlossaryTermsClass, @@ -288,6 +280,11 @@ class DBTCommonConfig( default=False, description="When enabled, dbt test warnings will be treated as failures.", ) + # override fault value to True. + incremental_lineage: bool = Field( + default=True, + description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -551,134 +548,6 @@ def get_column_type( return SchemaFieldDataType(type=TypeClass()) -@dataclass -class AssertionParams: - scope: Union[DatasetAssertionScopeClass, str] - operator: Union[AssertionStdOperatorClass, str] - aggregation: Union[AssertionStdAggregationClass, str] - parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None - logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None - - -def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: - """ - Try to produce a useful string for the name of a relationship constraint. 
- Return None if we fail to - """ - destination_ref = kw_args.get("to") - source_ref = kw_args.get("model") - column_name = kw_args.get("column_name") - dest_field_name = kw_args.get("field") - if not destination_ref or not source_ref or not column_name or not dest_field_name: - # base assertions are violated, bail early - return None - m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) - if m: - destination_table = m.group(1) - else: - destination_table = destination_ref - m = re.search(r"ref\(\'(.*)\'\)", source_ref) - if m: - source_table = m.group(1) - else: - source_table = source_ref - return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" - - -@dataclass -class DBTTest: - qualified_test_name: str - column_name: Optional[str] - kw_args: dict - - TEST_NAME_TO_ASSERTION_MAP: ClassVar[Dict[str, AssertionParams]] = { - "not_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "unique": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.EQUAL_TO, - aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, - parameters=lambda _: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value="1.0", - type=AssertionStdParameterTypeClass.NUMBER, - ) - ), - ), - "accepted_values": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - "relationships": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass.IDENTITY, - 
parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - logic_fn=_get_name_for_relationship_test, - ), - "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "dbt_expectations.expect_column_values_to_be_between": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.BETWEEN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda x: AssertionStdParametersClass( - minValue=AssertionStdParameterClass( - value=str(x.get("min_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - maxValue=AssertionStdParameterClass( - value=str(x.get("max_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - ), - ), - "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("value_set")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - } - - -@dataclass -class DBTTestResult: - invocation_id: str - - status: str - execution_time: datetime - - native_results: Dict[str, str] - - -def string_map(input_map: Dict[str, Any]) -> Dict[str, str]: - return {k: str(v) for k, v in input_map.items()} - - @platform_name("dbt") @config_class(DBTCommonConfig) @support_status(SupportStatus.CERTIFIED) @@ -750,7 +619,7 @@ def create_test_entity_mcps( for upstream_urn in sorted(upstream_urns): if self.config.entities_enabled.can_emit_node_type("test"): - yield self._make_assertion_from_test( + yield 
make_assertion_from_test( custom_props, node, assertion_urn, @@ -759,133 +628,17 @@ def create_test_entity_mcps( if node.test_result: if self.config.entities_enabled.can_emit_test_results: - yield self._make_assertion_result_from_test( - node, assertion_urn, upstream_urn + yield make_assertion_result_from_test( + node, + assertion_urn, + upstream_urn, + test_warnings_are_errors=self.config.test_warnings_are_errors, ) else: logger.debug( f"Skipping test result {node.name} emission since it is turned off." ) - def _make_assertion_from_test( - self, - extra_custom_props: Dict[str, str], - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert node.test_info - qualified_test_name = node.test_info.qualified_test_name - column_name = node.test_info.column_name - kw_args = node.test_info.kw_args - - if qualified_test_name in DBTTest.TEST_NAME_TO_ASSERTION_MAP: - assertion_params = DBTTest.TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=assertion_params.scope, - operator=assertion_params.operator, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ] - if ( - assertion_params.scope - == DatasetAssertionScopeClass.DATASET_COLUMN - and column_name - ) - else [], - nativeType=node.name, - aggregation=assertion_params.aggregation, - parameters=assertion_params.parameters(kw_args) - if assertion_params.parameters - else None, - logic=assertion_params.logic_fn(kw_args) - if assertion_params.logic_fn - else None, - nativeParameters=string_map(kw_args), - ), - ) - elif column_name: - # no match with known test types, column-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - 
scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ], - nativeType=node.name, - logic=node.compiled_code or node.raw_code, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - else: - # no match with known test types, default to row-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=DatasetAssertionScopeClass.DATASET_ROWS, - operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code or node.raw_code, - nativeType=node.name, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - - wu = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertion_info, - ).as_workunit() - - return wu - - def _make_assertion_result_from_test( - self, - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert node.test_result - test_result = node.test_result - - assertionResult = AssertionRunEventClass( - timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), - assertionUrn=assertion_urn, - asserteeUrn=upstream_urn, - runId=test_result.invocation_id, - result=AssertionResultClass( - type=AssertionResultTypeClass.SUCCESS - if test_result.status == "pass" - or ( - not self.config.test_warnings_are_errors - and test_result.status == "warn" - ) - else AssertionResultTypeClass.FAILURE, - nativeResults=test_result.native_results, - ), - status=AssertionRunStatusClass.COMPLETE, - ) - - event = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertionResult, - ) - wu = MetadataWorkUnit( - id=f"{assertion_urn}-assertionRunEvent-{upstream_urn}", - mcp=event, - ) - return wu - @abstractmethod def load_nodes(self) -> 
Tuple[List[DBTNode], Dict[str, Optional[str]]]: # return dbt nodes + global custom properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index c08295ed1dc59..dc3a84847beb2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -26,9 +26,8 @@ DBTNode, DBTSourceBase, DBTSourceReport, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py new file mode 100644 index 0000000000000..721769d214d9e --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py @@ -0,0 +1,261 @@ +import json +import re +from dataclasses import dataclass +from datetime import datetime +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union + +from datahub.emitter import mce_builder +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdAggregationClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, +) + +if TYPE_CHECKING: + from datahub.ingestion.source.dbt.dbt_common import DBTNode + + +@dataclass +class DBTTest: + qualified_test_name: str + column_name: Optional[str] + kw_args: dict + + +@dataclass +class DBTTestResult: + invocation_id: str + + status: str + execution_time: datetime + + native_results: Dict[str, str] + + +def 
_get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: + """ + Try to produce a useful string for the name of a relationship constraint. + Return None if we fail to + """ + destination_ref = kw_args.get("to") + source_ref = kw_args.get("model") + column_name = kw_args.get("column_name") + dest_field_name = kw_args.get("field") + if not destination_ref or not source_ref or not column_name or not dest_field_name: + # base assertions are violated, bail early + return None + m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) + if m: + destination_table = m.group(1) + else: + destination_table = destination_ref + m = re.search(r"ref\(\'(.*)\'\)", source_ref) + if m: + source_table = m.group(1) + else: + source_table = source_ref + return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" + + +@dataclass +class AssertionParams: + scope: Union[DatasetAssertionScopeClass, str] + operator: Union[AssertionStdOperatorClass, str] + aggregation: Union[AssertionStdAggregationClass, str] + parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None + logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None + + +_DBT_TEST_NAME_TO_ASSERTION_MAP: Dict[str, AssertionParams] = { + "not_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "unique": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.EQUAL_TO, + aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, + parameters=lambda _: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value="1.0", + type=AssertionStdParameterTypeClass.NUMBER, + ) + ), + ), + "accepted_values": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + 
aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), + "relationships": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + logic_fn=_get_name_for_relationship_test, + ), + "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "dbt_expectations.expect_column_values_to_be_between": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.BETWEEN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda x: AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value=str(x.get("min_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value=str(x.get("max_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("value_set")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), +} + + +def _string_map(input_map: Dict[str, Any]) -> Dict[str, str]: + return {k: str(v) for k, v in input_map.items()} + + 
+def make_assertion_from_test( + extra_custom_props: Dict[str, str], + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, +) -> MetadataWorkUnit: + assert node.test_info + qualified_test_name = node.test_info.qualified_test_name + column_name = node.test_info.column_name + kw_args = node.test_info.kw_args + + if qualified_test_name in _DBT_TEST_NAME_TO_ASSERTION_MAP: + assertion_params = _DBT_TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=assertion_params.scope, + operator=assertion_params.operator, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)] + if ( + assertion_params.scope == DatasetAssertionScopeClass.DATASET_COLUMN + and column_name + ) + else [], + nativeType=node.name, + aggregation=assertion_params.aggregation, + parameters=assertion_params.parameters(kw_args) + if assertion_params.parameters + else None, + logic=assertion_params.logic_fn(kw_args) + if assertion_params.logic_fn + else None, + nativeParameters=_string_map(kw_args), + ), + ) + elif column_name: + # no match with known test types, column-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)], + nativeType=node.name, + logic=node.compiled_code or node.raw_code, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + else: + # no match with known test types, default to row-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + 
datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_ROWS, + operator=AssertionStdOperatorClass._NATIVE_, + logic=node.compiled_code or node.raw_code, + nativeType=node.name, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertion_info, + ).as_workunit() + + +def make_assertion_result_from_test( + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, + test_warnings_are_errors: bool, +) -> MetadataWorkUnit: + assert node.test_result + test_result = node.test_result + + assertionResult = AssertionRunEventClass( + timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), + assertionUrn=assertion_urn, + asserteeUrn=upstream_urn, + runId=test_result.invocation_id, + result=AssertionResultClass( + type=AssertionResultTypeClass.SUCCESS + if test_result.status == "pass" + or (not test_warnings_are_errors and test_result.status == "warn") + else AssertionResultTypeClass.FAILURE, + nativeResults=test_result.native_results, + ), + status=AssertionRunStatusClass.COMPLETE, + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertionResult, + ).as_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 566304e1999b7..23770ff3cf812 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -3,7 +3,7 @@ import logging from dataclasses import dataclass, field from enum import Enum -from typing import Any, Dict, Iterable, List, Optional, Type +from typing import Any, Dict, Iterable, List, Optional, Type, cast import avro.schema import confluent_kafka @@ -18,7 +18,10 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.kafka import 
KafkaConsumerConnectionConfig -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.emitter import mce_builder from datahub.emitter.mce_builder import ( make_data_platform_urn, @@ -76,7 +79,11 @@ class KafkaTopicConfigKeys(str, Enum): UNCLEAN_LEADER_ELECTION_CONFIG = "unclean.leader.election.enable" -class KafkaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class KafkaSourceConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +): connection: KafkaConsumerConnectionConfig = KafkaConsumerConnectionConfig() topic_patterns: AllowDenyPattern = AllowDenyPattern(allow=[".*"], deny=["^_.*"]) @@ -309,13 +316,20 @@ def _extract_record( avro_schema = avro.schema.parse( schema_metadata.platformSchema.documentSchema ) - description = avro_schema.doc + description = getattr(avro_schema, "doc", None) # set the tags all_tags: List[str] = [] - for tag in avro_schema.other_props.get( - self.source_config.schema_tags_field, [] - ): - all_tags.append(self.source_config.tag_prefix + tag) + try: + schema_tags = cast( + Iterable[str], + avro_schema.other_props.get( + self.source_config.schema_tags_field, [] + ), + ) + for tag in schema_tags: + all_tags.append(self.source_config.tag_prefix + tag) + except TypeError: + pass if self.source_config.enable_meta_mapping: meta_aspects = self.meta_processor.process(avro_schema.other_props) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 5fae0ee5215a3..1a1e012e80633 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -1096,6 +1096,7 @@ def transform_connector_config( @config_class(KafkaConnectSourceConfig) 
@support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class KafkaConnectSource(StatefulIngestionSourceBase): config: KafkaConnectSourceConfig report: KafkaConnectSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 89b1e45695c57..30c38720dd96c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -81,9 +81,6 @@ EnumTypeClass, FineGrainedLineageClass, GlobalTagsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, SchemaMetadataClass, StatusClass, SubTypesClass, @@ -453,17 +450,9 @@ def _get_schema( @staticmethod def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent: assert tag_urn in LookerUtil.tag_definitions - ownership = OwnershipClass( - owners=[ - OwnerClass( - owner="urn:li:corpuser:datahub", - type=OwnershipTypeClass.DATAOWNER, - ) - ] - ) return MetadataChangeEvent( proposedSnapshot=TagSnapshotClass( - urn=tag_urn, aspects=[ownership, LookerUtil.tag_definitions[tag_urn]] + urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]] ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 8297a0aa8efa7..a3df977582ca4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -103,6 +103,11 @@ @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `extract_owners`" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configured using 
`extract_column_level_lineage`", +) @capability( SourceCapability.USAGE_STATS, "Enabled by default, configured using `extract_usage_history`", @@ -1128,7 +1133,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def emit_independent_looks_mcp( self, dashboard_element: LookerDashboardElement ) -> Iterable[MetadataWorkUnit]: - yield from auto_workunit( stream=self._make_chart_metadata_events( dashboard_element=dashboard_element, diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index fb4512893feb1..24145d60210ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -80,6 +80,7 @@ def remove_trailing_slash(cls, v): @config_class(MetabaseConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class MetabaseSource(Source): """ This plugin extracts Charts, dashboards, and associated metadata. 
This plugin is in beta and has only been tested diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py index 1c0c809c16a60..f33c6e0edae3d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py @@ -23,11 +23,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import ( auto_status_aspect, auto_workunit_reporter, @@ -121,6 +127,8 @@ def version_must_be_1(cls, v): @platform_name("File Based Lineage") @config_class(LineageFileSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Specified in the lineage file.") +@capability(SourceCapability.LINEAGE_FINE, "Specified in the lineage file.") @dataclass class LineageFileSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index a000c66a406c2..c46b56da422d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -98,6 +98,7 @@ class HTTPError429(HTTPError): @config_class(ModeConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class ModeSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 
f02b6845e40b5..890c5c64bd5e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -11,7 +11,11 @@ from pymongo.mongo_client import MongoClient from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.source_common import EnvConfigMixin +from datahub.configuration.source_common import ( + EnvConfigMixin, + PlatformInstanceConfigMixin, +) +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -55,7 +59,7 @@ DENY_DATABASE_LIST = set(["admin", "config", "local"]) -class MongoDBConfig(EnvConfigMixin): +class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin): # See the MongoDB authentication docs for details and examples. # https://pymongo.readthedocs.io/en/stable/examples/authentication.html connect_uri: str = Field( @@ -199,6 +203,7 @@ def construct_schema_pymongo( @platform_name("MongoDB") @config_class(MongoDBConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @dataclass class MongoDBSource(Source): @@ -320,7 +325,12 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.report_dropped(dataset_name) continue - dataset_urn = f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{self.config.env})" + dataset_urn = make_dataset_urn_with_platform_instance( + platform=platform, + name=dataset_name, + env=self.config.env, + platform_instance=self.config.platform_instance, + ) dataset_snapshot = DatasetSnapshot( urn=dataset_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index ac1e03812db3b..bc05edbb3c623 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -26,11 +26,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( DataFlowInfoClass, @@ -360,6 +361,7 @@ def report_dropped(self, ent_name: str) -> None: @platform_name("NiFi", id="nifi") @config_class(NifiSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations") class NifiSource(Source): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 52bcef66658c8..4611a8eed4782 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -264,7 +264,6 @@ def extract_lineage( ) if len(upstream) > 0: - upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass( upstreams=upstream, fineGrainedLineages=cll_lineage or None, @@ -1139,6 +1138,10 @@ def report_to_datahub_work_units( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", ) +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default, configured using `extract_lineage`.", +) @capability( SourceCapability.LINEAGE_FINE, "Disabled by default, configured using `extract_column_level_lineage`. 
", diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 804a14b0fe1cf..79b044841e054 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -132,6 +132,16 @@ class RedshiftConfig( description="Whether `schema_pattern` is matched against fully qualified schema name `.`.", ) + extract_column_level_lineage: bool = Field( + default=True, + description="Whether to extract column level lineage. This config works with rest-sink only.", + ) + + incremental_lineage: bool = Field( + default=False, + description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run. This config works with rest-sink only.", + ) + @root_validator(pre=True) def check_email_is_set_on_usage(cls, values): if values.get("include_usage_statistics"): diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index bbe52b5d98ba3..c9ddfbe92ab2a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -9,10 +9,12 @@ import humanfriendly import redshift_connector -from sqllineage.runner import LineageRunner +import datahub.emitter.mce_builder as builder +import datahub.utilities.sqlglot_lineage as sqlglot_l from datahub.emitter import mce_builder from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import strip_s3_prefix from datahub.ingestion.source.redshift.common import get_db_name from datahub.ingestion.source.redshift.config import LineageMode, RedshiftConfig @@ -28,13 +30,19 @@ from 
datahub.ingestion.source.state.redundant_run_skip_handler import ( RedundantLineageRunSkipHandler, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, + UpstreamLineage, +) from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, UpstreamClass, UpstreamLineageClass, ) from datahub.utilities import memory_footprint +from datahub.utilities.urns import dataset_urn logger: logging.Logger = logging.getLogger(__name__) @@ -56,13 +64,14 @@ class LineageCollectorType(Enum): @dataclass(frozen=True, eq=True) class LineageDataset: platform: LineageDatasetPlatform - path: str + urn: str @dataclass() class LineageItem: dataset: LineageDataset upstreams: Set[LineageDataset] + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] collector_type: LineageCollectorType dataset_lineage_type: str = field(init=False) @@ -83,10 +92,12 @@ def __init__( self, config: RedshiftConfig, report: RedshiftReport, + context: PipelineContext, redundant_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = None, ): self.config = config self.report = report + self.context = context self._lineage_map: Dict[str, LineageItem] = defaultdict() self.redundant_run_skip_handler = redundant_run_skip_handler @@ -121,33 +132,37 @@ def _get_s3_path(self, path: str) -> str: return path - def _get_sources_from_query(self, db_name: str, query: str) -> List[LineageDataset]: + def _get_sources_from_query( + self, db_name: str, query: str + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() - parser = LineageRunner(query) + parsed_result: Optional[ + sqlglot_l.SqlParsingResult + ] = sqlglot_l.create_lineage_sql_parsed_result( + query=query, + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + 
database=db_name, + schema=str(self.config.default_schema), + graph=self.context.graph, + env=self.config.env, + ) - for table in parser.source_tables: - split = str(table).split(".") - if len(split) == 3: - db_name, source_schema, source_table = split - elif len(split) == 2: - source_schema, source_table = split - else: - raise ValueError( - f"Invalid table name {table} in query {query}. " - f"Expected format: [db_name].[schema].[table] or [schema].[table] or [table]." - ) + if parsed_result is None: + logger.debug(f"native query parsing failed for {query}") + return sources, None - if source_schema == "": - source_schema = str(self.config.default_schema) + logger.debug(f"parsed_result = {parsed_result}") + for table_urn in parsed_result.in_tables: source = LineageDataset( platform=LineageDatasetPlatform.REDSHIFT, - path=f"{db_name}.{source_schema}.{source_table}", + urn=table_urn, ) sources.append(source) - return sources + return sources, parsed_result.column_lineage def _build_s3_path_from_row(self, filename: str) -> str: path = filename.strip() @@ -165,9 +180,11 @@ def _get_sources( source_table: Optional[str], ddl: Optional[str], filename: Optional[str], - ) -> List[LineageDataset]: + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() # Source + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None + if ( lineage_type in { @@ -177,7 +194,7 @@ def _get_sources( and ddl is not None ): try: - sources = self._get_sources_from_query(db_name=db_name, query=ddl) + sources, cll = self._get_sources_from_query(db_name=db_name, query=ddl) except Exception as e: logger.warning( f"Error parsing query {ddl} for getting lineage. Error was {e}." @@ -192,22 +209,38 @@ def _get_sources( "Only s3 source supported with copy. The source was: {path}." 
) self.report.num_lineage_dropped_not_support_copy_path += 1 - return sources + return sources, cll path = strip_s3_prefix(self._get_s3_path(path)) + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + name=path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) elif source_schema is not None and source_table is not None: platform = LineageDatasetPlatform.REDSHIFT path = f"{db_name}.{source_schema}.{source_table}" + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ) else: - return [] + return [], cll sources = [ LineageDataset( platform=platform, - path=path, + urn=urn, ) ] - return sources + return sources, cll def _populate_lineage_map( self, @@ -231,6 +264,7 @@ def _populate_lineage_map( :rtype: None """ try: + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None raw_db_name = database alias_db_name = get_db_name(self.config) @@ -243,7 +277,7 @@ def _populate_lineage_map( if not target: continue - sources = self._get_sources( + sources, cll = self._get_sources( lineage_type, alias_db_name, source_schema=lineage_row.source_schema, @@ -251,6 +285,7 @@ def _populate_lineage_map( ddl=lineage_row.ddl, filename=lineage_row.filename, ) + target.cll = cll target.upstreams.update( self._get_upstream_lineages( @@ -262,20 +297,16 @@ def _populate_lineage_map( ) # Merging downstreams if dataset already exists and has downstreams - if target.dataset.path in self._lineage_map: - self._lineage_map[ - target.dataset.path - ].upstreams = self._lineage_map[ - target.dataset.path - ].upstreams.union( - target.upstreams - ) + if target.dataset.urn in self._lineage_map: + self._lineage_map[target.dataset.urn].upstreams = self._lineage_map[ + target.dataset.urn + ].upstreams.union(target.upstreams) else: - 
self._lineage_map[target.dataset.path] = target + self._lineage_map[target.dataset.urn] = target logger.debug( - f"Lineage[{target}]:{self._lineage_map[target.dataset.path]}" + f"Lineage[{target}]:{self._lineage_map[target.dataset.urn]}" ) except Exception as e: self.warn( @@ -308,17 +339,34 @@ def _get_target_lineage( target_platform = LineageDatasetPlatform.S3 # Following call requires 'filename' key in lineage_row target_path = self._build_s3_path_from_row(lineage_row.filename) + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + name=target_path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + target_platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) except ValueError as e: self.warn(logger, "non-s3-lineage", str(e)) return None else: target_platform = LineageDatasetPlatform.REDSHIFT target_path = f"{alias_db_name}.{lineage_row.target_schema}.{lineage_row.target_table}" + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + platform_instance=self.config.platform_instance, + name=target_path, + env=self.config.env, + ) return LineageItem( - dataset=LineageDataset(platform=target_platform, path=target_path), + dataset=LineageDataset(platform=target_platform, urn=urn), upstreams=set(), collector_type=lineage_type, + cll=None, ) def _get_upstream_lineages( @@ -331,11 +379,22 @@ def _get_upstream_lineages( targe_source = [] for source in sources: if source.platform == LineageDatasetPlatform.REDSHIFT: - db, schema, table = source.path.split(".") + qualified_table_name = dataset_urn.DatasetUrn.create_from_string( + source.urn + ).get_entity_id()[1] + db, schema, table = qualified_table_name.split(".") if db == raw_db_name: db = alias_db_name path = f"{db}.{schema}.{table}" - source = LineageDataset(platform=source.platform, path=path) + source = LineageDataset( + platform=source.platform, + urn=make_dataset_urn_with_platform_instance( + 
platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ), + ) # Filtering out tables which does not exist in Redshift # It was deleted in the meantime or query parser did not capture well the table name @@ -345,7 +404,7 @@ def _get_upstream_lineages( or not any(table == t.name for t in all_tables[db][schema]) ): logger.debug( - f"{source.path} missing table, dropping from lineage.", + f"{source.urn} missing table, dropping from lineage.", ) self.report.num_lineage_tables_dropped += 1 continue @@ -433,36 +492,73 @@ def populate_lineage( memory_footprint.total_size(self._lineage_map) ) + def make_fine_grained_lineage_class( + self, lineage_item: LineageItem, dataset_urn: str + ) -> List[FineGrainedLineage]: + fine_grained_lineages: List[FineGrainedLineage] = [] + + if ( + self.config.extract_column_level_lineage is False + or lineage_item.cll is None + ): + logger.debug("CLL extraction is disabled") + return fine_grained_lineages + + logger.debug("Extracting column level lineage") + + cll: List[sqlglot_l.ColumnLineageInfo] = lineage_item.cll + + for cll_info in cll: + downstream = ( + [builder.make_schema_field_urn(dataset_urn, cll_info.downstream.column)] + if cll_info.downstream is not None + and cll_info.downstream.column is not None + else [] + ) + + upstreams = [ + builder.make_schema_field_urn(column_ref.table, column_ref.column) + for column_ref in cll_info.upstreams + ] + + fine_grained_lineages.append( + FineGrainedLineage( + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=downstream, + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=upstreams, + ) + ) + + logger.debug(f"Created fine_grained_lineage for {dataset_urn}") + + return fine_grained_lineages + def get_lineage( self, table: Union[RedshiftTable, RedshiftView], dataset_urn: str, schema: RedshiftSchema, ) -> Optional[Tuple[UpstreamLineageClass, Dict[str, str]]]: - dataset_key = 
mce_builder.dataset_urn_to_key(dataset_urn) - if dataset_key is None: - return None upstream_lineage: List[UpstreamClass] = [] - if dataset_key.name in self._lineage_map: - item = self._lineage_map[dataset_key.name] + cll_lineage: List[FineGrainedLineage] = [] + + if dataset_urn in self._lineage_map: + item = self._lineage_map[dataset_urn] for upstream in item.upstreams: upstream_table = UpstreamClass( - dataset=make_dataset_urn_with_platform_instance( - upstream.platform.value, - upstream.path, - platform_instance=self.config.platform_instance_map.get( - upstream.platform.value - ) - if self.config.platform_instance_map - else None, - env=self.config.env, - ), + dataset=upstream.urn, type=item.dataset_lineage_type, ) upstream_lineage.append(upstream_table) + cll_lineage = self.make_fine_grained_lineage_class( + lineage_item=item, + dataset_urn=dataset_urn, + ) + tablename = table.name if table.type == "EXTERNAL_TABLE": # external_db_params = schema.option @@ -489,7 +585,12 @@ def get_lineage( else: return None - return UpstreamLineage(upstreams=upstream_lineage), {} + return ( + UpstreamLineage( + upstreams=upstream_lineage, fineGrainedLineages=cll_lineage or None + ), + {}, + ) def report_status(self, step: str, status: bool) -> None: if self.redundant_run_skip_handler: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index e8a8ff976afa6..c7d01021773b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -1,5 +1,6 @@ import logging from collections import defaultdict +from functools import partial from typing import Dict, Iterable, List, Optional, Type, Union import humanfriendly @@ -25,6 +26,7 @@ platform_name, support_status, ) +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import 
( CapabilityReport, MetadataWorkUnitProcessor, @@ -216,6 +218,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): ] = { "BYTES": BytesType, "BOOL": BooleanType, + "BOOLEAN": BooleanType, + "DOUBLE": NumberType, + "DOUBLE PRECISION": NumberType, "DECIMAL": NumberType, "NUMERIC": NumberType, "BIGNUMERIC": NumberType, @@ -242,6 +247,13 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "CHARACTER": StringType, "CHAR": StringType, "TIMESTAMP WITHOUT TIME ZONE": TimeType, + "REAL": NumberType, + "VARCHAR": StringType, + "TIMESTAMPTZ": TimeType, + "GEOMETRY": NullType, + "HLLSKETCH": NullType, + "TIMETZ": TimeType, + "VARBYTE": StringType, } def get_platform_instance_id(self) -> str: @@ -369,6 +381,11 @@ def gen_database_container(self, database: str) -> Iterable[MetadataWorkUnit]: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, @@ -881,6 +898,7 @@ def extract_lineage( self.lineage_extractor = RedshiftLineageExtractor( config=self.config, report=self.report, + context=self.ctx, redundant_run_skip_handler=self.redundant_lineage_run_skip_handler, ) @@ -941,7 +959,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) for schema in self.db_views[database]: @@ -955,7 +975,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental 
lineage generation is taken care by auto_incremental_lineage ) def add_config_to_report(self): diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 9b5296f0b9dd5..3ef6476078f6f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -75,7 +75,10 @@ class DataLakeSourceConfig( default=100, description="Maximum number of rows to use when inferring schemas for TSV and CSV files.", ) - + add_partition_columns_to_schema: bool = Field( + default=False, + description="Whether to add partition fields to the schema.", + ) verify_ssl: Union[bool, str] = Field( default=True, description="Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index ac4433b7eb1f0..94c571eabad11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -10,6 +10,7 @@ from pathlib import PurePath from typing import Any, Dict, Iterable, List, Optional, Tuple +import smart_open.compression as so_compression from more_itertools import peekable from pyspark.conf import SparkConf from pyspark.sql import SparkSession @@ -77,6 +78,7 @@ NullTypeClass, NumberTypeClass, RecordTypeClass, + SchemaField, SchemaFieldDataType, SchemaMetadata, StringTypeClass, @@ -89,6 +91,7 @@ OperationClass, OperationTypeClass, OtherSchemaClass, + SchemaFieldDataTypeClass, _Aspect, ) from datahub.telemetry import stats, telemetry @@ -120,6 +123,9 @@ } PAGE_SIZE = 1000 +# Hack to support the .gzip extension with smart_open. 
+so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"]) + def get_column_type( report: SourceReport, dataset_name: str, column_type: str @@ -407,7 +413,9 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: table_data.full_path, "rb", transport_params={"client": s3_client} ) else: - file = open(table_data.full_path, "rb") + # We still use smart_open here to take advantage of the compression + # capabilities of smart_open. + file = smart_open(table_data.full_path, "rb") fields = [] @@ -452,8 +460,39 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: logger.debug(f"Extracted fields in schema: {fields}") fields = sorted(fields, key=lambda f: f.fieldPath) + if self.source_config.add_partition_columns_to_schema: + self.add_partition_columns_to_schema( + fields=fields, path_spec=path_spec, full_path=table_data.full_path + ) + return fields + def add_partition_columns_to_schema( + self, path_spec: PathSpec, full_path: str, fields: List[SchemaField] + ) -> None: + is_fieldpath_v2 = False + for field in fields: + if field.fieldPath.startswith("[version=2.0]"): + is_fieldpath_v2 = True + break + vars = path_spec.get_named_vars(full_path) + if vars is not None and "partition_key" in vars: + for partition_key in vars["partition_key"].values(): + fields.append( + SchemaField( + fieldPath=f"{partition_key}" + if not is_fieldpath_v2 + else f"[version=2.0].[type=string].{partition_key}", + nativeDataType="string", + type=SchemaFieldDataType(StringTypeClass()) + if not is_fieldpath_v2 + else SchemaFieldDataTypeClass(type=StringTypeClass()), + isPartitioningKey=True, + nullable=True, + recursive=False, + ) + ) + def get_table_profile( self, table_data: TableData, dataset_urn: str ) -> Iterable[MetadataWorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 
9a993f5774032..4219533dc217c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -20,12 +20,12 @@ import datahub.emitter.mce_builder as builder from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.snowflake.constants import ( LINEAGE_PERMISSION_ERROR, SnowflakeEdition, - SnowflakeObjectDomain, ) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery @@ -53,7 +53,6 @@ sqlglot_lineage, ) from datahub.utilities.time import ts_millis_to_datetime -from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -136,7 +135,6 @@ def get_workunits( return self._populate_external_lineage_map(discovered_tables) - if self.config.include_view_lineage: if len(discovered_views) > 0: yield from self.get_view_upstream_workunits( @@ -196,19 +194,6 @@ def get_table_upstream_workunits( f"Upstream lineage detected for {self.report.num_tables_with_upstreams} tables.", ) - def _gen_workunit_from_sql_parsing_result( - self, - dataset_identifier: str, - result: SqlParsingResult, - ) -> MetadataWorkUnit: - upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result( - self.dataset_urn_builder(dataset_identifier), result - ) - self.report.num_views_with_upstreams += 1 - return self._create_upstream_lineage_workunit( - dataset_identifier, upstreams, fine_upstreams - ) - def _gen_workunits_from_query_result( self, discovered_assets: Collection[str], @@ -242,18 +227,31 @@ def 
get_view_upstream_workunits( schema_resolver: SchemaResolver, view_definitions: MutableMapping[str, str], ) -> Iterable[MetadataWorkUnit]: - views_processed = set() + views_failed_parsing = set() if self.config.include_view_column_lineage: with PerfTimer() as timer: + builder = SqlParsingBuilder( + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) for view_identifier, view_definition in view_definitions.items(): result = self._run_sql_parser( view_identifier, view_definition, schema_resolver ) - if result: - views_processed.add(view_identifier) - yield self._gen_workunit_from_sql_parsing_result( - view_identifier, result + if result and result.out_tables: + self.report.num_views_with_upstreams += 1 + # This does not yield any workunits but we use + # yield here to execute this method + yield from builder.process_sql_parsing_result( + result=result, + query=view_definition, + is_view_ddl=True, ) + else: + views_failed_parsing.add(view_identifier) + + yield from builder.gen_workunits() self.report.view_lineage_parse_secs = timer.elapsed_seconds() with PerfTimer() as timer: @@ -261,7 +259,7 @@ def get_view_upstream_workunits( if results: yield from self._gen_workunits_from_query_result( - set(discovered_views) - views_processed, + views_failed_parsing, results, upstream_for_view=True, ) @@ -349,39 +347,6 @@ def get_upstreams_from_query_result_row( return upstreams, fine_upstreams - def get_upstreams_from_sql_parsing_result( - self, downstream_table_urn: str, result: SqlParsingResult - ) -> Tuple[List[UpstreamClass], List[FineGrainedLineage]]: - # Note: This ignores the out_tables section of the sql parsing result. 
- upstreams = [ - UpstreamClass(dataset=upstream_table_urn, type=DatasetLineageTypeClass.VIEW) - for upstream_table_urn in set(result.in_tables) - ] - - # Maps downstream_col -> [upstream_col] - fine_lineage: Dict[str, Set[SnowflakeColumnId]] = defaultdict(set) - for column_lineage in result.column_lineage or []: - out_column = column_lineage.downstream.column - for upstream_column_info in column_lineage.upstreams: - upstream_table_name = DatasetUrn.create_from_string( - upstream_column_info.table - ).get_dataset_name() - fine_lineage[out_column].add( - SnowflakeColumnId( - columnName=upstream_column_info.column, - objectName=upstream_table_name, - objectDomain=SnowflakeObjectDomain.VIEW.value, - ) - ) - fine_upstreams = [ - self.build_finegrained_lineage( - downstream_table_urn, downstream_col, upstream_cols - ) - for downstream_col, upstream_cols in fine_lineage.items() - ] - - return upstreams, list(filter(None, fine_upstreams)) - def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None: with PerfTimer() as timer: self.report.num_external_table_edges_scanned = 0 @@ -652,7 +617,9 @@ def get_external_upstreams(self, external_lineage: Set[str]) -> List[UpstreamCla # For now, populate only for S3 if external_lineage_entry.startswith("s3://"): external_upstream_table = UpstreamClass( - dataset=make_s3_urn(external_lineage_entry, self.config.env), + dataset=make_s3_urn_for_lineage( + external_lineage_entry, self.config.env + ), type=DatasetLineageTypeClass.COPY, ) external_upstreams.append(external_upstream_table) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 24275dcdff34d..8e18d85d6f3ca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -86,7 +86,7 @@ def get_batch_kwargs( # 
Fixed-size sampling can be slower than equivalent fraction-based sampling # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations sample_pc = 100 * self.config.profiling.sample_size / table.rows_count - custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})' return { **super().get_batch_kwargs(table, schema_name, db_name), # Lowercase/Mixedcase table names in Snowflake do not work by default. diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 215116b4c33fb..a5c07d9a3870c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -4,6 +4,7 @@ import os.path import platform from dataclasses import dataclass +from functools import partial from typing import Callable, Dict, Iterable, List, Optional, Union import pandas as pd @@ -26,6 +27,7 @@ platform_name, support_status, ) +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, @@ -511,6 +513,11 @@ def _init_schema_resolver(self) -> SchemaResolver: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 9cb613bde1e9f..75e8fe1d6f7a6 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -1,12 +1,17 @@ import json import logging +import re import typing -from typing import Any, Dict, Iterable, List, Optional, Tuple, cast +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast import pydantic from pyathena.common import BaseCursor from pyathena.model import AthenaTableMetadata +from pyathena.sqlalchemy_athena import AthenaRestDialect +from sqlalchemy import create_engine, inspect, types from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.types import TypeEngine +from sqlalchemy_bigquery import STRUCT from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.emitter.mcp_builder import ContainerKey, DatabaseKey @@ -21,13 +26,166 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.aws.s3_util import make_s3_urn from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes -from datahub.ingestion.source.sql.sql_common import SQLAlchemySource +from datahub.ingestion.source.sql.sql_common import ( + SQLAlchemySource, + register_custom_type, +) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, gen_database_container, gen_database_key, ) +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField +from datahub.metadata.schema_classes import MapTypeClass, RecordTypeClass +from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column +from datahub.utilities.sqlalchemy_type_converter import ( + MapType, + get_schema_fields_for_sqlalchemy_column, +) + +logger = logging.getLogger(__name__) + +assert STRUCT, "required type modules are not available" +register_custom_type(STRUCT, RecordTypeClass) +register_custom_type(MapType, MapTypeClass) + + +class 
CustomAthenaRestDialect(AthenaRestDialect): + """Custom definition of the Athena dialect. + + Custom implementation that allows to extend/modify the behavior of the SQLalchemy + dialect that is used by PyAthena (which is the library that is used by DataHub + to extract metadata from Athena). + This dialect can then be used by the inspector (see get_inspectors()). + + """ + + # regex to identify complex types in DDL strings which are embedded in `<>`. + _complex_type_pattern = re.compile(r"(<.+>)") + + @typing.no_type_check + def _get_column_type( + self, type_: Union[str, Dict[str, Any]] + ) -> TypeEngine: # noqa: C901 + """Derives the data type of the Athena column. + + This method is overwritten to extend the behavior of PyAthena. + Pyathena is not capable of detecting complex data types, e.g., + arrays, maps, or, structs (as of version 2.25.2). + The custom implementation extends the functionality by the above-mentioned data types. + """ + + # Originally, this method only handles `type_` as a string + # With the workaround used below to parse DDL strings for structs, + # `type` might also be a dictionary + if isinstance(type_, str): + match = self._pattern_column_type.match(type_) + if match: + type_name = match.group(1).lower() + type_meta_information = match.group(2) + else: + type_name = type_.lower() + type_meta_information = None + elif isinstance(type_, dict): + # this occurs only when a type parsed as part of a STRUCT is passed + # in such case type_ is a dictionary whose type can be retrieved from the attribute + type_name = type_.get("type", None) + type_meta_information = None + else: + raise RuntimeError(f"Unsupported type definition: {type_}") + + args = [] + + if type_name in ["array"]: + detected_col_type = types.ARRAY + + # here we need to account again for two options how `type_` is passed to this method + # first, the simple array definition as a DDL string (something like array) + # this is always the case when the array is not part of a 
complex data type (mainly STRUCT) + # second, the array definition can also be passed in form of dictionary + # this is the case when the array is part of a complex data type + if isinstance(type_, str): + # retrieve the raw name of the data type as a string + array_type_raw = self._complex_type_pattern.findall(type_)[0][ + 1:-1 + ] # array type without enclosing <> + # convert the string name of the data type into a SQLalchemy type (expected return) + array_type = self._get_column_type(array_type_raw) + elif isinstance(type_, dict): + # retrieve the data type of the array items and + # transform it into a SQLalchemy type + array_type = self._get_column_type(type_["items"]) + else: + raise RuntimeError(f"Unsupported array definition: {type_}") + + args = [array_type] + + elif type_name in ["struct", "record"]: + # STRUCT is not part of the SQLalchemy types selection + # but is provided by another official SQLalchemy library and + # compatible with the other SQLalchemy types + detected_col_type = STRUCT + + if isinstance(type_, dict): + # in case a struct as part of another struct is passed + # it is provided in form of a dictionary and + # can simply be used for the further processing + struct_type = type_ + else: + # this is the case when the type definition of the struct is passed as a DDL string + # therefore, it is required to parse the DDL string + # here a method provided in another Datahub source is used so that the parsing + # doesn't need to be implemented twice + # `get_avro_schema_for_hive_column` accepts a DDL description as column type and + # returns the parsed data types in form of a dictionary + schema = get_avro_schema_for_hive_column( + hive_column_name=type_name, hive_column_type=type_ + ) + + # the actual type description needs to be extracted + struct_type = schema["fields"][0]["type"] + + # A STRUCT consist of multiple attributes which are expected to be passed as + # a list of tuples consisting of name data type pairs. 
e.g., `('age', Integer())` + # See the reference: + # https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_struct.py#L53 + # + # To extract all of them, we simply iterate over all detected fields and + # convert them to SQLalchemy types + struct_args = [] + for field in struct_type["fields"]: + struct_args.append( + ( + field["name"], + self._get_column_type(field["type"]["type"]) + if field["type"]["type"] not in ["record", "array"] + else self._get_column_type(field["type"]), + ) + ) + + args = struct_args + + elif type_name in ["map"]: + # Instead of SQLalchemy's TupleType the custom MapType is used here + # which is just a simple wrapper around TupleType + detected_col_type = MapType + + # the type definition for maps looks like the following: key_type:val_type (e.g., string:string) + key_type_raw, value_type_raw = type_meta_information.split(",") + + # convert both type names to actual SQLalchemy types + args = [ + self._get_column_type(key_type_raw), + self._get_column_type(value_type_raw), + ] + # by using get_avro_schema_for_hive_column() for parsing STRUCTs the data type `long` + # can also be returned, so we need to extend the handling here as well + elif type_name in ["bigint", "long"]: + detected_col_type = types.BIGINT + else: + return super()._get_column_type(type_name) + return detected_col_type(*args) class AthenaConfig(SQLCommonConfig): @@ -129,6 +287,18 @@ def create(cls, config_dict, ctx): config = AthenaConfig.parse_obj(config_dict) return cls(config, ctx) + # overwrite this method to allow to specify the usage of a custom dialect + def get_inspectors(self) -> Iterable[Inspector]: + url = self.config.get_sql_alchemy_url() + logger.debug(f"sql_alchemy_url={url}") + engine = create_engine(url, **self.config.options) + + # set custom dialect to be used by the inspector + engine.dialect = CustomAthenaRestDialect() + with engine.connect() as conn: + inspector = inspect(conn) + yield inspector + def 
get_table_properties( self, inspector: Inspector, schema: str, table: str ) -> Tuple[Optional[str], Dict[str, str], Optional[str]]: @@ -136,9 +306,7 @@ def get_table_properties( self.cursor = cast(BaseCursor, inspector.engine.raw_connection().cursor()) assert self.cursor - # Unfortunately properties can be only get through private methods as those are not exposed - # https://github.com/laughingman7743/PyAthena/blob/9e42752b0cc7145a87c3a743bb2634fe125adfa7/pyathena/model.py#L201 - metadata: AthenaTableMetadata = self.cursor._get_table_metadata( + metadata: AthenaTableMetadata = self.cursor.get_table_metadata( table_name=table, schema_name=schema ) description = metadata.comment @@ -241,6 +409,30 @@ def get_schema_names(self, inspector: Inspector) -> List[str]: return [schema for schema in schemas if schema == athena_config.database] return schemas + # Overwrite to modify the creation of schema fields + def get_schema_fields_for_column( + self, + dataset_name: str, + column: Dict, + pk_constraints: Optional[dict] = None, + tags: Optional[List[str]] = None, + ) -> List[SchemaField]: + fields = get_schema_fields_for_sqlalchemy_column( + column_name=column["name"], + column_type=column["type"], + description=column.get("comment", None), + nullable=column.get("nullable", True), + is_part_of_key=True + if ( + pk_constraints is not None + and isinstance(pk_constraints, dict) + and column["name"] in pk_constraints.get("constrained_columns", []) + ) + else False, + ) + + return fields + def close(self): if self.cursor: self.cursor.close() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 63b21bc82eddd..d081acb6c1eff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -1,15 +1,18 @@ import json import logging import re -from typing import Any, Dict, List, Optional +from typing import Any, Dict, 
Iterable, List, Optional, Union from pydantic.class_validators import validator from pydantic.fields import Field # This import verifies that the dependencies are available. from pyhive import hive # noqa: F401 -from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveTimestamp +from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp +from sqlalchemy.engine.reflection import Inspector +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( SourceCapability, SupportStatus, @@ -18,8 +21,10 @@ platform_name, support_status, ) +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.extractor import schema_util -from datahub.ingestion.source.sql.sql_common import register_custom_type +from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type +from datahub.ingestion.source.sql.sql_config import SQLCommonConfig from datahub.ingestion.source.sql.two_tier_sql_source import ( TwoTierSQLAlchemyConfig, TwoTierSQLAlchemySource, @@ -31,6 +36,7 @@ SchemaField, TimeTypeClass, ) +from datahub.metadata.schema_classes import ViewPropertiesClass from datahub.utilities import config_clean from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column @@ -90,19 +96,34 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw): logger.warning(f"Failed to patch method due to {e}") +@reflection.cache # type: ignore +def get_view_names_patched(self, connection, schema=None, **kw): + query = "SHOW VIEWS" + if schema: + query += " IN " + self.identifier_preparer.quote_identifier(schema) + return [row[0] for row in connection.execute(query)] + + +@reflection.cache # type: ignore +def get_view_definition_patched(self, connection, view_name, schema=None, **kw): + full_table = self.identifier_preparer.quote_identifier(view_name) + if 
schema: + full_table = "{}.{}".format( + self.identifier_preparer.quote_identifier(schema), + self.identifier_preparer.quote_identifier(view_name), + ) + row = connection.execute("SHOW CREATE TABLE {}".format(full_table)).fetchone() + return row[0] + + +HiveDialect.get_view_names = get_view_names_patched +HiveDialect.get_view_definition = get_view_definition_patched + + class HiveConfig(TwoTierSQLAlchemyConfig): # defaults scheme = Field(default="hive", hidden_from_docs=True) - # Hive SQLAlchemy connector returns views as tables. - # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. - # Disabling views helps us prevent this duplication. - include_views = Field( - default=False, - hidden_from_docs=True, - description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.", - ) - @validator("host_port") def clean_host_port(cls, v): return config_clean.remove_protocol(v) @@ -174,3 +195,41 @@ def get_schema_fields_for_column( return new_fields return fields + + # Hive SQLAlchemy connector returns views as tables in get_table_names. + # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. 
+ # This override makes sure that we ingest view definitions for views + def _process_view( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + view: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + + try: + view_definition = inspector.get_view_definition(view, schema) + if view_definition is None: + view_definition = "" + else: + # Some dialects return a TextClause instead of a raw string, + # so we need to convert them to a string. + view_definition = str(view_definition) + except NotImplementedError: + view_definition = "" + + if view_definition: + view_properties_aspect = ViewPropertiesClass( + materialized=False, viewLanguage="SQL", viewLogic=view_definition + ) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=view_properties_aspect, + ).as_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 685d4fb3074c9..710825c8ba55d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -530,7 +530,7 @@ def _get_procedure_inputs( def _get_procedure_code( conn: Connection, procedure: StoredProcedure ) -> Tuple[Optional[str], Optional[str]]: - query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.full_name}'" + query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.escape_full_name}'" try: code_data = conn.execute(query) except ProgrammingError: @@ -567,7 +567,7 @@ def _get_procedure_properties( create_date as date_created, modify_date as date_modified FROM sys.procedures - WHERE object_id = object_id('{procedure.full_name}') + WHERE object_id = object_id('{procedure.escape_full_name}') """ ) properties = {} diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index ba8655b83446d..4f133c6459a0f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -103,10 +103,6 @@ class BasePostgresConfig(BasicSQLAlchemyConfig): class PostgresConfig(BasePostgresConfig): - include_view_lineage = Field( - default=False, description="Include table lineage for views" - ) - database_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), description=( @@ -183,9 +179,10 @@ def get_inspectors(self) -> Iterable[Inspector]: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: yield from super().get_workunits_internal() - for inspector in self.get_inspectors(): - if self.config.include_view_lineage: - yield from self._get_view_lineage_workunits(inspector) + if self.views_failed_parsing: + for inspector in self.get_inspectors(): + if self.config.include_view_lineage: + yield from self._get_view_lineage_workunits(inspector) def _get_view_lineage_elements( self, inspector: Inspector @@ -217,14 +214,15 @@ def _get_view_lineage_elements( key = (lineage.dependent_view, lineage.dependent_schema) # Append the source table to the list. lineage_elements[key].append( - mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( + mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_identifier( schema=lineage.source_schema, entity=lineage.source_table, inspector=inspector, ), - self.config.env, + platform_instance=self.config.platform_instance, + env=self.config.env, ) ) @@ -244,12 +242,16 @@ def _get_view_lineage_workunits( dependent_view, dependent_schema = key # Construct a lineage object. 
- urn = mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( - schema=dependent_schema, entity=dependent_view, inspector=inspector - ), - self.config.env, + view_identifier = self.get_identifier( + schema=dependent_schema, entity=dependent_view, inspector=inspector + ) + if view_identifier not in self.views_failed_parsing: + return + urn = mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=view_identifier, + platform_instance=self.config.platform_instance, + env=self.config.env, ) # use the mce_builder to ensure that the change proposal inherits diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 056be6c2e50ac..51909eaf4ed55 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -2,12 +2,14 @@ import logging import traceback from dataclasses import dataclass, field +from functools import partial from typing import ( TYPE_CHECKING, Any, Dict, Iterable, List, + MutableMapping, Optional, Set, Tuple, @@ -29,7 +31,9 @@ make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import ( @@ -86,9 +90,16 @@ ViewPropertiesClass, ) from datahub.telemetry import telemetry +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import 
SQLAlchemyQueryCombinerReport +from datahub.utilities.sqlglot_lineage import ( + SchemaResolver, + SqlParsingResult, + sqlglot_lineage, + view_definition_lineage_helper, +) if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -110,6 +121,11 @@ class SQLSourceReport(StaleEntityRemovalSourceReport): query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None + num_view_definitions_parsed: int = 0 + num_view_definitions_failed_parsing: int = 0 + num_view_definitions_failed_column_parsing: int = 0 + view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) + def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: """ Entity could be a view or a table @@ -138,6 +154,7 @@ class SqlWorkUnit(MetadataWorkUnit): _field_type_mapping: Dict[Type[TypeEngine], Type] = { + # Note: to add dialect-specific types to this mapping, use the `register_custom_type` function. types.Integer: NumberTypeClass, types.Numeric: NumberTypeClass, types.Boolean: BooleanTypeClass, @@ -318,6 +335,18 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) cached_domains=[k for k in self.config.domain], graph=self.ctx.graph ) + self.views_failed_parsing: Set[str] = set() + self.schema_resolver: SchemaResolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + self._view_definition_cache: MutableMapping[str, str] + if self.config.use_file_backed_cache: + self._view_definition_cache = FileBackedDict[str]() + else: + self._view_definition_cache = {} + def warn(self, log: logging.Logger, key: str, reason: str) -> None: self.report.report_warning(key, reason[:100]) log.warning(f"{key} => {reason}") @@ -454,6 +483,11 @@ def get_schema_level_workunits( def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + 
self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, @@ -511,6 +545,35 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests, profiler, platform=self.platform ) + if self.config.include_view_lineage: + yield from self.get_view_lineage() + + def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: + builder = SqlParsingBuilder( + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + for dataset_name in self._view_definition_cache.keys(): + view_definition = self._view_definition_cache[dataset_name] + result = self._run_sql_parser( + dataset_name, + view_definition, + self.schema_resolver, + ) + if result and result.out_tables: + # This does not yield any workunits but we use + # yield here to execute this method + yield from builder.process_sql_parsing_result( + result=result, + query=view_definition, + is_view_ddl=True, + include_column_lineage=self.config.include_view_column_lineage, + ) + else: + self.views_failed_parsing.add(dataset_name) + yield from builder.gen_workunits() + def get_identifier( self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any ) -> str: @@ -657,6 +720,8 @@ def _process_table( schema_fields, ) dataset_snapshot.aspects.append(schema_metadata) + if self.config.include_view_lineage: + self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) db_name = self.get_db_name(inspector) yield from self.add_table_to_schema_container( @@ -861,6 +926,12 @@ def _process_view( view: str, sql_config: SQLCommonConfig, ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) try: columns = inspector.get_columns(view, schema) except KeyError: @@ -876,6 +947,8 @@ def _process_view( columns, canonical_schema=schema_fields, ) + if 
self.config.include_view_lineage: + self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) description, properties, _ = self.get_table_properties(inspector, schema, view) try: view_definition = inspector.get_view_definition(view, schema) @@ -889,12 +962,9 @@ def _process_view( view_definition = "" properties["view_definition"] = view_definition properties["is_view"] = "True" - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) + if view_definition and self.config.include_view_lineage: + self._view_definition_cache[dataset_name] = view_definition + dataset_snapshot = DatasetSnapshot( urn=dataset_urn, aspects=[StatusClass(removed=False)], @@ -941,6 +1011,51 @@ def _process_view( domain_registry=self.domain_registry, ) + def _run_sql_parser( + self, view_identifier: str, query: str, schema_resolver: SchemaResolver + ) -> Optional[SqlParsingResult]: + try: + database, schema = self.get_db_schema(view_identifier) + except ValueError: + logger.warning(f"Invalid view identifier: {view_identifier}") + return None + raw_lineage = sqlglot_lineage( + query, + schema_resolver=schema_resolver, + default_db=database, + default_schema=schema, + ) + view_urn = make_dataset_urn_with_platform_instance( + self.platform, + view_identifier, + self.config.platform_instance, + self.config.env, + ) + + if raw_lineage.debug_info.table_error: + logger.debug( + f"Failed to parse lineage for view {view_identifier}: " + f"{raw_lineage.debug_info.table_error}" + ) + self.report.num_view_definitions_failed_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}" + ) + return None + + elif raw_lineage.debug_info.column_error: + self.report.num_view_definitions_failed_column_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Column-level sql parsing error for 
view {view_identifier}: {raw_lineage.debug_info.column_error}" + ) + else: + self.report.num_view_definitions_parsed += 1 + return view_definition_lineage_helper(raw_lineage, view_urn) + + def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: + database, schema, _view = dataset_identifier.split(".") + return database, schema + def get_profiler_instance(self, inspector: Inspector) -> "DatahubGEProfiler": from datahub.ingestion.source.ge_data_profiler import DatahubGEProfiler diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 677d32c8bac08..095b8e6443171 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -1,13 +1,16 @@ import logging from abc import abstractmethod from typing import Any, Dict, Optional -from urllib.parse import quote_plus import pydantic from pydantic import Field +from sqlalchemy.engine import URL -from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.common import AllowDenyPattern, ConfigModel, LineageConfig +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -21,7 +24,12 @@ logger: logging.Logger = logging.getLogger(__name__) -class SQLCommonConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class SQLCommonConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, + LineageConfig, +): options: dict = pydantic.Field( default_factory=dict, description="Any 
options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.", @@ -63,6 +71,22 @@ class SQLCommonConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): description="If the source supports it, include table lineage to the underlying storage location.", ) + include_view_lineage: bool = Field( + default=True, + description="Populates view->view and table->view lineage using DataHub's sql parser.", + ) + + include_view_column_lineage: bool = Field( + default=True, + description="Populates column-level lineage for view->view and table->view lineage using DataHub's sql parser." + " Requires `include_view_lineage` to be enabled.", + ) + + use_file_backed_cache: bool = Field( + default=True, + description="Whether to use a file backed cache for the view definitions.", + ) + profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None @@ -118,7 +142,11 @@ class SQLAlchemyConnectionConfig(ConfigModel): # Duplicate of SQLCommonConfig.options options: dict = pydantic.Field( default_factory=dict, - description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.", + description=( + "Any options specified here will be passed to " + "[SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs." + " To set connection arguments in the URL, specify them under `connect_args`." 
+ ), ) _database_alias_deprecation = pydantic_field_deprecated( @@ -154,21 +182,26 @@ def make_sqlalchemy_uri( db: Optional[str], uri_opts: Optional[Dict[str, Any]] = None, ) -> str: - url = f"{scheme}://" - if username is not None: - url += f"{quote_plus(username)}" - if password is not None: - url += f":{quote_plus(password)}" - url += "@" - if at is not None: - url += f"{at}" - if db is not None: - url += f"/{db}" - if uri_opts is not None: - if db is None: - url += "/" - params = "&".join( - f"{key}={quote_plus(value)}" for (key, value) in uri_opts.items() if value + host: Optional[str] = None + port: Optional[int] = None + if at: + try: + host, port_str = at.rsplit(":", 1) + port = int(port_str) + except ValueError: + host = at + port = None + if uri_opts: + uri_opts = {k: v for k, v in uri_opts.items() if v is not None} + + return str( + URL.create( + drivername=scheme, + username=username, + password=password, + host=host, + port=port, + database=db, + query=uri_opts or {}, ) - url = f"{url}?{params}" - return url + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py new file mode 100644 index 0000000000000..899a7b6697c0a --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -0,0 +1,223 @@ +import logging +from dataclasses import dataclass +from datetime import datetime +from typing import Iterable, Optional, Union + +# This import verifies that the dependencies are available. 
+import teradatasqlalchemy # noqa: F401 +import teradatasqlalchemy.types as custom_types +from pydantic.fields import Field +from sqlalchemy import create_engine +from sqlalchemy.engine import Engine + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SourceCapability, + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.two_tier_sql_source import ( + TwoTierSQLAlchemyConfig, + TwoTierSQLAlchemySource, +) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport +from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.metadata.com.linkedin.pegasus2avro.schema import ( + BytesTypeClass, + TimeTypeClass, +) +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage + +logger: logging.Logger = logging.getLogger(__name__) + +register_custom_type(custom_types.JSON, BytesTypeClass) +register_custom_type(custom_types.INTERVAL_DAY, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MINUTE, TimeTypeClass) 
+register_custom_type(custom_types.INTERVAL_MINUTE_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MONTH, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR_TO_MONTH, TimeTypeClass) +register_custom_type(custom_types.MBB, BytesTypeClass) +register_custom_type(custom_types.MBR, BytesTypeClass) +register_custom_type(custom_types.GEOMETRY, BytesTypeClass) +register_custom_type(custom_types.TDUDT, BytesTypeClass) +register_custom_type(custom_types.XML, BytesTypeClass) + + +@dataclass +class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): + num_queries_parsed: int = 0 + num_view_ddl_parsed: int = 0 + num_table_parse_failures: int = 0 + + +class BaseTeradataConfig(TwoTierSQLAlchemyConfig): + scheme = Field(default="teradatasql", description="database scheme") + + +class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): + database_pattern = Field( + default=AllowDenyPattern(deny=["dbc"]), + description="Regex patterns for databases to filter in ingestion.", + ) + include_table_lineage = Field( + default=False, + description="Whether to include table lineage in the ingestion. 
" + "This requires to have the table lineage feature enabled.", + ) + + usage: BaseUsageConfig = Field( + description="The usage config to use when generating usage statistics", + default=BaseUsageConfig(), + ) + + default_db: Optional[str] = Field( + default=None, + description="The default database to use for unqualified table names", + ) + + include_usage_statistics: bool = Field( + default=False, + description="Generate usage statistic.", + ) + + +@platform_name("Teradata") +@config_class(TeradataConfig) +@support_status(SupportStatus.TESTING) +@capability(SourceCapability.DOMAINS, "Enabled by default") +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.DELETION_DETECTION, "Optionally enabled via configuration") +@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") +@capability(SourceCapability.USAGE_STATS, "Optionally enabled via configuration") +class TeradataSource(TwoTierSQLAlchemySource): + """ + This plugin extracts the following: + + - Metadata for databases, schemas, views, and tables + - Column types associated with each table + - Table, row, and column statistics via optional SQL profiling + """ + + config: TeradataConfig + + LINEAGE_QUERY: str = """SELECT ProcID, UserName as "user", StartTime AT TIME ZONE 'GMT' as "timestamp", DefaultDatabase as default_database, QueryText as query + FROM "DBC".DBQLogTbl + where ErrorCode = 0 + and QueryText like 'create table demo_user.test_lineage%' + and "timestamp" >= TIMESTAMP '{start_time}' + and "timestamp" < TIMESTAMP '{end_time}' + """ + + def __init__(self, config: TeradataConfig, ctx: PipelineContext): + super().__init__(config, ctx, "teradata") + + self.report: TeradataReport = TeradataReport() 
+ self.graph: Optional[DataHubGraph] = ctx.graph + + self.builder: SqlParsingBuilder = SqlParsingBuilder( + usage_config=self.config.usage + if self.config.include_usage_statistics + else None, + generate_lineage=True, + generate_usage_statistics=self.config.include_usage_statistics, + generate_operations=self.config.usage.include_operational_stats, + ) + + self.schema_resolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + graph=None, + env=self.config.env, + ) + + @classmethod + def create(cls, config_dict, ctx): + config = TeradataConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: + engine = self.get_metadata_engine() + for entry in engine.execute( + self.LINEAGE_QUERY.format( + start_time=self.config.start_time, end_time=self.config.end_time + ) + ): + self.report.num_queries_parsed += 1 + if self.report.num_queries_parsed % 1000 == 0: + logger.info(f"Parsed {self.report.num_queries_parsed} queries") + + yield from self.gen_lineage_from_query( + query=entry.query, + default_database=entry.default_database, + timestamp=entry.timestamp, + user=entry.user, + is_view_ddl=False, + ) + + def gen_lineage_from_query( + self, + query: str, + default_database: Optional[str] = None, + timestamp: Optional[datetime] = None, + user: Optional[str] = None, + is_view_ddl: bool = False, + ) -> Iterable[MetadataWorkUnit]: + result = sqlglot_lineage( + sql=query, + schema_resolver=self.schema_resolver, + default_db=None, + default_schema=default_database + if default_database + else self.config.default_db, + ) + if result.debug_info.table_error: + logger.debug( + f"Error parsing table lineage, {result.debug_info.table_error}" + ) + self.report.num_table_parse_failures += 1 + else: + yield from self.builder.process_sql_parsing_result( + result, + query=query, + is_view_ddl=is_view_ddl, + query_timestamp=timestamp, + user=f"urn:li:corpuser:{user}", + 
include_urns=self.schema_resolver.get_urns(), + ) + + def get_metadata_engine(self) -> Engine: + url = self.config.get_sql_alchemy_url() + logger.debug(f"sql_alchemy_url={url}") + return create_engine(url, **self.config.options) + + def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: + # Add all schemas to the schema resolver + yield from super().get_workunits_internal() + + if self.config.include_table_lineage or self.config.include_usage_statistics: + self.report.report_ingestion_stage_start("audit log extraction") + yield from self.get_audit_log_mcps() + + yield from self.builder.gen_workunits() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index d9062cef06eae..efb1d3ffe119f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -1,8 +1,10 @@ import typing -from typing import Any, Dict, Iterable, Optional +import urllib.parse +from typing import Any, Dict, Iterable, Optional, Tuple from pydantic.fields import Field from sqlalchemy import create_engine, inspect +from sqlalchemy.engine import URL from sqlalchemy.engine.reflection import Inspector from datahub.configuration.common import AllowDenyPattern @@ -41,14 +43,27 @@ def get_sql_alchemy_url( uri_opts: typing.Optional[typing.Dict[str, typing.Any]] = None, current_db: typing.Optional[str] = None, ) -> str: - return self.sqlalchemy_uri or make_sqlalchemy_uri( - self.scheme, - self.username, - self.password.get_secret_value() if self.password else None, - self.host_port, - current_db if current_db else self.database, - uri_opts=uri_opts, - ) + if self.sqlalchemy_uri: + parsed_url = urllib.parse.urlsplit(self.sqlalchemy_uri) + url = URL.create( + drivername=parsed_url.scheme, + username=parsed_url.username, + password=parsed_url.password, + 
host=parsed_url.hostname, + port=parsed_url.port, + database=current_db or parsed_url.path.lstrip("/"), + query=urllib.parse.parse_qs(parsed_url.query), + ).update_query_dict(uri_opts or {}) + return str(url) + else: + return make_sqlalchemy_uri( + self.scheme, + self.username, + self.password.get_secret_value() if self.password else None, + self.host_port, + current_db or self.database, + uri_opts=uri_opts, + ) class TwoTierSQLAlchemySource(SQLAlchemySource): @@ -56,6 +71,10 @@ def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) self.config: TwoTierSQLAlchemyConfig = config + def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: + schema, _view = dataset_identifier.split(".", 1) + return None, schema + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index a417cae2b1ab0..b89db755853bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -86,7 +86,7 @@ class VerticaConfig(BasicSQLAlchemyConfig): default=True, description="Whether Models should be ingested." 
) - include_view_lineage: Optional[bool] = pydantic.Field( + include_view_lineage: bool = pydantic.Field( default=True, description="If the source supports it, include view lineage to the underlying storage location.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index bce4d1ec76e6e..fcf97e461967c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -20,11 +20,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph @@ -83,6 +89,8 @@ def compute_stats(self) -> None: @platform_name("SQL Queries") @config_class(SqlQueriesSourceConfig) @support_status(SupportStatus.TESTING) +@capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries") +@capability(SourceCapability.LINEAGE_FINE, "Parsed from SQL queries") class SqlQueriesSource(Source): # TODO: Documentation urns: Optional[Set[str]] diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index be97e9380f1f5..7fb2cf9813cab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -11,7 +11,6 @@ ConfigModel, ConfigurationError, DynamicTypedConfig, - 
LineageConfig, ) from datahub.configuration.time_window_config import BaseTimeWindowConfig from datahub.configuration.validate_field_rename import pydantic_renamed_field @@ -100,7 +99,7 @@ class StatefulIngestionConfigBase(GenericModel, Generic[CustomConfig]): ) -class StatefulLineageConfigMixin(LineageConfig): +class StatefulLineageConfigMixin: enable_stateful_lineage_ingestion: bool = Field( default=True, description="Enable stateful lineage ingestion." diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 14bc4242d2a91..e491a1e8b82fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -142,6 +142,7 @@ def get_filter_name(filter_obj): @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class SupersetSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index e347cd26d245a..4bc40b0aac964 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -77,6 +77,7 @@ FIELD_TYPE_MAPPING, MetadataQueryException, TableauLineageOverrides, + TableauUpstreamReference, clean_query, custom_sql_graphql_query, dashboard_graphql_query, @@ -85,7 +86,6 @@ get_overridden_info, get_unique_custom_sql, make_fine_grained_lineage_class, - make_table_urn, make_upstream_class, published_datasource_graphql_query, query_metadata, @@ -271,7 +271,7 @@ class TableauConfig( "You can change this if your Tableau projects contain slashes in their names, and you'd like to filter by project.", ) - default_schema_map: dict = Field( + default_schema_map: Dict[str, str] = Field( 
default={}, description="Default schema to use when schema is not found." ) ingest_tags: Optional[bool] = Field( @@ -452,6 +452,10 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): @capability(SourceCapability.OWNERSHIP, "Requires recipe configuration") @capability(SourceCapability.TAGS, "Requires recipe configuration") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configure using `extract_column_level_lineage`", +) class TableauSource(StatefulIngestionSourceBase): platform = "tableau" @@ -533,7 +537,7 @@ def fetch_projects(): path=[], ) # Set parent project name - for project_id, project in all_project_map.items(): + for _project_id, project in all_project_map.items(): if ( project.parent_id is not None and project.parent_id in all_project_map @@ -997,41 +1001,16 @@ def get_upstream_tables( ) continue - schema = table.get(tableau_constant.SCHEMA) or "" - table_name = table.get(tableau_constant.NAME) or "" - full_name = table.get(tableau_constant.FULL_NAME) or "" - upstream_db = ( - table[tableau_constant.DATABASE][tableau_constant.NAME] - if table.get(tableau_constant.DATABASE) - and table[tableau_constant.DATABASE].get(tableau_constant.NAME) - else "" - ) - logger.debug( - "Processing Table with Connection Type: {0} and id {1}".format( - table.get(tableau_constant.CONNECTION_TYPE) or "", - table.get(tableau_constant.ID) or "", - ) - ) - schema = self._get_schema(schema, upstream_db, full_name) - # if the schema is included within the table name we omit it - if ( - schema - and table_name - and full_name - and table_name == full_name - and schema in table_name - ): - logger.debug( - f"Omitting schema for upstream table {table[tableau_constant.ID]}, schema included in table name" + try: + ref = TableauUpstreamReference.create( + table, default_schema_map=self.config.default_schema_map ) - schema = "" + except Exception as e: + logger.info(f"Failed to generate 
upstream reference for {table}: {e}") + continue - table_urn = make_table_urn( + table_urn = ref.make_dataset_urn( self.config.env, - upstream_db, - table.get(tableau_constant.CONNECTION_TYPE) or "", - schema, - table_name, self.config.platform_instance_map, self.config.lineage_overrides, ) @@ -1052,7 +1031,7 @@ def get_upstream_tables( urn=table_urn, id=table[tableau_constant.ID], num_cols=num_tbl_cols, - paths=set([table_path]) if table_path else set(), + paths={table_path} if table_path else set(), ) else: self.database_tables[table_urn].update_table( @@ -2462,35 +2441,6 @@ def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]: is_embedded_ds=True, ) - @lru_cache(maxsize=None) - def _get_schema(self, schema_provided: str, database: str, fullName: str) -> str: - # For some databases, the schema attribute in tableau api does not return - # correct schema name for the table. For more information, see - # https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_model.html#schema_attribute. 
- # Hence we extract schema from fullName whenever fullName is available - schema = self._extract_schema_from_fullName(fullName) if fullName else "" - if not schema: - schema = schema_provided - elif schema != schema_provided: - logger.debug( - "Correcting schema, provided {0}, corrected {1}".format( - schema_provided, schema - ) - ) - - if not schema and database in self.config.default_schema_map: - schema = self.config.default_schema_map[database] - - return schema - - @lru_cache(maxsize=None) - def _extract_schema_from_fullName(self, fullName: str) -> str: - # fullName is observed to be in format [schemaName].[tableName] - # OR simply tableName OR [tableName] - if fullName.startswith("[") and "].[" in fullName: - return fullName[1 : fullName.index("]")] - return "" - @lru_cache(maxsize=None) def get_last_modified( self, creator: Optional[str], created_at: bytes, updated_at: bytes diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 2c92285fdba77..7c4852042ce7c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -1,4 +1,6 @@ import html +import logging +from dataclasses import dataclass from functools import lru_cache from typing import Dict, List, Optional, Tuple @@ -6,6 +8,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel +from datahub.ingestion.source import tableau_constant as tc from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, FineGrainedLineage, @@ -31,6 +34,8 @@ ) from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +logger = logging.getLogger(__name__) + class TableauLineageOverrides(ConfigModel): platform_override_map: Optional[Dict[str, str]] = Field( @@ -537,12 +542,12 @@ def get_fully_qualified_table_name( platform: str, upstream_db: 
str, schema: str, - full_name: str, + table_name: str, ) -> str: if platform == "athena": upstream_db = "" database_name = f"{upstream_db}." if upstream_db else "" - final_name = full_name.replace("[", "").replace("]", "") + final_name = table_name.replace("[", "").replace("]", "") schema_name = f"{schema}." if schema else "" @@ -573,17 +578,123 @@ def get_fully_qualified_table_name( return fully_qualified_table_name -def get_platform_instance( - platform: str, platform_instance_map: Optional[Dict[str, str]] -) -> Optional[str]: - if platform_instance_map is not None and platform in platform_instance_map.keys(): - return platform_instance_map[platform] +@dataclass +class TableauUpstreamReference: + database: Optional[str] + schema: Optional[str] + table: str + + connection_type: str + + @classmethod + def create( + cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None + ) -> "TableauUpstreamReference": + # Values directly from `table` object from Tableau + database = t_database = d.get(tc.DATABASE, {}).get(tc.NAME) + schema = t_schema = d.get(tc.SCHEMA) + table = t_table = d.get(tc.NAME) or "" + t_full_name = d.get(tc.FULL_NAME) + t_connection_type = d[tc.CONNECTION_TYPE] # required to generate urn + t_id = d[tc.ID] + + parsed_full_name = cls.parse_full_name(t_full_name) + if parsed_full_name and len(parsed_full_name) == 3: + database, schema, table = parsed_full_name + elif parsed_full_name and len(parsed_full_name) == 2: + schema, table = parsed_full_name + else: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" Did not parse full name {t_full_name}: unexpected number of values", + ) + + if not schema and default_schema_map and database in default_schema_map: + schema = default_schema_map[database] + + if database != t_database: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing database {t_database} with {database} from full name {t_full_name}" + ) + if schema != t_schema: + logger.debug( + f"Upstream urn generation 
({t_id}):" + f" replacing schema {t_schema} with {schema} from full name {t_full_name}" + ) + if table != t_table: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing table {t_table} with {table} from full name {t_full_name}" + ) + + # TODO: See if we can remove this -- made for redshift + if ( + schema + and t_table + and t_full_name + and t_table == t_full_name + and schema in t_table + ): + logger.debug( + f"Omitting schema for upstream table {t_id}, schema included in table name" + ) + schema = "" + + return cls( + database=database, + schema=schema, + table=table, + connection_type=t_connection_type, + ) + + @staticmethod + def parse_full_name(full_name: Optional[str]) -> Optional[List[str]]: + # fullName is observed to be in formats: + # [database].[schema].[table] + # [schema].[table] + # [table] + # table + # schema + + # TODO: Validate the startswith check. Currently required for our integration tests + if full_name is None or not full_name.startswith("["): + return None + + return full_name.replace("[", "").replace("]", "").split(".") + + def make_dataset_urn( + self, + env: str, + platform_instance_map: Optional[Dict[str, str]], + lineage_overrides: Optional[TableauLineageOverrides] = None, + ) -> str: + ( + upstream_db, + platform_instance, + platform, + original_platform, + ) = get_overridden_info( + connection_type=self.connection_type, + upstream_db=self.database, + lineage_overrides=lineage_overrides, + platform_instance_map=platform_instance_map, + ) + + table_name = get_fully_qualified_table_name( + original_platform, + upstream_db or "", + self.schema, + self.table, + ) - return None + return builder.make_dataset_urn_with_platform_instance( + platform, table_name, platform_instance, env + ) def get_overridden_info( - connection_type: str, + connection_type: Optional[str], upstream_db: Optional[str], platform_instance_map: Optional[Dict[str, str]], lineage_overrides: Optional[TableauLineageOverrides] = None, @@ -605,7 +716,9 @@ 
def get_overridden_info( ): upstream_db = lineage_overrides.database_override_map[upstream_db] - platform_instance = get_platform_instance(original_platform, platform_instance_map) + platform_instance = ( + platform_instance_map.get(original_platform) if platform_instance_map else None + ) if original_platform in ("athena", "hive", "mysql"): # Two tier databases upstream_db = None @@ -613,35 +726,6 @@ def get_overridden_info( return upstream_db, platform_instance, platform, original_platform -def make_table_urn( - env: str, - upstream_db: Optional[str], - connection_type: str, - schema: str, - full_name: str, - platform_instance_map: Optional[Dict[str, str]], - lineage_overrides: Optional[TableauLineageOverrides] = None, -) -> str: - - upstream_db, platform_instance, platform, original_platform = get_overridden_info( - connection_type=connection_type, - upstream_db=upstream_db, - lineage_overrides=lineage_overrides, - platform_instance_map=platform_instance_map, - ) - - table_name = get_fully_qualified_table_name( - original_platform, - upstream_db if upstream_db is not None else "", - schema, - full_name, - ) - - return builder.make_dataset_urn_with_platform_instance( - platform, table_name, platform_instance, env - ) - - def make_description_from_params(description, formula): """ Generate column description diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 51390873712d3..16820c37d546e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -7,7 +7,10 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from 
datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -91,6 +94,7 @@ class UnityCatalogSourceConfig( BaseUsageConfig, DatasetSourceConfigMixin, StatefulProfilingConfigMixin, + LowerCaseDatasetUrnConfigMixin, ): token: str = pydantic.Field(description="Databricks personal access token") workspace_url: str = pydantic.Field( @@ -162,6 +166,14 @@ class UnityCatalogSourceConfig( description="Option to enable/disable lineage generation.", ) + include_external_lineage: bool = pydantic.Field( + default=True, + description=( + "Option to enable/disable lineage generation for external tables." + " Only external S3 tables are supported at the moment." + ), + ) + include_notebooks: bool = pydantic.Field( default=False, description="Ingest notebooks, represented as DataHub datasets.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 9bcdb200f180e..3fb77ce512ed2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -33,6 +33,7 @@ ALLOWED_STATEMENT_TYPES, Catalog, Column, + ExternalTableReference, Metastore, Notebook, Query, @@ -248,6 +249,13 @@ def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: ) if table_ref: table.upstreams[table_ref] = {} + elif "fileInfo" in item: + external_ref = ExternalTableReference.create_from_lineage( + item["fileInfo"] + ) + if external_ref: + table.external_upstreams.add(external_ref) + for notebook in item.get("notebookInfos") or []: table.upstream_notebooks.add(notebook["notebook_id"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 
18ac2475b51e0..315c1c0d20186 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -10,6 +10,7 @@ CatalogType, ColumnTypeName, DataSourceFormat, + SecurableType, TableType, ) from databricks.sdk.service.sql import QueryStatementType @@ -176,6 +177,35 @@ def external_path(self) -> str: return f"{self.catalog}/{self.schema}/{self.table}" +@dataclass(frozen=True, order=True) +class ExternalTableReference: + path: str + has_permission: bool + name: Optional[str] + type: Optional[SecurableType] + storage_location: Optional[str] + + @classmethod + def create_from_lineage(cls, d: dict) -> Optional["ExternalTableReference"]: + try: + securable_type: Optional[SecurableType] + try: + securable_type = SecurableType(d.get("securable_type", "").lower()) + except ValueError: + securable_type = None + + return cls( + path=d["path"], + has_permission=d.get("has_permission") or True, + name=d.get("securable_name"), + type=securable_type, + storage_location=d.get("storage_location"), + ) + except Exception as e: + logger.warning(f"Failed to create ExternalTableReference from {d}: {e}") + return None + + @dataclass class Table(CommonProperty): schema: Schema @@ -193,6 +223,7 @@ class Table(CommonProperty): view_definition: Optional[str] properties: Dict[str, str] upstreams: Dict[TableReference, Dict[str, List[str]]] = field(default_factory=dict) + external_upstreams: Set[ExternalTableReference] = field(default_factory=set) upstream_notebooks: Set[NotebookId] = field(default_factory=set) downstream_notebooks: Set[NotebookId] = field(default_factory=set) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index fa61571fa92cb..4153d9dd88eb8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -19,6 
+19,8 @@ class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") num_column_lineage_skipped_column_count: int = 0 + num_external_upstreams_lacking_permissions: int = 0 + num_external_upstreams_unsupported: int = 0 num_queries: int = 0 num_queries_dropped_parse_failure: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 27c1f341aa84d..b63cf65d55dc8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -41,6 +41,7 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -455,6 +456,28 @@ def _generate_lineage_aspect( ) ) + if self.config.include_external_lineage: + for external_ref in table.external_upstreams: + if not external_ref.has_permission or not external_ref.path: + self.report.num_external_upstreams_lacking_permissions += 1 + logger.warning( + f"Lacking permissions for external file upstream on {table.ref}" + ) + elif external_ref.path.startswith("s3://"): + upstreams.append( + UpstreamClass( + dataset=make_s3_urn_for_lineage( + external_ref.path, self.config.env + ), + type=DatasetLineageTypeClass.COPY, + ) + ) + else: + self.report.num_external_upstreams_unsupported += 1 + logger.warning( + f"Unsupported external file upstream on {table.ref}: {external_ref.path}" + ) + if upstreams: return UpstreamLineageClass( upstreams=upstreams, diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py index 8ca1296d819c1..0a73bb5203e72 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py @@ -4,7 +4,13 @@ from datahub.configuration.common import ConfigModel, ConfigurationError -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. +_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) class BigQueryBaseConfig(ConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index 0d72fc52da0ca..c3e8c175f1de5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -166,13 +166,17 @@ def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None: "but should be set when using use_certificate false for oauth_config" ) - @pydantic.validator("include_view_lineage") - def validate_include_view_lineage(cls, v, values): - if not values.get("include_table_lineage") and v: + @pydantic.root_validator() + def validate_include_view_lineage(cls, values): + if ( + "include_table_lineage" in values + and not values.get("include_table_lineage") + and values.get("include_view_lineage") + ): raise ValueError( "include_table_lineage must be True for include_view_lineage to be set." 
) - return v + return values def get_sql_alchemy_url( self, diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index 8516a7054a9cd..2b610947e9043 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -24,6 +24,7 @@ def assert_sql_result_with_resolver( *, expected_file: pathlib.Path, schema_resolver: SchemaResolver, + allow_table_error: bool = False, **kwargs: Any, ) -> None: # HACK: Our BigQuery source overwrites this value and doesn't undo it. @@ -36,6 +37,14 @@ def assert_sql_result_with_resolver( **kwargs, ) + if res.debug_info.table_error: + if allow_table_error: + logger.info( + f"SQL parser table error: {res.debug_info.table_error}", + exc_info=res.debug_info.table_error, + ) + else: + raise res.debug_info.table_error if res.debug_info.column_error: logger.warning( f"SQL parser column error: {res.debug_info.column_error}", @@ -70,11 +79,14 @@ def assert_sql_result( sql: str, *, dialect: str, + platform_instance: Optional[str] = None, expected_file: pathlib.Path, schemas: Optional[Dict[str, SchemaInfo]] = None, **kwargs: Any, ) -> None: - schema_resolver = SchemaResolver(platform=dialect) + schema_resolver = SchemaResolver( + platform=dialect, platform_instance=platform_instance + ) if schemas: for urn, schema in schemas.items(): schema_resolver.add_raw_schema_info(urn, schema) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index c04d2138bc116..18493edded4b7 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -3,6 +3,7 @@ import logging import pathlib import pickle +import shutil import sqlite3 import tempfile from dataclasses import dataclass, field @@ -56,15 
+57,15 @@ class ConnectionWrapper: conn: sqlite3.Connection filename: pathlib.Path - _temp_directory: Optional[tempfile.TemporaryDirectory] + _temp_directory: Optional[str] def __init__(self, filename: Optional[pathlib.Path] = None): self._temp_directory = None # Warning: If filename is provided, the file will not be automatically cleaned up. if not filename: - self._temp_directory = tempfile.TemporaryDirectory() - filename = pathlib.Path(self._temp_directory.name) / _DEFAULT_FILE_NAME + self._temp_directory = tempfile.mkdtemp() + filename = pathlib.Path(self._temp_directory) / _DEFAULT_FILE_NAME self.conn = sqlite3.connect(filename, isolation_level=None) self.conn.row_factory = sqlite3.Row @@ -101,7 +102,8 @@ def executemany( def close(self) -> None: self.conn.close() if self._temp_directory: - self._temp_directory.cleanup() + shutil.rmtree(self._temp_directory) + self._temp_directory = None def __enter__(self) -> "ConnectionWrapper": return self diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index eb2d975ee607f..f91c01d901ac1 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -4,7 +4,7 @@ import re import time from functools import reduce -from typing import Any, Dict, List, Match, Optional, Union, cast +from typing import Any, Dict, List, Mapping, Match, Optional, Union, cast from datahub.emitter import mce_builder from datahub.emitter.mce_builder import OwnerType @@ -111,7 +111,7 @@ def __init__( self.owner_source_type = owner_source_type self.match_nested_props = match_nested_props - def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: + def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]: # Defining the following local variables - # operations_map - the final resulting map when operations are processed. # Against each operation the values to be applied are stored. 
diff --git a/metadata-ingestion/src/datahub/utilities/ratelimiter.py b/metadata-ingestion/src/datahub/utilities/ratelimiter.py new file mode 100644 index 0000000000000..3d47d25e14c49 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/ratelimiter.py @@ -0,0 +1,56 @@ +import collections +import threading +import time +from contextlib import AbstractContextManager +from typing import Any, Deque + + +# Modified version of https://github.com/RazerM/ratelimiter/blob/master/ratelimiter/_sync.py +class RateLimiter(AbstractContextManager): + + """Provides rate limiting for an operation with a configurable number of + requests for a time period. + """ + + def __init__(self, max_calls: int, period: float = 1.0) -> None: + """Initialize a RateLimiter object which enforces as much as max_calls + operations on period (eventually floating) number of seconds. + """ + if period <= 0: + raise ValueError("Rate limiting period should be > 0") + if max_calls <= 0: + raise ValueError("Rate limiting number of calls should be > 0") + + # We're using a deque to store the last execution timestamps, not for + # its maxlen attribute, but to allow constant time front removal. + self.calls: Deque = collections.deque() + + self.period = period + self.max_calls = max_calls + self._lock = threading.Lock() + + def __enter__(self) -> "RateLimiter": + with self._lock: + # We want to ensure that no more than max_calls were run in the allowed + # period. For this, we store the last timestamps of each call and run + # the rate verification upon each __enter__ call. + if len(self.calls) >= self.max_calls: + until = time.time() + self.period - self._timespan + sleeptime = until - time.time() + if sleeptime > 0: + time.sleep(sleeptime) + return self + + def __exit__(self, exc_type: Any, exc: Any, traceback: Any) -> None: + with self._lock: + # Store the last operation timestamp. 
+ self.calls.append(time.time()) + + # Pop the timestamp list front (ie: the older calls) until the sum goes + # back below the period. This is our 'sliding period' window. + while self._timespan >= self.period: + self.calls.popleft() + + @property + def _timespan(self) -> float: + return self.calls[-1] - self.calls[0] diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py new file mode 100644 index 0000000000000..5d2fc6872c7bd --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -0,0 +1,211 @@ +import json +import logging +import uuid +from typing import Any, Dict, List, Optional, Type, Union + +from sqlalchemy import types + +from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField +from datahub.metadata.schema_classes import NullTypeClass, SchemaFieldDataTypeClass + +logger = logging.getLogger(__name__) + +try: + # This is used for both BigQuery and Athena. 
+ from sqlalchemy_bigquery import STRUCT +except ImportError: + STRUCT = None + + +class MapType(types.TupleType): + # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub + pass + + +class SqlAlchemyColumnToAvroConverter: + """Helper class that collects some methods to convert SQLalchemy columns to Avro schema.""" + + # tuple of complex data types that require a special handling + _COMPLEX_TYPES = (STRUCT, types.ARRAY, MapType) + + # mapping of primitive SQLalchemy data types to AVRO schema data types + PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE: Dict[Type[types.TypeEngine], str] = { + types.String: "string", + types.BINARY: "string", + types.BOOLEAN: "boolean", + types.FLOAT: "float", + types.INTEGER: "int", + types.BIGINT: "long", + types.VARCHAR: "string", + types.CHAR: "string", + } + + @classmethod + def get_avro_type( + cls, column_type: Union[types.TypeEngine, STRUCT, MapType], nullable: bool + ) -> Dict[str, Any]: + """Determines the concrete AVRO schema type for a SQLalchemy-typed column""" + + if isinstance( + column_type, tuple(cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys()) + ): + return { + "type": cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE[type(column_type)], + "native_data_type": str(column_type), + "_nullable": nullable, + } + if isinstance(column_type, types.DECIMAL): + return { + "type": "bytes", + "logicalType": "decimal", + "precision": int(column_type.precision), + "scale": int(column_type.scale), + "native_data_type": str(column_type), + "_nullable": nullable, + } + if isinstance(column_type, types.DATE): + return { + "type": "int", + "logicalType": "date", + "native_data_type": str(column_type), + "_nullable": nullable, + } + if isinstance(column_type, types.TIMESTAMP): + return { + "type": "long", + "logicalType": "timestamp-millis", + "native_data_type": str(column_type), + "_nullable": nullable, + } + if isinstance(column_type, types.ARRAY): + array_type = column_type.item_type + return { + "type": "array", 
+ "items": cls.get_avro_type(column_type=array_type, nullable=nullable), + "native_data_type": f"array<{str(column_type.item_type)}>", + } + if isinstance(column_type, MapType): + key_type = column_type.types[0] + value_type = column_type.types[1] + return { + "type": "map", + "values": cls.get_avro_type(column_type=value_type, nullable=nullable), + "native_data_type": str(column_type), + "key_type": cls.get_avro_type(column_type=key_type, nullable=nullable), + "key_native_data_type": str(key_type), + } + if STRUCT and isinstance(column_type, STRUCT): + fields = [] + for field_def in column_type._STRUCT_fields: + field_name, field_type = field_def + fields.append( + { + "name": field_name, + "type": cls.get_avro_type( + column_type=field_type, nullable=nullable + ), + } + ) + struct_name = f"__struct_{str(uuid.uuid4()).replace('-', '')}" + + return { + "type": "record", + "name": struct_name, + "fields": fields, + "native_data_type": str(column_type), + "_nullable": nullable, + } + + return { + "type": "null", + "native_data_type": str(column_type), + "_nullable": nullable, + } + + @classmethod + def get_avro_for_sqlalchemy_column( + cls, + column_name: str, + column_type: types.TypeEngine, + nullable: bool, + ) -> Union[object, Dict[str, object]]: + """Returns the AVRO schema representation of a SQLalchemy column.""" + if isinstance(column_type, cls._COMPLEX_TYPES): + return { + "type": "record", + "name": "__struct_", + "fields": [ + { + "name": column_name, + "type": cls.get_avro_type( + column_type=column_type, nullable=nullable + ), + } + ], + } + return cls.get_avro_type(column_type=column_type, nullable=nullable) + + +def get_schema_fields_for_sqlalchemy_column( + column_name: str, + column_type: types.TypeEngine, + description: Optional[str] = None, + nullable: Optional[bool] = True, + is_part_of_key: Optional[bool] = False, +) -> List[SchemaField]: + """Creates SchemaFields from a given SQLalchemy column. 
+ + This function is analogous to `get_schema_fields_for_hive_column` from datahub.utilities.hive_schema_to_avro. + The main purpose of implementing it this way, is to make it ready/compatible for second field path generation, + which allows to explore nested structures within the UI. + """ + + if nullable is None: + nullable = True + + try: + # as a first step, the column is converted to AVRO JSON which can then be used by an existing function + avro_schema_json = ( + SqlAlchemyColumnToAvroConverter.get_avro_for_sqlalchemy_column( + column_name=column_name, + column_type=column_type, + nullable=nullable, + ) + ) + # retrieve schema field definitions from the above generated AVRO JSON structure + schema_fields = avro_schema_to_mce_fields( + avro_schema=json.dumps(avro_schema_json), + default_nullable=nullable, + swallow_exceptions=False, + ) + except Exception as e: + logger.warning( + f"Unable to parse column {column_name} and type {column_type} the error was: {e}" + ) + + # fallback description in case any exception occurred + schema_fields = [ + SchemaField( + fieldPath=column_name, + type=SchemaFieldDataTypeClass(type=NullTypeClass()), + nativeDataType=str(column_type), + ) + ] + + # for all non-nested data types an additional modification of the `fieldPath` property is required + if type(column_type) in ( + *SqlAlchemyColumnToAvroConverter.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys(), + types.TIMESTAMP, + types.DATE, + types.DECIMAL, + ): + schema_fields[0].fieldPath += f".{column_name}" + + if description: + schema_fields[0].description = description + schema_fields[0].isPartOfKey = ( + is_part_of_key if is_part_of_key is not None else False + ) + + return schema_fields diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 81c43884fdf7d..1d74b20569814 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ 
b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -5,12 +5,13 @@ import logging import pathlib from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import pydantic.dataclasses import sqlglot import sqlglot.errors import sqlglot.lineage +import sqlglot.optimizer.annotate_types import sqlglot.optimizer.qualify import sqlglot.optimizer.qualify_columns from pydantic import BaseModel @@ -23,7 +24,17 @@ from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier -from datahub.metadata.schema_classes import OperationTypeClass, SchemaMetadataClass +from datahub.metadata.schema_classes import ( + ArrayTypeClass, + BooleanTypeClass, + DateTypeClass, + NumberTypeClass, + OperationTypeClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StringTypeClass, + TimeTypeClass, +) from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict from datahub.utilities.urns.dataset_urn import DatasetUrn @@ -90,8 +101,18 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType: return QueryType.UNKNOWN +class _ParserBaseModel( + BaseModel, + arbitrary_types_allowed=True, + json_encoders={ + SchemaFieldDataTypeClass: lambda v: v.to_obj(), + }, +): + pass + + @functools.total_ordering -class _FrozenModel(BaseModel, frozen=True): +class _FrozenModel(_ParserBaseModel, frozen=True): def __lt__(self, other: "_FrozenModel") -> bool: for field in self.__fields__: self_v = getattr(self, field) @@ -146,29 +167,42 @@ class _ColumnRef(_FrozenModel): column: str -class ColumnRef(BaseModel): +class ColumnRef(_ParserBaseModel): table: Urn column: str -class _DownstreamColumnRef(BaseModel): +class _DownstreamColumnRef(_ParserBaseModel): table: Optional[_TableName] column: str + column_type: 
Optional[sqlglot.exp.DataType] -class DownstreamColumnRef(BaseModel): +class DownstreamColumnRef(_ParserBaseModel): table: Optional[Urn] column: str + column_type: Optional[SchemaFieldDataTypeClass] + native_column_type: Optional[str] + + @pydantic.validator("column_type", pre=True) + def _load_column_type( + cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]] + ) -> Optional[SchemaFieldDataTypeClass]: + if v is None: + return None + if isinstance(v, SchemaFieldDataTypeClass): + return v + return SchemaFieldDataTypeClass.from_obj(v) -class _ColumnLineageInfo(BaseModel): +class _ColumnLineageInfo(_ParserBaseModel): downstream: _DownstreamColumnRef upstreams: List[_ColumnRef] logic: Optional[str] -class ColumnLineageInfo(BaseModel): +class ColumnLineageInfo(_ParserBaseModel): downstream: DownstreamColumnRef upstreams: List[ColumnRef] @@ -176,7 +210,7 @@ class ColumnLineageInfo(BaseModel): logic: Optional[str] = pydantic.Field(default=None, exclude=True) -class SqlParsingDebugInfo(BaseModel, arbitrary_types_allowed=True): +class SqlParsingDebugInfo(_ParserBaseModel): confidence: float = 0.0 tables_discovered: int = 0 @@ -190,7 +224,7 @@ def error(self) -> Optional[Exception]: return self.table_error or self.column_error -class SqlParsingResult(BaseModel): +class SqlParsingResult(_ParserBaseModel): query_type: QueryType = QueryType.UNKNOWN in_tables: List[Urn] @@ -207,9 +241,9 @@ class SqlParsingResult(BaseModel): ) -def _parse_statement(sql: str, dialect: str) -> sqlglot.Expression: - statement = sqlglot.parse_one( - sql, read=dialect, error_level=sqlglot.ErrorLevel.RAISE +def _parse_statement(sql: sqlglot.exp.ExpOrStr, dialect: str) -> sqlglot.Expression: + statement: sqlglot.Expression = sqlglot.maybe_parse( + sql, dialect=dialect, error_level=sqlglot.ErrorLevel.RAISE ) return statement @@ -433,14 +467,20 @@ def _column_level_lineage( # noqa: C901 default_db: Optional[str], default_schema: Optional[str], ) -> List[_ColumnLineageInfo]: - if not isinstance( - 
statement, - _SupportedColumnLineageTypesTuple, + is_create_ddl = _is_create_table_ddl(statement) + if ( + not isinstance( + statement, + _SupportedColumnLineageTypesTuple, + ) + and not is_create_ddl ): raise UnsupportedStatementTypeError( f"Can only generate column-level lineage for select-like inner statements, not {type(statement)}" ) + column_lineage: List[_ColumnLineageInfo] = [] + use_case_insensitive_cols = dialect in { # Column identifiers are case-insensitive in BigQuery, so we need to # do a normalization step beforehand to make sure it's resolved correctly. @@ -448,6 +488,11 @@ def _column_level_lineage( # noqa: C901 # Our snowflake source lowercases column identifiers, so we are forced # to do fuzzy (case-insensitive) resolution instead of exact resolution. "snowflake", + # Teradata column names are case-insensitive. + # A name, even when enclosed in double quotation marks, is not case sensitive. For example, CUSTOMER and Customer are the same. + # See more below: + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acreldb/n0ejgx4895bofnn14rlguktfx5r3.htm + "teradata", } sqlglot_db_schema = sqlglot.MappingSchema( @@ -541,7 +586,46 @@ def _schema_aware_fuzzy_column_resolve( ) from e logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) - column_lineage = [] + # Handle the create DDL case. + if is_create_ddl: + assert ( + output_table is not None + ), "output_table must be set for create DDL statements" + + create_schema: sqlglot.exp.Schema = statement.this + sqlglot_columns = create_schema.expressions + + for column_def in sqlglot_columns: + if not isinstance(column_def, sqlglot.exp.ColumnDef): + # Ignore things like constraints. 
+ continue + + output_col = _schema_aware_fuzzy_column_resolve( + output_table, column_def.name + ) + output_col_type = column_def.args.get("kind") + + column_lineage.append( + _ColumnLineageInfo( + downstream=_DownstreamColumnRef( + table=output_table, + column=output_col, + column_type=output_col_type, + ), + upstreams=[], + ) + ) + + return column_lineage + + # Try to figure out the types of the output columns. + try: + statement = sqlglot.optimizer.annotate_types.annotate_types( + statement, schema=sqlglot_db_schema + ) + except (sqlglot.errors.OptimizeError, sqlglot.errors.ParseError) as e: + # This is not a fatal error, so we can continue. + logger.debug("sqlglot failed to annotate or parse types: %s", e) try: assert isinstance(statement, _SupportedColumnLineageTypesTuple) @@ -551,9 +635,7 @@ def _schema_aware_fuzzy_column_resolve( (select_col.alias_or_name, select_col) for select_col in statement.selects ] logger.debug("output columns: %s", [col[0] for col in output_columns]) - output_col: str for output_col, original_col_expression in output_columns: - # print(f"output column: {output_col}") if output_col == "*": # If schema information is available, the * will be expanded to the actual columns. # Otherwise, we can't process it. @@ -581,7 +663,7 @@ def _schema_aware_fuzzy_column_resolve( # Generate SELECT lineage. # Using a set here to deduplicate upstreams. - direct_col_upstreams: Set[_ColumnRef] = set() + direct_raw_col_upstreams: Set[_ColumnRef] = set() for node in lineage_node.walk(): if node.downstream: # We only want the leaf nodes. @@ -596,8 +678,9 @@ def _schema_aware_fuzzy_column_resolve( if node.subfield: normalized_col = f"{normalized_col}.{node.subfield}" - col = _schema_aware_fuzzy_column_resolve(table_ref, normalized_col) - direct_col_upstreams.add(_ColumnRef(table=table_ref, column=col)) + direct_raw_col_upstreams.add( + _ColumnRef(table=table_ref, column=normalized_col) + ) else: # This branch doesn't matter. 
For example, a count(*) column would go here, and # we don't get any column-level lineage for that. @@ -613,19 +696,35 @@ def _schema_aware_fuzzy_column_resolve( output_col = _schema_aware_fuzzy_column_resolve(output_table, output_col) - if not direct_col_upstreams: + # Guess the output column type. + output_col_type = None + if original_col_expression.type: + output_col_type = original_col_expression.type + + # Fuzzy resolve upstream columns. + direct_resolved_col_upstreams = { + _ColumnRef( + table=edge.table, + column=_schema_aware_fuzzy_column_resolve(edge.table, edge.column), + ) + for edge in direct_raw_col_upstreams + } + + if not direct_resolved_col_upstreams: logger.debug(f' "{output_col}" has no upstreams') column_lineage.append( _ColumnLineageInfo( downstream=_DownstreamColumnRef( - table=output_table, column=output_col + table=output_table, + column=output_col, + column_type=output_col_type, ), - upstreams=sorted(direct_col_upstreams), + upstreams=sorted(direct_resolved_col_upstreams), # logic=column_logic.sql(pretty=True, dialect=dialect), ) ) - # TODO: Also extract referenced columns (e.g. non-SELECT lineage) + # TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage) except (sqlglot.errors.OptimizeError, ValueError) as e: raise SqlUnderstandingError( f"sqlglot failed to compute some lineage: {e}" @@ -646,6 +745,53 @@ def _extract_select_from_create( return statement +_UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT: Set[str] = set( + sqlglot.exp.Update.arg_types.keys() +) - set(sqlglot.exp.Select.arg_types.keys()) + + +def _extract_select_from_update( + statement: sqlglot.exp.Update, +) -> sqlglot.exp.Select: + statement = statement.copy() + + # The "SET" expressions need to be converted. + # For the update command, it'll be a list of EQ expressions, but the select + # should contain aliased columns. 
+ new_expressions = [] + for expr in statement.expressions: + if isinstance(expr, sqlglot.exp.EQ) and isinstance( + expr.left, sqlglot.exp.Column + ): + new_expressions.append( + sqlglot.exp.Alias( + this=expr.right, + alias=expr.left.this, + ) + ) + else: + # If we don't know how to convert it, just leave it as-is. If this causes issues, + # they'll get caught later. + new_expressions.append(expr) + + return sqlglot.exp.Select( + **{ + **{ + k: v + for k, v in statement.args.items() + if k not in _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT + }, + "expressions": new_expressions, + } + ) + + +def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: + return isinstance(statement, sqlglot.exp.Create) and isinstance( + statement.this, sqlglot.exp.Schema + ) + + def _try_extract_select( statement: sqlglot.exp.Expression, ) -> sqlglot.exp.Expression: @@ -662,6 +808,9 @@ def _try_extract_select( elif isinstance(statement, sqlglot.exp.Insert): # TODO Need to map column renames in the expressions part of the statement. statement = statement.expression + elif isinstance(statement, sqlglot.exp.Update): + # Assumption: the output table is already captured in the modified tables list. + statement = _extract_select_from_update(statement) elif isinstance(statement, sqlglot.exp.Create): # TODO May need to map column renames. # Assumption: the output table is already captured in the modified tables list. 
@@ -673,9 +822,46 @@ def _try_extract_select( return statement +def _translate_sqlglot_type( + sqlglot_type: sqlglot.exp.DataType.Type, +) -> Optional[SchemaFieldDataTypeClass]: + TypeClass: Any + if sqlglot_type in sqlglot.exp.DataType.TEXT_TYPES: + TypeClass = StringTypeClass + elif sqlglot_type in sqlglot.exp.DataType.NUMERIC_TYPES or sqlglot_type in { + sqlglot.exp.DataType.Type.DECIMAL, + }: + TypeClass = NumberTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.BOOLEAN, + sqlglot.exp.DataType.Type.BIT, + }: + TypeClass = BooleanTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.DATE, + }: + TypeClass = DateTypeClass + elif sqlglot_type in sqlglot.exp.DataType.TEMPORAL_TYPES: + TypeClass = TimeTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.ARRAY, + }: + TypeClass = ArrayTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.UNKNOWN, + }: + return None + else: + logger.debug("Unknown sqlglot type: %s", sqlglot_type) + return None + + return SchemaFieldDataTypeClass(type=TypeClass()) + + def _translate_internal_column_lineage( table_name_urn_mapping: Dict[_TableName, str], raw_column_lineage: _ColumnLineageInfo, + dialect: str, ) -> ColumnLineageInfo: downstream_urn = None if raw_column_lineage.downstream.table: @@ -684,6 +870,18 @@ def _translate_internal_column_lineage( downstream=DownstreamColumnRef( table=downstream_urn, column=raw_column_lineage.downstream.column, + column_type=_translate_sqlglot_type( + raw_column_lineage.downstream.column_type.this + ) + if raw_column_lineage.downstream.column_type + else None, + native_column_type=raw_column_lineage.downstream.column_type.sql( + dialect=dialect + ) + if raw_column_lineage.downstream.column_type + and raw_column_lineage.downstream.column_type.this + != sqlglot.exp.DataType.Type.UNKNOWN + else None, ), upstreams=[ ColumnRef( @@ -700,12 +898,14 @@ def _get_dialect(platform: str) -> str: # TODO: convert datahub platform names to sqlglot dialect if platform == 
"presto-on-hive": return "hive" + if platform == "mssql": + return "tsql" else: return platform def _sqlglot_lineage_inner( - sql: str, + sql: sqlglot.exp.ExpOrStr, schema_resolver: SchemaResolver, default_db: Optional[str] = None, default_schema: Optional[str] = None, @@ -786,19 +986,25 @@ def _sqlglot_lineage_inner( ) # Simplify the input statement for column-level lineage generation. - select_statement = _try_extract_select(statement) + try: + select_statement = _try_extract_select(statement) + except Exception as e: + logger.debug(f"Failed to extract select from statement: {e}", exc_info=True) + debug_info.column_error = e + select_statement = None # Generate column-level lineage. column_lineage: Optional[List[_ColumnLineageInfo]] = None try: - column_lineage = _column_level_lineage( - select_statement, - dialect=dialect, - input_tables=table_name_schema_mapping, - output_table=downstream_table, - default_db=default_db, - default_schema=default_schema, - ) + if select_statement is not None: + column_lineage = _column_level_lineage( + select_statement, + dialect=dialect, + input_tables=table_name_schema_mapping, + output_table=downstream_table, + default_db=default_db, + default_schema=default_schema, + ) except UnsupportedStatementTypeError as e: # Inject details about the outer statement type too. e.args = (f"{e.args[0]} (outer statement type: {type(statement)})",) @@ -818,7 +1024,7 @@ def _sqlglot_lineage_inner( if column_lineage: column_lineage_urns = [ _translate_internal_column_lineage( - table_name_urn_mapping, internal_col_lineage + table_name_urn_mapping, internal_col_lineage, dialect=dialect ) for internal_col_lineage in column_lineage ] @@ -950,3 +1156,20 @@ def create_lineage_sql_parsed_result( finally: if needs_close: schema_resolver.close() + + +def view_definition_lineage_helper( + result: SqlParsingResult, view_urn: str +) -> SqlParsingResult: + if result.query_type is QueryType.SELECT: + # Some platforms (e.g. postgres) store only the SELECT statement of a view definition + # (the part after `create view V as`). 
For such view definitions, `result.out_tables` and + # `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream + # details and downstream column details are extracted correctly. + # Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream` + # to get complete lineage result. + result.out_tables = [view_urn] + if result.column_lineage: + for col_result in result.column_lineage: + col_result.downstream.table = view_urn + return result diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index 261f95331af61..e13d439161064 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -3,7 +3,11 @@ from avro.schema import Field, RecordSchema from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.metadata.schema_classes import DictWrapper +from datahub.metadata.schema_classes import ( + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, +) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn, guess_entity_type @@ -32,7 +36,7 @@ def list_urns_with_path( if isinstance(model, MetadataChangeProposalWrapper): if model.entityUrn: - urns.append((model.entityUrn, ["urn"])) + urns.append((model.entityUrn, ["entityUrn"])) if model.entityKeyAspect: urns.extend( _add_prefix_to_paths( @@ -83,7 +87,15 @@ def list_urns(model: Union[DictWrapper, MetadataChangeProposalWrapper]) -> List[ return [urn for urn, _ in list_urns_with_path(model)] -def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: +def transform_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ], + func: Callable[[str], str], +) -> None: """ Rewrites all URNs in the given object according to the given function. 
""" @@ -95,7 +107,9 @@ def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: def _modify_at_path( - model: Union[DictWrapper, list], path: _Path, new_value: str + model: Union[DictWrapper, MetadataChangeProposalWrapper, list], + path: _Path, + new_value: str, ) -> None: assert len(path) > 0 @@ -103,6 +117,8 @@ def _modify_at_path( if isinstance(path[0], int): assert isinstance(model, list) model[path[0]] = new_value + elif isinstance(model, MetadataChangeProposalWrapper): + setattr(model, path[0], new_value) else: assert isinstance(model, DictWrapper) model._inner_dict[path[0]] = new_value @@ -120,7 +136,14 @@ def _lowercase_dataset_urn(dataset_urn: str) -> str: return str(cur_urn) -def lowercase_dataset_urns(model: DictWrapper) -> None: +def lowercase_dataset_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ] +) -> None: def modify_urn(urn: str) -> str: if guess_entity_type(urn) == "dataset": return _lowercase_dataset_urn(urn) diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index 52e92d27549f0..ed65d74037796 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -136,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -156,7 +157,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -171,7 +173,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -186,7 +189,8 @@ }, 
"systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -203,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -218,7 +223,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -238,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -253,7 +260,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -268,7 +276,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -285,7 +294,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -300,7 +310,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -320,7 +331,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -335,7 +347,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -355,14 +368,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, 
"systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -386,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 4dcdf71ce0095..6ec6eb2809a10 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -115,7 +116,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -130,7 +132,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +167,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -183,7 +188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +210,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": 
"allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -219,7 +226,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -235,7 +243,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -252,7 +261,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -267,7 +277,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -291,7 +302,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -312,7 +324,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -327,7 +340,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -343,7 +357,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -360,7 +375,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -375,7 +391,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -403,7 +420,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": 
"no-run-id-provided" } }, { @@ -424,7 +442,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -439,7 +458,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -455,7 +475,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +493,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +509,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +542,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -540,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -571,7 +597,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +615,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -603,7 +631,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -639,7 
+668,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -654,7 +684,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -674,14 +705,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -702,14 +736,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -730,14 +766,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -758,14 +796,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": 
"allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -786,14 +826,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + "lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -833,7 +874,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -973,7 +1015,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -988,7 +1031,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1008,14 +1052,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1055,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1199,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1167,7 +1215,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,14 +1236,17 @@ "customProperties": { 
"engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831649166 + "lastUpdatedTimestamp": 1655831649788 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,14 +1267,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831649715 + "lastUpdatedTimestamp": 1655831649754 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1243,14 +1297,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831649731 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1271,14 +1327,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831649754 + "lastUpdatedTimestamp": 1655831649715 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1299,14 +1357,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831649788 + "lastUpdatedTimestamp": 1655831649166 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1346,7 +1405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1444,7 +1504,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1465,7 +1526,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1480,7 +1542,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1496,7 +1559,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1513,7 +1577,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1528,7 +1593,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1568,7 +1634,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1583,7 +1650,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1603,14 +1671,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831865396 + 
"lastUpdatedTimestamp": 1655831866541 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1631,14 +1702,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831866337 + "lastUpdatedTimestamp": 1655831866447 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1659,14 +1732,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831866398 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1687,14 +1762,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831866447 + "lastUpdatedTimestamp": 1655831866337 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1715,14 +1792,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831866541 + "lastUpdatedTimestamp": 1655831865396 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1766,7 +1844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", 
+ "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 901e4c1262d3f..715beebfe22fb 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +115,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -144,7 +147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +165,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -176,7 +181,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -211,7 +218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -226,7 +234,8 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -243,7 +252,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -258,7 +268,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -278,7 +289,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -298,7 +310,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +326,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +342,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -345,7 +360,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -360,7 +376,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -404,7 +422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -419,7 +438,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -434,7 +454,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +472,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -466,7 +488,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -494,7 +517,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -514,7 +538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -544,7 +570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -561,7 +588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -576,7 +604,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -608,7 +637,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -623,7 +653,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": 
"inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -643,14 +674,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -671,14 +705,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -699,14 +735,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -727,14 +765,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -755,14 +795,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + 
"lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -798,7 +839,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -938,7 +980,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +996,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -973,14 +1017,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1016,7 +1062,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1113,7 +1160,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1128,7 +1176,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1148,14 +1197,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831649166 + "lastUpdatedTimestamp": 1655831649788 } }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1176,14 +1228,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831649715 + "lastUpdatedTimestamp": 1655831649754 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1204,14 +1258,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831649731 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1232,14 +1288,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831649754 + "lastUpdatedTimestamp": 1655831649715 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1260,14 +1318,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831649788 + "lastUpdatedTimestamp": 1655831649166 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1303,7 +1362,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { 
@@ -1401,7 +1461,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1421,7 +1482,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1436,7 +1498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1451,7 +1514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1468,7 +1532,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1483,7 +1548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1519,7 +1585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1534,7 +1601,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1554,14 +1622,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831865396 + "lastUpdatedTimestamp": 1655831866541 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1582,14 +1653,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": 
"True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831866337 + "lastUpdatedTimestamp": 1655831866447 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1610,14 +1683,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831866398 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1638,14 +1713,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831866447 + "lastUpdatedTimestamp": 1655831866337 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1666,14 +1743,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831866541 + "lastUpdatedTimestamp": 1655831865396 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1713,7 +1791,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json 
index 18474e819334e..2076ec4096f68 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +115,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -144,7 +147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +165,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -176,7 +181,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -191,7 +197,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -211,14 +218,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ 
-239,14 +249,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -267,14 +279,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -295,14 +309,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -323,14 +339,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + "lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -350,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json 
b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index bb47a077e878b..42e3b19612c2b 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -93,7 +93,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -113,7 +114,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -128,7 +130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -143,7 +146,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -160,7 +164,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -175,7 +180,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -195,7 +201,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +217,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +233,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -242,7 +251,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -257,7 +267,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -277,7 +288,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -297,7 +309,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -312,7 +325,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -327,7 +341,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -344,7 +359,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -359,7 +375,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -383,7 +400,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -403,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -418,7 +437,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -433,7 +453,8 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -450,7 +471,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -465,7 +487,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -493,7 +516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -513,7 +537,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -528,7 +553,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -543,7 +569,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -560,7 +587,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +603,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -607,7 +636,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +652,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -643,14 +674,16 @@ "engineInfo": "local 
Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "readVersion": "3", + "version": "4" }, "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -686,7 +719,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json index f3b6d2b8138cc..6774d4c7055b9 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +117,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 
UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -121,7 +127,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -187,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +211,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -224,7 +232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -239,7 +248,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -257,17 +267,19 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", "Table Type:": "MANAGED_TABLE", "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: another.comment": "This table has no partitions", + "Table Parameters: comment": "This table has array of structs", "Table Parameters: numFiles": "1", "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: 
totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721976", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -278,6 +290,7 @@ "Storage Desc Params: serialization.format": "1" }, "name": "array_struct_test", + "description": "This table has array of structs", "tags": [] } }, @@ -304,6 +317,7 @@ { "fieldPath": "property_id", "nullable": true, + "description": "id of property", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -316,6 +330,7 @@ { "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", "nullable": true, + "description": "service types and providers", "type": { "type": { "com.linkedin.pegasus2avro.schema.ArrayType": { @@ -368,7 +383,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -385,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -405,7 +422,189 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "null", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test", + "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + 
"isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +619,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -438,7 
+638,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -448,7 +648,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -518,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -535,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +757,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +773,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +792,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -598,7 +802,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": 
"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -717,7 +921,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -734,7 +939,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -754,7 +960,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +976,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -787,16 +995,17 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "Table Type:": "MANAGED_TABLE", "Table Parameters: numFiles": "1", + "Table Parameters: numPartitions": "1", "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578704", + "Table Parameters: transient_lastDdlTime": "1697721968", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -853,6 +1062,18 @@ "nativeDataType": "string", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false 
} ] } @@ -862,7 +1083,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -879,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +1122,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +1138,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -932,7 +1157,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -942,7 +1167,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1039,7 +1264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1056,7 +1282,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1076,7 +1303,188 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", + "Table Type:": "MATERIALIZED_VIEW", + "Table Parameters: numFiles": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.struct_test", + "View Expanded Text:": "null", + "View Rewrite Enabled:": "No" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + 
"com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + 
"changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1091,7 +1499,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1109,7 +1518,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1119,10 +1528,10 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", - "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", - "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", "Compressed:": "No", "Num Buckets:": "-1", "Bucket Columns:": "[]", @@ -1285,7 +1694,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1302,7 +1712,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1322,7 +1733,26 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1342,7 +1772,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1357,7 +1788,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1372,7 +1804,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1389,7 +1822,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1404,7 +1838,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1419,7 +1854,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1437,7 +1873,7 @@ "customProperties": { "Database:": "db2", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:24 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:10 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db2.db/pokes", @@ -1446,7 +1882,7 @@ "Table Parameters: numRows": "0", "Table 
Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721971", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1454,10 +1890,7 @@ "Num Buckets:": "-1", "Bucket Columns:": "[]", "Sort Columns:": "[]", - "Storage Desc Params: serialization.format": "1", - "Table:": "db2.pokes", - "Constraint Name:": "pk_1173723383_1683022998392_0", - "Column Names:": "foo" + "Storage Desc Params: serialization.format": "1" }, "name": "pokes", "tags": [] @@ -1515,7 +1948,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1966,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1552,7 +1987,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1572,7 +2008,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1587,7 +2024,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1602,7 +2040,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1619,7 +2058,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1634,7 +2074,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + 
"runId": "hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json index 08f281f398909..e93924049f626 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +117,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -121,7 +127,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": 
"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -187,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +211,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -224,7 +232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -239,7 +248,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -257,17 +267,19 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", "Table Type:": "MANAGED_TABLE", "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: another.comment": "This table has no partitions", + "Table Parameters: comment": "This table has array of structs", "Table Parameters: numFiles": "1", "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721976", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -278,6 +290,7 @@ "Storage Desc Params: serialization.format": "1" }, "name": "array_struct_test", + 
"description": "This table has array of structs", "tags": [] } }, @@ -304,6 +317,7 @@ { "fieldPath": "property_id", "nullable": true, + "description": "id of property", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -316,6 +330,7 @@ { "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", "nullable": true, + "description": "service types and providers", "type": { "type": { "com.linkedin.pegasus2avro.schema.ArrayType": { @@ -368,7 +383,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -385,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -405,7 +422,189 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "null", + 
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test", + "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +619,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -438,7 +638,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -448,7 +648,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": 
"1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -518,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -535,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +757,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +773,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +792,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -598,7 +802,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -717,7 +921,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -734,7 +939,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + 
"lastRunId": "no-run-id-provided" } }, { @@ -754,7 +960,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +976,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -787,16 +995,17 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "Table Type:": "MANAGED_TABLE", "Table Parameters: numFiles": "1", + "Table Parameters: numPartitions": "1", "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578704", + "Table Parameters: transient_lastDdlTime": "1697721968", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -853,6 +1062,18 @@ "nativeDataType": "string", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false } ] } @@ -862,7 +1083,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -879,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +1122,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -914,7 +1138,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -932,7 +1157,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -942,7 +1167,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1039,7 +1264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1056,7 +1282,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1076,7 +1303,188 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", + "Table Type:": "MATERIALIZED_VIEW", + "Table Parameters: numFiles": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.struct_test", + "View Expanded Text:": "null", + "View Rewrite Enabled:": "No" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": 
{} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1091,7 +1499,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": 
"hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1109,7 +1518,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1119,10 +1528,10 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", - "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", - "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", "Compressed:": "No", "Num Buckets:": "-1", "Bucket Columns:": "[]", @@ -1285,7 +1694,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1302,7 +1712,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1322,7 +1733,26 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + 
"viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_setup.sql b/metadata-ingestion/tests/integration/hive/hive_setup.sql index 8fb8498894bc0..323a78e24d10b 100644 --- a/metadata-ingestion/tests/integration/hive/hive_setup.sql +++ b/metadata-ingestion/tests/integration/hive/hive_setup.sql @@ -1,10 +1,10 @@ CREATE DATABASE IF NOT EXISTS db1; CREATE DATABASE IF NOT EXISTS db2; -- Setup a "pokes" example table. -CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING); -LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes; +CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING) PARTITIONED BY (baz STRING); +LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes PARTITION (baz='dummy'); -CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING, CONSTRAINT pk_1173723383_1683022998392_0 primary key(foo) DISABLE NOVALIDATE NORELY); +CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING); LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db2.pokes; -- Setup a table with a special character. 
@@ -23,12 +23,12 @@ CREATE TABLE IF NOT EXISTS db1.struct_test CREATE TABLE IF NOT EXISTS db1.array_struct_test ( - property_id INT, + property_id INT COMMENT 'id of property', service array - >> -); + >> COMMENT 'service types and providers' +) TBLPROPERTIES ('comment' = 'This table has array of structs', 'another.comment' = 'This table has no partitions');; WITH test_data as ( @@ -39,6 +39,9 @@ test_data as ( INSERT INTO TABLE db1.array_struct_test select * from test_data; +CREATE MATERIALIZED VIEW db1.struct_test_view_materialized as select * from db1.struct_test; +CREATE VIEW db1.array_struct_test_view as select * from db1.array_struct_test; + CREATE TABLE IF NOT EXISTS db1.nested_struct_test ( property_id INT, @@ -50,9 +53,6 @@ CREATE TABLE IF NOT EXISTS db1.nested_struct_test CREATE TABLE db1.union_test( foo UNIONTYPE, struct, struct> -); +) STORED AS ORC ; -CREATE TABLE db1.map_test( - KeyValue String, - RecordId map -); \ No newline at end of file +CREATE TABLE db1.map_test(KeyValue String, RecordId map); \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index dee85b40bb7a8..1da42b94e320c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -533,20 +533,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -566,20 +552,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { 
- "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -599,20 +571,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 72db36e63daf7..685a606a57c33 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - 
"com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index e5508bdb06b9e..069788cb088ac 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git 
a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index b0f66e7b245c9..f1c932ebd5a70 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -335,20 +335,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -369,20 +355,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -403,20 +375,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 91e13debfa028..9521c9af4bbdc 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ 
b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -550,20 +550,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -583,20 +569,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -616,20 +588,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index e93079119e4f4..dbacd52fe83de 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index a9c8efa7cdb98..aaa874d9ff348 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -351,20 +351,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -384,20 +370,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - 
], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -417,20 +389,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index edd15624a14cd..be8db0722aea3 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -343,20 +343,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -376,20 +362,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -409,20 +381,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { 
- "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index aebc89b609a08..05b74f163ad45 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json 
b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 34bded3cf691e..0778aa0050b00 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -279,20 +279,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -312,20 +298,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -345,20 +317,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 238f4c2580cdf..5a0bd4e12fd3a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": 
"urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 45d5d839e9d21..1b0ee3216383c 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ 
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json index 187cedaefb6b2..b960ba581e6b5 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json @@ -2004,20 +2004,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2037,20 +2023,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { 
"com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2070,20 +2042,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index c2c879e38f37b..e29292a44c949 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json index c1ac54b0fb588..04ecaecbd4afb 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json @@ -584,20 +584,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -617,20 +603,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -650,20 +622,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index f602ca37b3160..080931ae637bc 100644 
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 104bd365669e3..5826c4316b539 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -2134,20 +2134,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - 
"com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2167,20 +2153,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2200,20 +2172,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json index 37a6c94c6952e..53d1ec0229de1 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json @@ -681,20 +681,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -714,20 +700,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": 
"urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -747,20 +719,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index 1f662cfe514e2..e16101b137ac9 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -2,7 +2,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -41,7 +41,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -345,7 +345,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -3988,7 +3988,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -4135,7 +4135,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4150,7 +4150,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4165,7 +4165,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4180,7 +4180,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py index 5228c21223e24..56fb471d4c9f1 100644 --- 
a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py +++ b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py @@ -25,6 +25,7 @@ def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time "username": "mongoadmin", "password": "examplepass", "maxDocumentSize": 25000, + "platform_instance": "instance", }, }, "sink": { diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index 4aaefb48d33e1..38b03ce238d1c 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -213,7 +219,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -230,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + 
"lastRunId": "no-run-id-provided" } }, { @@ -250,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -265,7 +274,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -361,7 +371,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -378,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -398,7 +410,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +567,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -969,7 +983,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -989,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1004,7 +1020,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1019,7 +1036,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1036,7 +1054,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1053,7 +1072,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", 
+ "lastRunId": "no-run-id-provided" } }, { @@ -1068,7 +1088,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1083,7 +1104,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,7 +1237,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1232,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1249,7 +1273,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1269,7 +1294,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1284,7 +1310,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1445,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1435,7 +1463,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1452,7 +1481,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1472,7 +1502,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1487,7 +1518,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + 
"runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1586,7 +1618,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1603,7 +1636,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1620,7 +1654,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1637,7 +1672,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1657,7 +1693,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1677,7 +1714,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1730,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1707,7 +1746,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1724,7 +1764,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1739,7 +1780,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1754,7 +1796,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1874,7 +1917,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - 
"runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1891,7 +1935,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1911,7 +1956,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1926,7 +1972,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2022,7 +2069,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2039,7 +2087,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2059,7 +2108,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2182,7 +2232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2233,7 +2284,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2253,7 +2305,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2268,7 +2321,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2283,7 +2337,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2300,7 +2355,8 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2315,7 +2371,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2330,7 +2387,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2390,7 +2448,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2407,7 +2466,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2427,7 +2487,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2442,7 +2503,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2502,7 +2564,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2519,7 +2582,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2539,7 +2603,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2568,7 +2633,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2597,7 +2663,79 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file 
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json index 535ce964c6058..b9b2a3b2141a8 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { 
"lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -201,7 +212,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -216,7 +228,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -231,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -248,7 +262,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -263,7 +278,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -284,7 +300,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -299,7 +316,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -314,7 +332,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -331,7 +350,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -346,7 +366,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -366,7 +387,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -381,7 +403,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -537,7 +560,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +578,186 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "is_view": "True" + }, + "name": "metadata_aspect_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "postgrestest.public.metadata_aspect_view", + "platform": "urn:li:dataPlatform:postgres", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "urn", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=500)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "aspect", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=200)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": 
"UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -634,31 +837,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", - "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" - }, - { - "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -675,29 +855,39 @@ "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "type": "TRANSFORMED" + 
"type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml index b390d9246677e..2bfa39a65363b 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml @@ -25,7 +25,7 @@ source: include_field_distinct_value_frequencies: false include_field_histogram: false catch_exceptions: true - include_views: false + include_views: true sink: type: file 
config: diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json index bf36a39a8c103..f6fa0a0ed032e 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 
1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -352,7 +364,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +382,186 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "is_view": "True" + }, + "name": "metadata_aspect_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "postgrestest.public.metadata_aspect_view", + "platform": "urn:li:dataPlatform:postgres", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "urn", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=500)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "aspect", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=200)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -391,31 +583,57 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "upstreamLineage", "aspect": { "json": { - "path": [ + "upstreams": [ { - "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", - "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + ], + "confidenceScore": 1.0 }, { - "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml index a489877d52a23..4a2cc543f2d01 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml +++ b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml @@ -13,7 +13,7 @@ source: profile_table_row_count_estimate_only: true turn_off_expensive_profiling_metrics: true catch_exceptions: true - include_views: false + include_views: true sink: type: file config: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py 
index e3cc6c8101650..b6cb578217a2c 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -17,7 +17,6 @@ ) from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage -from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef pytestmark = pytest.mark.integration_batch_2 @@ -742,75 +741,25 @@ def test_sqlglot_parser(): == "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)" ) - assert lineage[0].column_lineage == [ - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="client_director"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="tier"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column='upper("manager")'), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="team_type"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="date_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="monthid"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="target_team"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="seller_email"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="agent_key"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="sme_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - 
downstream=DownstreamColumnRef(table=None, column="revenue_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="service_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="bl_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="software_quota"), - upstreams=[], - logic=None, - ), + # TODO: None of these columns have upstreams? + # That doesn't seem right - we probably need to add fake schemas for the two tables above. + cols = [ + "client_director", + "tier", + 'upper("manager")', + "team_type", + "date_target", + "monthid", + "target_team", + "seller_email", + "agent_key", + "sme_quota", + "revenue_quota", + "service_quota", + "bl_target", + "software_quota", ] + for i, column in enumerate(cols): + assert lineage[0].column_lineage[i].downstream.table is None + assert lineage[0].column_lineage[i].downstream.column == column + assert lineage[0].column_lineage[i].upstreams == [] diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 81e307a78ae9e..ff448eca01071 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -94,7 +94,7 @@ def default_query_results( # noqa: C901 "name": "VIEW_{}".format(view_idx), "created_on": datetime(2021, 6, 8, 0, 0, 0, 0), "comment": "Comment for View", - "text": None, + "text": f"create view view_{view_idx} as select * from table_{view_idx}", } for view_idx in range(1, num_views + 1) ] diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json index a424b258e68ff..c7273fee5a2e5 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json +++ 
b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json @@ -24,7 +24,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -39,7 +40,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -54,7 +56,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -90,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -105,7 +110,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -121,7 +127,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -150,7 +157,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -165,7 +173,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -180,7 +189,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -216,7 +227,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -231,7 +243,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -251,7 +264,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -462,7 +477,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -488,7 +504,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -503,7 +520,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -520,7 +538,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -544,7 +563,8 @@ }, "systemMetadata": { 
"lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -559,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -755,7 +776,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +819,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -813,7 +837,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -837,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -852,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1048,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1074,7 +1102,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, 
{ @@ -1089,7 +1118,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1106,7 +1136,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1130,7 +1161,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1145,7 +1177,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1341,7 +1374,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1367,7 +1401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1382,7 +1417,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1399,7 +1435,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1423,7 +1460,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1438,7 +1476,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": 
"snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1634,7 +1673,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1660,7 +1700,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1675,7 +1716,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1734,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1716,7 +1759,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1731,7 +1775,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1927,7 +1972,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1953,7 +1999,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1968,7 +2015,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1985,7 +2033,8 @@ }, "systemMetadata": { "lastObserved": 
1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2009,7 +2058,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2024,7 +2074,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2220,7 +2271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2246,7 +2298,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2261,7 +2314,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2278,7 +2332,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2302,7 +2357,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2317,7 +2373,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2513,7 +2570,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { 
@@ -2539,7 +2597,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2554,7 +2613,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2571,7 +2631,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2595,7 +2656,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2610,7 +2672,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2806,7 +2869,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2832,7 +2896,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2847,7 +2912,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2864,7 +2930,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2888,7 +2955,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": 
"snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2903,7 +2971,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3099,7 +3168,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3125,7 +3195,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3140,7 +3211,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3157,7 +3229,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3181,7 +3254,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3197,7 +3271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3212,7 +3287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3382,7 +3458,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3408,7 +3485,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3423,7 +3501,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3440,7 +3519,26 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -3464,7 +3562,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3480,7 +3579,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3496,7 +3596,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3512,7 +3613,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3527,7 +3629,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": 
"snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3690,7 +3793,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3716,7 +3820,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3731,7 +3836,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3748,7 +3854,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3773,7 +3880,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3797,7 +3905,26 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -3819,7 +3946,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ 
-3841,7 +3969,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3863,7 +3992,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3885,7 +4015,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3907,7 +4038,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3929,7 +4061,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3951,7 +4084,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3973,7 +4107,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3995,7 +4130,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4017,7 +4153,145 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": 
"UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -4034,14 +4308,127 @@ "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { - "lastObserved": 
1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -4204,7 +4591,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4340,7 +4728,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4476,7 +4865,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4612,7 +5002,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4748,7 +5139,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4884,7 +5276,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5020,7 +5413,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5156,7 +5550,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5292,7 +5687,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" } }, { @@ -5428,7 +5824,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5456,7 +5853,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5484,7 +5882,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5512,7 +5911,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5540,7 +5940,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5568,7 +5969,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5596,7 +5998,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5624,7 +6027,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5652,7 +6056,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5680,7 +6085,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5708,7 +6114,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5736,7 +6143,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5764,7 +6172,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5786,7 +6195,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5808,7 +6218,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5830,7 +6241,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5852,7 +6264,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5874,7 +6287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5896,7 +6310,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5918,7 +6333,8 @@ }, 
"systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5940,7 +6356,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5962,7 +6379,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5984,7 +6402,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5999,7 +6418,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6014,7 +6434,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6029,7 +6450,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6044,7 +6466,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6059,7 +6482,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json 
b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 7687b99ac8d6d..5e55860483d24 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -1,13 +1,14 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "snowflake", + "instance": "instance1", "env": "PROD", "database": "test_db" }, @@ -24,205 +25,228 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:snowflake" + "removed": false } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Database" - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [] + "typeNames": [ + "Database" + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "snowflake", - "env": "PROD", - "database": "test_db", - "schema": "test_schema" - }, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", - "name": "TEST_SCHEMA", - "description": "comment for TEST_DB.TEST_SCHEMA", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - } + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + 
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:snowflake" + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", + "name": "TABLE_3", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Schema" + "Table" ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": 
"urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "snowflake", + "instance": "instance1", + "env": "PROD", + "database": "test_db", + "schema": "test_schema" + }, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", + "name": "TEST_SCHEMA", + "description": "comment for TEST_DB.TEST_SCHEMA", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + } } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_1", + "schemaName": "test_db.test_schema.table_3", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -375,94 +399,199 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": {}, - "externalUrl": 
"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", - "name": "TABLE_1", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "removed": false } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Schema" ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + 
"container": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 
1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", + "name": "VIEW_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1", + "description": "Comment for View", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", 
"changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -472,17 +601,18 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_2", + "schemaName": "test_db.test_schema.table_1", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -635,114 +765,108 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", - "name": "TABLE_2", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "typeNames": [ + "View" + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", + "name": "TABLE_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "schemaMetadata", "aspect": { "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "test_db.test_schema.table_3", + "schemaName": "test_db.test_schema.view_1", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -895,94 +1019,263 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", - "name": "TABLE_3", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": 
"urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "upstreamLineage", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, + "upstreams": [ { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -992,17 +1285,18 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_4", + "schemaName": "test_db.test_schema.table_2", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -1155,20 +1449,21 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - 
"runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", - "name": "TABLE_4", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", + "name": "TABLE_10", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -1181,68 +1476,13 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": 
"UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1252,12 +1492,13 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { @@ -1415,12 +1656,45 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -1441,27 +1715,58 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", + "name": "TABLE_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + 
"aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1473,75 +1778,159 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": 
"no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_6", "platform": "urn:li:dataPlatform:snowflake", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ { - "fieldPath": "col_1", + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_10", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", "nullable": false, "description": "Comment for column", "type": { @@ -1675,94 +2064,13 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - 
"runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", - "name": "TABLE_6", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } 
- ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1772,17 +2080,18 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_7", + "schemaName": "test_db.test_schema.table_6", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -1935,20 +2244,54 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", - "name": "TABLE_7", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", + "name": "TABLE_6", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -1961,27 +2304,46 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1993,36 +2355,75 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": 
"urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2032,17 +2433,18 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", 
"aspect": { "json": { - "schemaName": "test_db.test_schema.table_8", + "schemaName": "test_db.test_schema.table_4", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2195,20 +2597,21 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", - "name": "TABLE_8", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", + "name": "TABLE_7", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -2221,27 +2624,56 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", + "name": "TABLE_4", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2253,56 +2685,94 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": 
"urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_9", + "schemaName": "test_db.test_schema.table_7", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2455,96 +2925,15 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", - "name": "TABLE_9", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": 
"UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", - "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "status", "aspect": { "json": { "removed": false @@ -2552,17 +2941,18 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_10", + "schemaName": "test_db.test_schema.table_8", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2715,329 +3105,877 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", - "name": "TABLE_10", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - 
"typeNames": [ - "Table" - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", + "name": "TABLE_8", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", 
"changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "datasetProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", + "name": "TABLE_9", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "schemaMetadata", "aspect": { "json": { - "upstreams": [ + "schemaName": "test_db.test_schema.table_9", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": 
false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + 
"aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/", + "name": "VIEW_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2", + "description": "Comment for View", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_2", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": 
"NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 2c77ace8b53e5..4c00e48ede9fb 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): validate_upstreams_against_patterns=False, 
include_operational_stats=True, email_as_user_identifier=True, + incremental_lineage=False, start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc ), @@ -210,10 +211,12 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_ include_technical_schema=True, include_table_lineage=True, include_column_lineage=False, - include_views=False, - include_view_lineage=False, + include_views=True, + include_view_lineage=True, include_usage_stats=False, + incremental_lineage=False, include_operational_stats=False, + platform_instance="instance1", start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc ), diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index bba53c1e97a47..4b0dd2b1045a3 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -283,10 +283,13 @@ def test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warni ) snowflake_pipeline_config1 = snowflake_pipeline_config.copy() - cast( + config = cast( SnowflakeV2Config, cast(PipelineConfig, snowflake_pipeline_config1).source.config, - ).include_view_lineage = True + ) + config.include_table_lineage = True + config.include_view_lineage = True + pipeline = Pipeline(snowflake_pipeline_config1) pipeline.run() pipeline.raise_from_status() # pipeline should not fail diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py index f72bd5b72d2cd..7e2ac94fa4e35 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py @@ -31,6 +31,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: 
match_fully_qualified_names=True, schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), include_tables=include_tables, + incremental_lineage=False, stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj( { "enabled": True, @@ -49,7 +50,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: @freeze_time(FROZEN_TIME) -def test_tableau_stateful(mock_datahub_graph): +def test_stale_metadata_removal(mock_datahub_graph): with mock.patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index a495d04c4e398..2fe7a76fd01ae 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", 
"aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 
@@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ 
-2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,7 +2297,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2208,7 +2318,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2228,7 +2339,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2243,7 +2355,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2258,7 +2371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2275,7 +2389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2290,7 +2405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2311,7 +2427,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -2326,7 +2443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2341,7 +2459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2358,7 +2477,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2373,7 +2493,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2393,7 +2514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2414,7 +2536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2429,7 +2552,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2444,7 +2568,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2461,7 +2586,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2476,7 +2602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2517,7 +2645,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2532,7 +2661,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2547,7 +2677,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2564,7 +2695,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2579,7 +2711,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2599,7 +2732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2620,7 +2754,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2635,7 +2770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2650,7 +2786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2667,7 +2804,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2682,7 +2820,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2702,7 +2841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2723,7 +2863,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2738,7 +2879,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2753,7 +2895,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2770,7 +2913,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2785,7 +2929,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2805,7 +2950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2826,7 +2972,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2841,7 +2988,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2856,7 +3004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2873,7 +3022,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2888,7 +3038,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2908,7 +3059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2929,7 +3081,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2944,7 +3097,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2959,7 +3113,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2976,7 +3131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2991,7 +3147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3011,7 +3168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3032,7 +3190,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3047,7 +3206,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3062,7 +3222,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3079,7 +3240,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3094,7 +3256,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3114,7 +3277,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3135,7 +3299,8 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3150,7 +3315,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3165,7 +3331,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3182,7 +3349,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3197,7 +3365,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3217,7 +3386,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3238,7 +3408,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3253,7 +3424,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3268,7 +3440,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3285,7 +3458,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3300,7 +3474,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3320,7 +3495,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3335,7 +3511,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3407,7 +3584,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3424,7 +3602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3448,7 +3627,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3469,7 +3649,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3484,7 +3665,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3499,7 +3681,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3516,7 +3699,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3531,7 +3715,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3551,7 +3736,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3566,7 +3752,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3638,7 +3825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ 
-3655,7 +3843,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3679,7 +3868,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3694,7 +3884,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3790,7 +3981,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3807,7 +3999,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3831,7 +4024,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3852,7 +4046,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3867,7 +4062,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3882,7 +4078,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3899,7 +4096,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3914,7 +4112,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3934,7 +4133,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -3955,7 +4155,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3970,7 +4171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3985,7 +4187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4002,7 +4205,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4017,7 +4221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4037,7 +4242,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4058,7 +4264,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4073,7 +4280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4088,7 +4296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4105,7 +4314,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4120,27 +4330,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + 
"entityType": "container", + "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", + "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4150,12 +4367,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4165,12 +4383,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4180,27 +4399,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": 
"urn:li:container:c6627af82d44de89492e1a9315ae9f4b", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 8277ff8bf7e89..c1984828750eb 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ 
-1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - 
"lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,27 +2297,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2218,12 +2334,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2233,12 +2350,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2248,27 +2366,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index f3714bba6364d..804a8d74d0d51 100644 --- 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ 
-313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 
+548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 
@@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 
+1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } 
}, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } 
}, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,27 +2297,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": 
"urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2218,12 +2334,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2233,12 +2350,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2248,27 +2366,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" 
} }, { - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index d25d23daae2ea..9d1b288057a16 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -95,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -106,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "b6a0c1e2-f90a-4c86-a226-bf7ca59ad79f", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-08-06 21:01:05.157000", - "date_modified": "2023-08-06 21:01:05.283000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -125,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -142,7 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + 
"runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - 
"runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1837,7 +1926,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1854,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1867,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-08-06 21:01:05.093000", - "date_modified": "2023-08-06 21:01:05.093000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1882,12 +1973,13 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1899,7 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 
+2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,7 +2297,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2208,7 +2318,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2223,7 +2334,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2238,7 +2350,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } 
}, { @@ -2253,12 +2366,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2268,7 +2382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index c1347a7c8caca..a17d52f9a39b1 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -45,7 +45,7 @@ CREATE TABLE Foo.SalesReason ) ; GO -CREATE PROCEDURE Foo.DBs @ID INT +CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT AS SELECT @ID AS ThatDB; GO diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index c31867f5aa904..0510f4a40f659 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -20,7 +20,7 @@ from datahub.ingestion.source.tableau import TableauConfig, TableauSource from datahub.ingestion.source.tableau_common import ( TableauLineageOverrides, - make_table_urn, + TableauUpstreamReference, ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, @@ -546,13 +546,13 @@ def test_lineage_overrides(): enable_logging() # Simple - specify platform instance to presto table assert ( - make_table_urn( - DEFAULT_ENV, + 
TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", - platform_instance_map={"presto": "my_presto_instance"}, + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_presto_instance"} ) == "urn:li:dataset:(urn:li:dataPlatform:presto,my_presto_instance.presto_catalog.test-schema.test-table,PROD)" ) @@ -560,12 +560,13 @@ def test_lineage_overrides(): # Transform presto urn to hive urn # resulting platform instance for hive = mapped platform instance + presto_catalog assert ( - make_table_urn( - DEFAULT_ENV, + TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_instance"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"presto": "hive"}, @@ -574,14 +575,15 @@ def test_lineage_overrides(): == "urn:li:dataset:(urn:li:dataPlatform:hive,my_instance.presto_catalog.test-schema.test-table,PROD)" ) - # tranform hive urn to presto urn + # transform hive urn to presto urn assert ( - make_table_urn( - DEFAULT_ENV, - "", - "hive", + TableauUpstreamReference( + None, "test-schema", - "test-schema.test-table", + "test-table", + "hive", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"hive": "my_presto_instance.presto_catalog"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"hive": "presto"}, diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index 19961e48b4a33..c43223c68a6b6 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": 
"trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -219,7 
+231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1688422059" + "transient_lastddltime": "1698223433" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -315,7 +327,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -332,7 +345,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -356,7 +370,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -371,7 +386,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -392,7 +408,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422063" + "transient_lastddltime": "1698223435" }, "name": "map_test", "tags": [] @@ -454,7 +470,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -471,7 +488,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -495,7 +513,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -510,7 +529,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -531,7 +551,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": 
"nested_struct_test", "tags": [] @@ -642,7 +662,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -659,7 +680,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -683,7 +705,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -698,7 +721,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -714,7 +738,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1688421792" + "transient_lastddltime": "1698223429" }, "name": "pokes", "tags": [] @@ -784,7 +808,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -801,7 +826,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -825,7 +851,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -840,7 +867,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -861,7 +889,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688421808" + "transient_lastddltime": "1698223431" }, "name": "struct_test", "tags": [] @@ -950,7 +978,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -967,7 +996,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -991,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1006,7 +1037,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1024,7 +1056,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": "struct_test_view_materialized", "tags": [] @@ -1113,7 +1145,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1130,7 +1163,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1154,7 +1188,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1169,7 +1204,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1190,7 +1226,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688421807" + "transient_lastddltime": "1698223431" }, "name": "_test_table_underscore", "tags": [] @@ -1248,7 +1284,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1265,7 +1302,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": 
"trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1289,7 +1327,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1304,7 +1343,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1325,7 +1365,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": "union_test", "tags": [] @@ -1467,7 +1507,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1484,7 +1525,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1550,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1523,7 +1566,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1539,7 +1583,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1688422062", + "transient_lastddltime": "1698223435", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -1634,7 +1678,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1651,7 +1696,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1668,7 +1714,57 
@@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1788,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/vertica/ddl.sql b/metadata-ingestion/tests/integration/vertica/ddl.sql index 59a71a1a1f7b5..ceebcd8e9ce2a 100644 --- a/metadata-ingestion/tests/integration/vertica/ddl.sql +++ b/metadata-ingestion/tests/integration/vertica/ddl.sql @@ -1,5 +1,4 @@ 
-\set AUTOCOMMIT on -ALTER USER dbadmin IDENTIFIED BY 'abc123'; + -- Create a Top-k projection CREATE TABLE readings (meter_id INT, reading_date TIMESTAMP, reading_value FLOAT); @@ -35,12 +34,16 @@ SELECT tokenize(phrase) OVER () FROM phrases; -- Create a temp table -CREATE TEMPORARY TABLE sampletemp (a int, b int) ON COMMIT PRESERVE ROWS; -INSERT INTO sampletemp VALUES(1,2); +-- CREATE TEMPORARY TABLE sampletemp (a int, b int) ON COMMIT PRESERVE ROWS; +-- INSERT INTO sampletemp VALUES(1,2); -- Create partition key -ALTER TABLE store.store_orders_fact PARTITION BY date_ordered::DATE GROUP BY DATE_TRUNC('month', (date_ordered)::DATE); -SELECT PARTITION_TABLE('store.store_orders_fact'); -CREATE PROJECTION ytd_orders AS SELECT * FROM store.store_orders_fact ORDER BY date_ordered - ON PARTITION RANGE BETWEEN date_trunc('year',now())::date AND NULL; +-- ALTER TABLE store.store_orders_fact PARTITION BY date_ordered::DATE GROUP BY DATE_TRUNC('month', (date_ordered)::DATE); +-- SELECT PARTITION_TABLE('store.store_orders_fact'); +-- CREATE PROJECTION ytd_orders AS SELECT * FROM store.store_orders_fact ORDER BY date_ordered +-- ON PARTITION RANGE BETWEEN date_trunc('year',now())::date AND NULL; + + + + SELECT start_refresh(); \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/vertica/docker-compose.yml b/metadata-ingestion/tests/integration/vertica/docker-compose.yml index 84af5c32a60e3..1ba7990c826b2 100644 --- a/metadata-ingestion/tests/integration/vertica/docker-compose.yml +++ b/metadata-ingestion/tests/integration/vertica/docker-compose.yml @@ -6,7 +6,7 @@ services: APP_DB_USER: "dbadmin" APP_DB_PASSWORD: "abc123" container_name: vertica-ce - image: vertica/vertica-ce:12.0.2-0 + image: vertica/vertica-ce:23.4.0-0 ports: - "5433:5433" - "5444:5444" diff --git a/metadata-ingestion/tests/integration/vertica/test_vertica.py b/metadata-ingestion/tests/integration/vertica/test_vertica.py index fe306d1d0b2b8..94ad33ba21ce4 100644 --- 
a/metadata-ingestion/tests/integration/vertica/test_vertica.py +++ b/metadata-ingestion/tests/integration/vertica/test_vertica.py @@ -1,6 +1,5 @@ import subprocess -import time -from typing import List, Optional +from typing import List import pytest from freezegun import freeze_time @@ -17,13 +16,12 @@ def test_resources_dir(pytestconfig): return pytestconfig.rootpath / "tests/integration/vertica" -def is_vertica_responsive( - container_name: str, port: int, hostname: Optional[str] -) -> bool: - if hostname: - cmd = f"docker logs {container_name} 2>&1 | grep 'Vertica is now running' " - ret = subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL) - +def is_vertica_responsive(container_name: str) -> bool: + cmd = f"docker logs {container_name} 2>&1 | grep 'Vertica is now running' " + ret = subprocess.run( + cmd, + shell=True, + ) return ret.returncode == 0 @@ -37,28 +35,22 @@ def vertica_runner(docker_compose_runner, test_resources_dir): "vertica-ce", 5433, timeout=120, - checker=lambda: is_vertica_responsive( - "vertica-ce", 5433, hostname="vertica-ce" - ), + checker=lambda: is_vertica_responsive("vertica-ce"), ) commands = """ docker cp tests/integration/vertica/ddl.sql vertica-ce:/home/dbadmin/ && - docker exec vertica-ce sh -c "/opt/vertica/bin/vsql -w abc123 -f /home/dbadmin/ddl.sql + docker exec vertica-ce sh -c "/opt/vertica/bin/vsql -w abc123 -f /home/dbadmin/ddl.sql" """ ret = subprocess.run(commands, shell=True, stdout=subprocess.DEVNULL) - # waiting for vertica to create default table and system table and ml models - time.sleep(60) - assert ret.returncode >= 1 + assert ret.returncode == 0 yield docker_services -# Test needs more work to be done , currently it is working fine. 
@freeze_time(FROZEN_TIME) -@pytest.mark.skip("Failing in CI, cmd failing with exit code 1") @pytest.mark.integration def test_vertica_ingest_with_db(vertica_runner, pytestconfig, tmp_path): test_resources_dir = pytestconfig.rootpath / "tests/integration/vertica" @@ -72,7 +64,7 @@ def test_vertica_ingest_with_db(vertica_runner, pytestconfig, tmp_path): ignore_paths: List[str] = [ r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['create_time'\]", r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['table_size'\]", - r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['projection_size'\]", + r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['Projection_size'\]", r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['ROS_Count'\]", r"root\[\d+\]\['aspect'\].+\['customProperties'\]\['cluster_size'\]", r"root\[\d+\]\['aspect'\].+\['customProperties'\]\['udx_language'\]", diff --git a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json index 44a5e07d7b996..ef535158165da 100644 --- a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json @@ -11,7 +11,7 @@ "env": "PROD", "database": "vmart", "cluster_type": "Enterprise", - "cluster_size": "122 GB", + "cluster_size": "101 GB", "subcluster": " ", "communal_storage_path": "" }, @@ -20,7 +20,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -35,7 +36,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -50,7 +52,8 @@ }, 
"systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -67,7 +70,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -82,7 +86,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -97,16 +102,17 @@ "env": "PROD", "database": "vmart", "schema": "public", - "projection_count": "9", - "udx_list": "APPROXIMATE_COUNT_DISTINCT_SYNOPSIS_INFO, APPROXIMATE_MEDIAN, APPROXIMATE_PERCENTILE, AcdDataToCount, AcdDataToLongSyn, AcdDataToSyn, AcdSynToCount, AcdSynToSyn, DelimitedExport, DelimitedExportMulti, EmptyMap, Explode, FAvroParser, FCefParser, FCsvParser, FDelimitedPairParser, FDelimitedParser, FIDXParser, FJSONParser, FRegexParser, FlexTokenizer, JsonExport, JsonExportMulti, KafkaAvroParser, KafkaCheckBrokers, KafkaExport, KafkaInsertDelimiters, KafkaInsertLengths, KafkaJsonParser, KafkaListManyTopics, KafkaListTopics, KafkaOffsets, KafkaParser, KafkaSource, KafkaTopicDetails, MSE, MapAggregate, MapAggregate, MapContainsKey, MapContainsKey, MapContainsValue, MapContainsValue, MapDelimitedExtractor, MapItems, MapItems, MapJSONExtractor, MapKeys, MapKeys, MapKeysInfo, MapKeysInfo, MapLookup, MapLookup, MapLookup, MapPut, MapRegexExtractor, MapSize, MapSize, MapToString, MapToString, MapValues, MapValues, MapValuesOrField, MapVersion, MapVersion, OrcExport, OrcExportMulti, PRC, ParquetExport, ParquetExportMulti, PickBestType, PickBestType, PickBestType, ROC, STV_AsGeoJSON, STV_AsGeoJSON, STV_AsGeoJSON, STV_Create_Index, STV_Create_Index, STV_Create_Index, STV_DWithin, STV_DWithin, STV_DWithin, STV_Describe_Index, STV_Drop_Index, STV_Export2Shapefile, STV_Extent, STV_Extent, STV_ForceLHR, 
STV_Geography, STV_Geography, STV_GeographyPoint, STV_Geometry, STV_Geometry, STV_GeometryPoint, STV_GeometryPoint, STV_GetExportShapefileDirectory, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_IsValidReason, STV_IsValidReason, STV_IsValidReason, STV_LineStringPoint, STV_LineStringPoint, STV_LineStringPoint, STV_MemSize, STV_MemSize, STV_MemSize, STV_NN, STV_NN, STV_NN, STV_PolygonPoint, STV_PolygonPoint, STV_PolygonPoint, STV_Refresh_Index, STV_Refresh_Index, STV_Refresh_Index, STV_Rename_Index, STV_Reverse, STV_SetExportShapefileDirectory, STV_ShpCreateTable, STV_ShpParser, STV_ShpSource, ST_Area, ST_Area, ST_Area, ST_AsBinary, ST_AsBinary, ST_AsBinary, ST_AsText, ST_AsText, ST_AsText, ST_Boundary, ST_Buffer, ST_Centroid, ST_Contains, ST_Contains, ST_Contains, ST_ConvexHull, ST_Crosses, ST_Difference, ST_Disjoint, ST_Disjoint, ST_Disjoint, ST_Distance, ST_Distance, ST_Distance, ST_Envelope, ST_Equals, ST_Equals, ST_Equals, ST_GeoHash, ST_GeoHash, ST_GeoHash, ST_GeographyFromText, ST_GeographyFromWKB, ST_GeomFromGeoHash, ST_GeomFromGeoJSON, ST_GeomFromGeoJSON, ST_GeomFromText, ST_GeomFromText, ST_GeomFromWKB, ST_GeomFromWKB, ST_GeometryN, ST_GeometryN, ST_GeometryN, ST_GeometryType, ST_GeometryType, ST_GeometryType, ST_Intersection, ST_Intersects, ST_Intersects, ST_IsEmpty, ST_IsEmpty, ST_IsEmpty, ST_IsSimple, ST_IsSimple, ST_IsSimple, ST_IsValid, ST_IsValid, ST_IsValid, ST_Length, ST_Length, ST_Length, ST_NumGeometries, ST_NumGeometries, ST_NumGeometries, ST_NumPoints, ST_NumPoints, ST_NumPoints, ST_Overlaps, ST_PointFromGeoHash, ST_PointN, ST_PointN, ST_PointN, ST_Relate, ST_SRID, ST_SRID, ST_SRID, ST_Simplify, ST_SimplifyPreserveTopology, ST_SymDifference, ST_Touches, ST_Touches, ST_Touches, ST_Transform, ST_Union, ST_Union, ST_Within, ST_Within, ST_Within, ST_X, ST_X, ST_X, ST_XMax, ST_XMax, ST_XMax, ST_XMin, ST_XMin, ST_XMin, ST_Y, ST_Y, ST_Y, ST_YMax, ST_YMax, ST_YMax, ST_YMin, 
ST_YMin, ST_YMin, ST_intersects, SetMapKeys, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_NumCol, VoltageSecureAccess, VoltageSecureAccess, VoltageSecureConfigure, VoltageSecureConfigureGlobal, VoltageSecureProtect, VoltageSecureProtect, VoltageSecureProtectAllKeys, VoltageSecureRefreshPolicy, VoltageSecureVersion, append_centers, apply_bisecting_kmeans, apply_iforest, apply_inverse_pca, apply_inverse_svd, apply_kmeans, apply_normalize, apply_one_hot_encoder, apply_pca, apply_svd, approximate_quantiles, ar_create_blobs, ar_final_newton, ar_save_model, ar_transition_newton, avg_all_columns_local, bisecting_kmeans_init_model, bk_apply_best_kmeans_results, bk_compute_totss_local, bk_finalize_model, bk_get_rows_in_active_cluster, bk_kmeans_compute_local_centers, bk_kmeans_compute_withinss, bk_kmeans_fast_random_init, bk_kmeans_slow_random_init, bk_kmeanspp_init_cur_cluster, bk_kmeanspp_reset_blob, bk_kmeanspp_select_new_centers, bk_kmeanspp_within_chunk_sum, bk_save_final_model, bk_write_new_cluster_level, blob_to_table, bufUdx, bufUdx, calc_pseudo_centers, calculate_alpha_linear, calculate_hessian_linear1, calculate_hessian_linear2, cleanup_kmeans_files, compute_and_save_global_center, compute_and_save_new_centers, compute_local_totss, compute_local_withinss, compute_new_local_centers, confusion_matrix, coordinate_descent_covariance, corr_matrix, count_rows_in_blob, create_aggregator_blob, error_rate, evaluate_naive_bayes_model, evaluate_reg_model, evaluate_svm_model, export_model_files, finalize_blob_resource_group, get_attr_minmax, get_attr_robust_zscore, get_attr_zscore, get_model_attribute, get_model_summary, get_robust_zscore_median, iforest_create_blobs, iforest_phase0_udf1, iforest_phase0_udf2, iforest_phase1_udf1, iforest_phase1_udf2, iforest_phase1_udf3, iforest_phase1_udf4, iforest_phase2_udf1, iforest_phase2_udf2, iforest_phase2_udf3, iforest_phase2_udf4, iforest_save_model, import_model_files, 
isOrContains, kmeansAddMetricsToModel, kmeans_init_blobs, kmeans_to_write_final_centers, lift_table, line_search_logistic1, line_search_logistic2, load_rows_into_blocks, map_factor, math_op, matrix_global_xtx, matrix_local_xtx, mode_finder, model_converter, naive_bayes_phase1, naive_bayes_phase1_blob, naive_bayes_phase2, pca_prep1_global, pca_prep1_local, pca_prep2, pmml_parser, predict_autoregressor, predict_linear_reg, predict_logistic_reg, predict_moving_average, predict_naive_bayes, predict_naive_bayes_classes, predict_pmml, predict_rf_classifier, predict_rf_classifier_classes, predict_rf_regressor, predict_svm_classifier, predict_svm_regressor, predict_xgb_classifier, predict_xgb_classifier_classes, predict_xgb_regressor, random_init, random_init_write, read_from_dfblob, read_map_factor, read_ptree, read_tree, reg_final_bfgs, reg_final_newton, reg_transition_bfgs, reg_transition_newton, reg_write_model, remove_blob, reverse_normalize, rf_blob, rf_clean, rf_phase0_udf1, rf_phase0_udf2, rf_phase1_udf1, rf_phase1_udf2, rf_phase1_udf3, rf_phase1_udf4, rf_phase2_udf1, rf_phase2_udf2, rf_phase2_udf3, rf_phase2_udf4, rf_predictor_importance, rf_save_model, rsquared, save_cv_result, save_pca_model, save_svd_model, save_svm_model, select_new_centers, store_minmax_model, store_one_hot_encoder_model, store_robust_zscore_model, store_zscore_model, table_to_blob, table_to_dfblob, update_and_return_sum_of_squared_distances, upgrade_model_format, writeInitialKmeansModelToDfs, xgb_create_blobs, xgb_phase0_udf1, xgb_phase0_udf2, xgb_phase1_udf1, xgb_phase1_udf2, xgb_phase1_udf3, xgb_phase2_udf1, xgb_phase2_udf2, xgb_phase2_udf3, xgb_prune, xgb_save_model, yule_walker, ", - "udx_language": "ComplexTypesLib -- Functions for Complex Types | DelimitedExportLib -- Delimited data export package | JsonExportLib -- Json data export package | MachineLearningLib -- Machine learning package | OrcExportLib -- Orc export package | ParquetExportLib -- Parquet export package | ApproximateLib 
-- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | KafkaLib -- Kafka streaming load and export | PlaceLib -- Geospatial package | VoltageSecureLib -- Voltage SecureData Connector | " + "projection_count": "12", + "udx_list": "APPROXIMATE_COUNT_DISTINCT_SYNOPSIS_INFO, APPROXIMATE_MEDIAN, APPROXIMATE_PERCENTILE, AcdDataToCount, AcdDataToLongSyn, AcdDataToSyn, AcdSynToCount, AcdSynToSyn, DelimitedExport, DelimitedExportMulti, EmptyMap, Explode, FAvroParser, FCefParser, FCsvParser, FDelimitedPairParser, FDelimitedParser, FIDXParser, FJSONParser, FRegexParser, FlexTokenizer, JsonExport, JsonExportMulti, KafkaAvroParser, KafkaCheckBrokers, KafkaExport, KafkaInsertDelimiters, KafkaInsertLengths, KafkaJsonParser, KafkaListManyTopics, KafkaListTopics, KafkaOffsets, KafkaParser, KafkaSource, KafkaTopicDetails, MSE, MapAggregate, MapAggregate, MapContainsKey, MapContainsKey, MapContainsValue, MapContainsValue, MapDelimitedExtractor, MapItems, MapItems, MapJSONExtractor, MapKeys, MapKeys, MapKeysInfo, MapKeysInfo, MapLookup, MapLookup, MapLookup, MapPut, MapRegexExtractor, MapSize, MapSize, MapToString, MapToString, MapValues, MapValues, MapValuesOrField, MapVersion, MapVersion, OrcExport, OrcExportMulti, PRC, ParquetExport, ParquetExportMulti, PickBestType, PickBestType, PickBestType, ROC, STV_AsGeoJSON, STV_AsGeoJSON, STV_AsGeoJSON, STV_Create_Index, STV_Create_Index, STV_Create_Index, STV_DWithin, STV_DWithin, STV_DWithin, STV_Describe_Index, STV_Drop_Index, STV_Export2Shapefile, STV_Extent, STV_Extent, STV_ForceLHR, STV_Geography, STV_Geography, STV_GeographyPoint, STV_Geometry, STV_Geometry, STV_GeometryPoint, STV_GeometryPoint, STV_GetExportShapefileDirectory, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_IsValidReason, STV_IsValidReason, STV_IsValidReason, STV_LineStringPoint, STV_LineStringPoint, STV_LineStringPoint, STV_MemSize, STV_MemSize, STV_MemSize, STV_NN, 
STV_NN, STV_NN, STV_PolygonPoint, STV_PolygonPoint, STV_PolygonPoint, STV_Refresh_Index, STV_Refresh_Index, STV_Refresh_Index, STV_Rename_Index, STV_Reverse, STV_SetExportShapefileDirectory, STV_ShpCreateTable, STV_ShpParser, STV_ShpSource, ST_Area, ST_Area, ST_Area, ST_AsBinary, ST_AsBinary, ST_AsBinary, ST_AsText, ST_AsText, ST_AsText, ST_Boundary, ST_Buffer, ST_Centroid, ST_Contains, ST_Contains, ST_Contains, ST_ConvexHull, ST_Crosses, ST_Difference, ST_Disjoint, ST_Disjoint, ST_Disjoint, ST_Distance, ST_Distance, ST_Distance, ST_Envelope, ST_Equals, ST_Equals, ST_Equals, ST_GeoHash, ST_GeoHash, ST_GeoHash, ST_GeographyFromText, ST_GeographyFromWKB, ST_GeomFromGeoHash, ST_GeomFromGeoJSON, ST_GeomFromGeoJSON, ST_GeomFromText, ST_GeomFromText, ST_GeomFromWKB, ST_GeomFromWKB, ST_GeometryN, ST_GeometryN, ST_GeometryN, ST_GeometryType, ST_GeometryType, ST_GeometryType, ST_Intersection, ST_Intersects, ST_Intersects, ST_IsEmpty, ST_IsEmpty, ST_IsEmpty, ST_IsSimple, ST_IsSimple, ST_IsSimple, ST_IsValid, ST_IsValid, ST_IsValid, ST_Length, ST_Length, ST_Length, ST_NumGeometries, ST_NumGeometries, ST_NumGeometries, ST_NumPoints, ST_NumPoints, ST_NumPoints, ST_Overlaps, ST_PointFromGeoHash, ST_PointN, ST_PointN, ST_PointN, ST_Relate, ST_SRID, ST_SRID, ST_SRID, ST_Simplify, ST_SimplifyPreserveTopology, ST_SymDifference, ST_Touches, ST_Touches, ST_Touches, ST_Transform, ST_Union, ST_Union, ST_Within, ST_Within, ST_Within, ST_X, ST_X, ST_X, ST_XMax, ST_XMax, ST_XMax, ST_XMin, ST_XMin, ST_XMin, ST_Y, ST_Y, ST_Y, ST_YMax, ST_YMax, ST_YMax, ST_YMin, ST_YMin, ST_YMin, ST_intersects, SetMapKeys, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_NumCol, Unnest, VoltageSecureAccess, VoltageSecureAccess, VoltageSecureConfigure, VoltageSecureConfigureGlobal, VoltageSecureProtect, VoltageSecureProtect, VoltageSecureProtectAllKeys, VoltageSecureRefreshPolicy, VoltageSecureVersion, append_centers, apply_bisecting_kmeans, apply_iforest, 
apply_inverse_pca, apply_inverse_svd, apply_kmeans, apply_kprototypes, apply_normalize, apply_one_hot_encoder, apply_pca, apply_svd, approximate_quantiles, ar_create_blobs, ar_final_newton, ar_save_model, ar_transition_newton, arima_bfgs, arima_line_search, arima_save_model, avg_all_columns_local, bisecting_kmeans_init_model, bk_apply_best_kmeans_results, bk_compute_totss_local, bk_finalize_model, bk_get_rows_in_active_cluster, bk_kmeans_compute_local_centers, bk_kmeans_compute_withinss, bk_kmeans_fast_random_init, bk_kmeans_slow_random_init, bk_kmeanspp_init_cur_cluster, bk_kmeanspp_reset_blob, bk_kmeanspp_select_new_centers, bk_kmeanspp_within_chunk_sum, bk_save_final_model, bk_write_new_cluster_level, blob_to_table, bufUdx, bufUdx, calc_pseudo_centers, calculate_alpha_linear, calculate_hessian_linear1, calculate_hessian_linear2, chi_squared, cleanup_kmeans_files, compute_and_save_global_center, compute_and_save_new_centers, compute_local_totss, compute_local_withinss, compute_new_local_centers, confusion_matrix, coordinate_descent_covariance, corr_matrix, count_rows_in_blob, create_aggregator_blob, error_rate, evaluate_naive_bayes_model, evaluate_reg_model, evaluate_svm_model, export_model_files, finalize_blob_resource_group, get_attr_minmax, get_attr_robust_zscore, get_attr_zscore, get_model_attribute, get_model_summary, get_robust_zscore_median, iforest_create_blobs, iforest_phase0_udf1, iforest_phase0_udf2, iforest_phase1_udf1, iforest_phase1_udf2, iforest_phase1_udf3, iforest_phase1_udf4, iforest_phase2_udf1, iforest_phase2_udf2, iforest_phase2_udf3, iforest_phase2_udf4, iforest_save_model, import_model_files, isOrContains, kmeansAddMetricsToModel, kmeans_init_blobs, kmeans_to_write_final_centers, lift_table, line_search_logistic1, line_search_logistic2, load_rows_into_blocks, map_factor, math_op, matrix_global_xtx, matrix_local_xtx, mode_finder, model_converter, naive_bayes_phase1, naive_bayes_phase1_blob, naive_bayes_phase2, pca_prep1_global, 
pca_prep1_local, pca_prep2, pmml_parser, predict_arima, predict_autoregressor, predict_linear_reg, predict_logistic_reg, predict_moving_average, predict_naive_bayes, predict_naive_bayes_classes, predict_pmml, predict_poisson_reg, predict_rf_classifier, predict_rf_classifier_classes, predict_rf_regressor, predict_svm_classifier, predict_svm_regressor, predict_xgb_classifier, predict_xgb_classifier_classes, predict_xgb_regressor, random_init, random_init_write, read_from_dfblob, read_map_factor, read_ptree, read_tree, reg_final_bfgs, reg_final_newton, reg_transition_bfgs, reg_transition_newton, reg_write_model, remove_blob, reverse_normalize, rf_blob, rf_clean, rf_phase0_udf1, rf_phase0_udf2, rf_phase1_udf1, rf_phase1_udf2, rf_phase1_udf3, rf_phase1_udf4, rf_phase2_udf1, rf_phase2_udf2, rf_phase2_udf3, rf_phase2_udf4, rf_predictor_importance, rf_save_model, rsquared, save_cv_result, save_pca_model, save_svd_model, save_svm_model, select_new_centers, store_minmax_model, store_one_hot_encoder_model, store_robust_zscore_model, store_zscore_model, table_to_blob, table_to_dfblob, tokenize, topk, update_and_return_sum_of_squared_distances, upgrade_model_format, writeInitialKmeansModelToDfs, xgb_create_blobs, xgb_phase0_udf1, xgb_phase0_udf2, xgb_phase1_udf1, xgb_phase1_udf2, xgb_phase1_udf3, xgb_phase2_udf1, xgb_phase2_udf2, xgb_phase2_udf3, xgb_predictor_importance, xgb_prune, xgb_save_model, yule_walker, ", + "udx_language": "ComplexTypesLib -- Functions for Complex Types | DelimitedExportLib -- Delimited data export package | JsonExportLib -- Json data export package | MachineLearningLib -- Machine learning package | OrcExportLib -- Orc export package | ParquetExportLib -- Parquet export package | ApproximateLib -- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | KafkaLib -- Kafka streaming load and export | PlaceLib -- Geospatial package | VoltageSecureLib -- Voltage SecureData Connector | TransformFunctions -- User-defined Python library | " 
}, "name": "public" } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -121,7 +127,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -136,7 +143,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -153,7 +161,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -168,7 +177,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -188,7 +198,184 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { 
+ "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "create_time": "2023-10-13 11:23:05.308022+00:00", + "table_size": "0 KB" + }, + "name": "clicks", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.clicks", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "user_id", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "page_id", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "click_time", + "nullable": true, + "description": "", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -212,7 +399,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -227,7 +415,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -243,7 +432,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.358215+00:00" + "create_time": "2023-10-13 11:22:37.846965+00:00", + "table_size": "2119 KB" }, "name": "customer_dimension", "description": 
"References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -551,7 +741,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -568,7 +759,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -592,7 +784,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -616,7 +809,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -631,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -647,7 +842,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.368954+00:00" + "create_time": "2023-10-13 11:22:37.857152+00:00", + "table_size": "138 KB" }, "name": "date_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -955,7 +1151,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -972,7 +1169,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -996,7 +1194,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1020,7 +1219,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1035,7 +1235,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1051,7 +1252,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.375896+00:00" + "create_time": "2023-10-13 11:22:37.863745+00:00", + "table_size": "327 KB" }, "name": "employee_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -1320,7 +1522,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1337,7 +1540,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1361,7 +1565,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1385,7 +1590,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1400,7 +1606,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1416,7 +1623,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.385843+00:00" + "create_time": "2023-10-13 11:22:37.873181+00:00", + "table_size": "2564 KB" }, "name": "inventory_fact", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -1529,7 +1737,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1546,7 +1755,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1570,12 +1780,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -1594,12 +1805,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1609,13 +1821,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1625,16 +1838,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 
06:37:53.362016+00:00" + "create_time": "2023-10-13 11:23:05.408507+00:00", + "table_size": "0 KB" }, - "name": "product_dimension", + "name": "phrases", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.product_dimension", + "schemaName": "public.phrases", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -1653,33 +1867,7 @@ }, "fields": [ { - "fieldPath": "product_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": true - }, - { - "fieldPath": "product_version", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_description", + "fieldPath": "phrase", "nullable": true, "description": "", "type": { @@ -1690,76 +1878,252 @@ "nativeDataType": "VARCHAR(length=128)", "recursive": false, "isPartOfKey": false - }, - { - "fieldPath": "sku_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "category_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - 
"fieldPath": "department_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "package_type_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "package_size", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "fat_content", - "nullable": true, - "description": "", + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "create_time": "2023-10-13 11:22:37.850505+00:00", + "table_size": "19 KB" + }, + "name": "product_dimension", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.product_dimension", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "product_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "product_version", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=128)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "sku_number", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "category_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "department_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"package_type_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "package_size", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "fat_content", + "nullable": true, + "description": "", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1933,7 +2297,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2315,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1974,7 +2340,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1998,7 +2365,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2013,7 +2381,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2029,7 +2398,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.365453+00:00" + "create_time": "2023-10-13 11:22:37.853878+00:00", + "table_size": "3 KB" }, "name": "promotion_dimension", "description": "References the properties of a native table in Vertica. 
Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -2220,7 +2590,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2237,7 +2608,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2261,12 +2633,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -2285,12 +2658,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -2300,13 +2674,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2316,16 +2691,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.379273+00:00" + "create_time": "2023-10-13 11:23:05.296044+00:00", + "table_size": "0 KB" }, - "name": "shipping_dimension", + "name": "readings", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.shipping_dimension", + "schemaName": "public.readings", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -2344,7 +2720,7 @@ }, "fields": [ { - "fieldPath": "shipping_key", + "fieldPath": "meter_id", "nullable": true, "description": "", "type": { @@ -2354,39 +2730,215 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": true + "isPartOfKey": false }, { - "fieldPath": "ship_type", + "fieldPath": "reading_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "CHAR(length=30)", + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ship_mode", + "fieldPath": "reading_value", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=10)", + 
"nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false - }, - { - "fieldPath": "ship_carrier", - "nullable": true, - "description": "", - "type": { + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "changeType": "UPSERT", 
+ "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "create_time": "2023-10-13 11:22:37.867119+00:00", + "table_size": "1 KB" + }, + "name": "shipping_dimension", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.shipping_dimension", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "shipping_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "ship_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=30)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ship_mode", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=10)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ship_carrier", + "nullable": true, + "description": "", + "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } @@ -2403,7 +2955,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2420,7 +2973,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2444,7 +2998,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2468,7 +3023,8 @@ }, "systemMetadata": 
{ "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2483,7 +3039,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2499,7 +3056,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.372409+00:00" + "create_time": "2023-10-13 11:22:37.860541+00:00", + "table_size": "1 KB" }, "name": "vendor_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -2638,7 +3196,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2655,7 +3214,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2679,7 +3239,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2703,7 +3264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2718,7 +3280,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ 
-2734,7 +3297,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:38:20.045598+00:00" + "create_time": "2023-10-13 11:23:04.970568+00:00", + "table_size": "0 KB" }, "name": "vmart_load_success", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -2782,7 +3346,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2799,7 +3364,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2823,7 +3389,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2847,7 +3414,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2862,7 +3430,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2878,7 +3447,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.382549+00:00" + "create_time": "2023-10-13 11:22:37.870169+00:00", + "table_size": "2 KB" }, "name": "warehouse_dimension", "description": "References the properties of a native table in Vertica. 
Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -2991,7 +3561,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3008,7 +3579,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3032,12 +3604,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3056,12 +3629,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3071,13 +3645,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3087,23 +3662,19 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(date_dimension.date_key)", - "projection_size": "138 KB", - "Partition_Key": "Not Available", - "Partition_Size": "0", - "Projection_Cached": "False" + "create_time": "2023-10-13 11:23:05.319029+00:00", + "table_size": "0 KB", + "view_definition": "SELECT sum(customer_dimension.annual_income) AS SUM, customer_dimension.customer_state FROM public.customer_dimension WHERE (customer_dimension.customer_key IN (SELECT store_sales_fact.customer_key FROM store.store_sales_fact)) GROUP BY customer_dimension.customer_state ORDER BY customer_dimension.customer_state", + "is_view": "True" }, - "name": "date_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "sampleview", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.date_dimension_super", + "schemaName": "public.sampleview", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3122,7 +3693,7 @@ }, "fields": [ { - "fieldPath": "date_key", + "fieldPath": "SUM", "nullable": true, "description": "", "type": { @@ -3135,33 +3706,7 @@ "isPartOfKey": false }, { - "fieldPath": "date", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "full_date_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=18)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "day_of_week", + "fieldPath": "customer_state", "nullable": true, "description": "", "type": { @@ -3169,228 +3714,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "day_number_in_calendar_month", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "day_number_in_calendar_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "day_number_in_fiscal_month", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"day_number_in_fiscal_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "last_day_in_week_indicator", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "last_day_in_month_indicator", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_week_number_in_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_month_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=9)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_month_number_in_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_year_month", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=7)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_quarter", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_year_quarter", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=7)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_half_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_year", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "holiday_indicator", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=10)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weekday_indicator", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=7)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "selling_season", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=2)", "recursive": false, "isPartOfKey": false } @@ -3402,29 +3726,49 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Projections" + "View" ] } }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "SELECT sum(customer_dimension.annual_income) AS SUM, customer_dimension.customer_state FROM public.customer_dimension WHERE (customer_dimension.customer_key IN (SELECT store_sales_fact.customer_key FROM store.store_sales_fact)) GROUP BY customer_dimension.customer_state ORDER BY customer_dimension.customer_state", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -3435,7 +3779,15 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", "type": "TRANSFORMED" } ] @@ -3443,12 +3795,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": 
"vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3467,12 +3820,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3491,12 +3845,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3506,13 +3861,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3524,21 +3880,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - 
"Segmentation_key": "hash(product_dimension.product_key, product_dimension.product_version)", - "projection_size": "19 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(date_dimension.date_key)", + "Projection_size": "138 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "product_dimension_super", + "name": "date_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.product_dimension_super", + "schemaName": "public.date_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3557,7 +3913,7 @@ }, "fields": [ { - "fieldPath": "product_key", + "fieldPath": "date_key", "nullable": true, "description": "", "type": { @@ -3570,20 +3926,20 @@ "isPartOfKey": false }, { - "fieldPath": "product_version", + "fieldPath": "date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "product_description", + "fieldPath": "full_date_description", "nullable": true, "description": "", "type": { @@ -3591,12 +3947,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(length=18)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "sku_number", + "fieldPath": "day_of_week", "nullable": true, "description": "", "type": { @@ -3604,64 +3960,64 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } 
}, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "VARCHAR(length=9)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "category_description", + "fieldPath": "day_number_in_calendar_month", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "department_description", + "fieldPath": "day_number_in_calendar_year", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "package_type_description", + "fieldPath": "day_number_in_fiscal_month", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "package_size", + "fieldPath": "day_number_in_fiscal_year", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "fat_content", + "fieldPath": "last_day_in_week_indicator", "nullable": true, "description": "", "type": { @@ -3674,20 +4030,20 @@ "isPartOfKey": false }, { - "fieldPath": "diet_type", + "fieldPath": "last_day_in_month_indicator", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "calendar_week_number_in_year", "nullable": true, "description": "", "type": { @@ -3700,7 +4056,7 @@ "isPartOfKey": false }, { - "fieldPath": "weight_units_of_measure", + "fieldPath": "calendar_month_name", "nullable": true, "description": "", "type": { @@ -3708,12 +4064,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "VARCHAR(length=9)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "shelf_width", + "fieldPath": "calendar_month_number_in_year", "nullable": true, "description": "", "type": { @@ -3726,20 +4082,20 @@ "isPartOfKey": false }, { - "fieldPath": "shelf_height", + "fieldPath": "calendar_year_month", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=7)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "shelf_depth", + "fieldPath": "calendar_quarter", "nullable": true, "description": "", "type": { @@ -3752,20 +4108,20 @@ "isPartOfKey": false }, { - "fieldPath": "product_price", + "fieldPath": "calendar_year_quarter", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=7)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "product_cost", + "fieldPath": "calendar_half_year", "nullable": true, "description": "", "type": { @@ -3778,7 +4134,7 @@ "isPartOfKey": false }, { - "fieldPath": "lowest_competitor_price", + "fieldPath": "calendar_year", "nullable": true, "description": "", "type": { 
@@ -3791,41 +4147,41 @@ "isPartOfKey": false }, { - "fieldPath": "highest_competitor_price", + "fieldPath": "holiday_indicator", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=10)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "average_competitor_price", + "fieldPath": "weekday_indicator", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=7)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "discontinued_flag", + "fieldPath": "selling_season", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false } @@ -3837,12 +4193,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3854,12 +4211,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -3870,7 +4228,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", "type": "TRANSFORMED" } ] @@ -3878,12 +4236,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3902,12 +4261,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3926,12 +4286,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3941,13 +4302,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3959,21 +4321,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(promotion_dimension.promotion_key)", - "projection_size": "3 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(product_dimension.product_key, product_dimension.product_version)", + "Projection_size": "19 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "promotion_dimension_super", + "name": "product_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.promotion_dimension_super", + "schemaName": "public.product_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3992,7 +4354,7 @@ }, "fields": [ { - "fieldPath": "promotion_key", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { @@ -4005,7 +4367,20 @@ "isPartOfKey": false }, { - "fieldPath": "promotion_name", + "fieldPath": "product_version", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_description", "nullable": true, "description": "", "type": { @@ -4018,7 +4393,7 @@ "isPartOfKey": false }, { - "fieldPath": "price_reduction_type", + "fieldPath": "sku_number", "nullable": true, "description": "", "type": { @@ -4026,12 +4401,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_media_type", + "fieldPath": "category_description", "nullable": true, "description": "", "type": { @@ -4039,12 +4414,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ad_type", + "fieldPath": "department_description", "nullable": true, "description": "", "type": { @@ -4052,12 +4427,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": 
false }, { - "fieldPath": "display_type", + "fieldPath": "package_type_description", "nullable": true, "description": "", "type": { @@ -4065,12 +4440,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "coupon_type", + "fieldPath": "package_size", "nullable": true, "description": "", "type": { @@ -4078,12 +4453,25 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ad_media_name", + "fieldPath": "fat_content", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "diet_type", "nullable": true, "description": "", "type": { @@ -4091,12 +4479,25 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "display_provider", + "fieldPath": "weight", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight_units_of_measure", "nullable": true, "description": "", "type": { @@ -4104,12 +4505,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_cost", + "fieldPath": "shelf_width", "nullable": true, "description": "", "type": { @@ -4122,28 +4523,106 @@ "isPartOfKey": false }, { - "fieldPath": "promotion_begin_date", + "fieldPath": 
"shelf_height", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_end_date", + "fieldPath": "shelf_depth", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_cost", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lowest_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "highest_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "average_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "discontinued_flag", + "nullable": true, + "description": "", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -4155,12 +4634,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -4172,12 +4652,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -4188,7 +4669,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", "type": "TRANSFORMED" } ] @@ -4196,12 +4677,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -4220,12 +4702,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -4244,12 +4727,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -4259,13 +4743,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -4277,21 +4762,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(vendor_dimension.vendor_key)", - "projection_size": "1 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(promotion_dimension.promotion_key)", + "Projection_size": "3 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "vendor_dimension_super", + "name": "promotion_dimension_super", "description": "Vertica 
physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.vendor_dimension_super", + "schemaName": "public.promotion_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -4310,7 +4795,7 @@ }, "fields": [ { - "fieldPath": "vendor_key", + "fieldPath": "promotion_key", "nullable": true, "description": "", "type": { @@ -4323,7 +4808,7 @@ "isPartOfKey": false }, { - "fieldPath": "vendor_name", + "fieldPath": "promotion_name", "nullable": true, "description": "", "type": { @@ -4331,12 +4816,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=128)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_address", + "fieldPath": "price_reduction_type", "nullable": true, "description": "", "type": { @@ -4344,12 +4829,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_city", + "fieldPath": "promotion_media_type", "nullable": true, "description": "", "type": { @@ -4357,12 +4842,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_state", + "fieldPath": "ad_type", "nullable": true, "description": "", "type": { @@ -4370,12 +4855,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - 
"fieldPath": "vendor_region", + "fieldPath": "display_type", "nullable": true, "description": "", "type": { @@ -4388,7 +4873,46 @@ "isPartOfKey": false }, { - "fieldPath": "deal_size", + "fieldPath": "coupon_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ad_media_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "display_provider", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=128)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "promotion_cost", "nullable": true, "description": "", "type": { @@ -4401,7 +4925,20 @@ "isPartOfKey": false }, { - "fieldPath": "last_deal_update", + "fieldPath": "promotion_begin_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "promotion_end_date", "nullable": true, "description": "", "type": { @@ -4421,12 +4958,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -4438,12 +4976,13 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -4454,7 +4993,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", "type": "TRANSFORMED" } ] @@ -4462,12 +5001,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -4486,12 +5026,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -4510,12 +5051,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -4525,13 +5067,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -4543,21 +5086,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(customer_dimension.customer_key)", - "projection_size": "2119 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(vendor_dimension.vendor_key)", + "Projection_size": "1 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "customer_dimension_super", + "name": "vendor_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.customer_dimension_super", + "schemaName": "public.vendor_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -4576,7 +5119,7 @@ }, "fields": [ { - "fieldPath": "customer_key", + "fieldPath": "vendor_key", "nullable": true, "description": "", "type": { @@ -4589,46 +5132,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_gender", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=8)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "title", + "fieldPath": "vendor_name", "nullable": true, "description": "", "type": { @@ -4636,25 +5140,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "household_id", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "customer_address", + "fieldPath": 
"vendor_address", "nullable": true, "description": "", "type": { @@ -4662,12 +5153,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "customer_city", + "fieldPath": "vendor_city", "nullable": true, "description": "", "type": { @@ -4680,7 +5171,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_state", + "fieldPath": "vendor_state", "nullable": true, "description": "", "type": { @@ -4693,20 +5184,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_region", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "marital_status", + "fieldPath": "vendor_region", "nullable": true, "description": "", "type": { @@ -4719,72 +5197,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_age", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "number_of_children", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "annual_income", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "occupation", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { 
- "fieldPath": "largest_bill_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_membership_card", + "fieldPath": "deal_size", "nullable": true, "description": "", "type": { @@ -4797,46 +5210,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_since", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "deal_stage", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "deal_size", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "last_deal_update", + "fieldPath": "last_deal_update", "nullable": true, "description": "", "type": { @@ -4856,12 +5230,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -4873,12 +5248,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -4889,7 +5265,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", "type": "TRANSFORMED" } ] @@ -4897,12 +5273,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -4921,12 +5298,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -4945,12 +5323,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", 
"aspect": { @@ -4960,13 +5339,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -4978,21 +5358,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(employee_dimension.employee_key)", - "projection_size": "327 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(customer_dimension.customer_key)", + "Projection_size": "2119 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "employee_dimension_super", + "name": "customer_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.employee_dimension_super", + "schemaName": "public.customer_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5011,7 +5391,7 @@ }, "fields": [ { - "fieldPath": "employee_key", + "fieldPath": "customer_key", "nullable": true, "description": "", "type": { @@ -5024,7 +5404,33 @@ "isPartOfKey": false }, { - "fieldPath": "employee_gender", + "fieldPath": "customer_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=16)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_gender", "nullable": true, "description": "", "type": { @@ -5037,7 +5443,7 @@ "isPartOfKey": false }, { - "fieldPath": "courtesy_title", + "fieldPath": "title", "nullable": true, "description": "", "type": { @@ -5050,7 +5456,33 @@ "isPartOfKey": false }, { - "fieldPath": "employee_first_name", + "fieldPath": "household_id", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + 
"fieldPath": "customer_city", "nullable": true, "description": "", "type": { @@ -5063,7 +5495,7 @@ "isPartOfKey": false }, { - "fieldPath": "employee_middle_initial", + "fieldPath": "customer_state", "nullable": true, "description": "", "type": { @@ -5071,12 +5503,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "CHAR(length=2)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_last_name", + "fieldPath": "customer_region", "nullable": true, "description": "", "type": { @@ -5089,7 +5521,20 @@ "isPartOfKey": false }, { - "fieldPath": "employee_age", + "fieldPath": "marital_status", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_age", "nullable": true, "description": "", "type": { @@ -5102,33 +5547,33 @@ "isPartOfKey": false }, { - "fieldPath": "hire_date", + "fieldPath": "number_of_children", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_street_address", + "fieldPath": "annual_income", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_city", + "fieldPath": "occupation", "nullable": true, "description": "", "type": { @@ -5141,20 +5586,46 @@ "isPartOfKey": false }, { - "fieldPath": "employee_state", + "fieldPath": "largest_bill_amount", "nullable": true, "description": "", 
"type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_region", + "fieldPath": "store_membership_card", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_since", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "deal_stage", "nullable": true, "description": "", "type": { @@ -5162,25 +5633,1087 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "job_title", + "fieldPath": "deal_size", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_deal_update", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Projections" + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(employee_dimension.employee_key)", + "Projection_size": "327 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "employee_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.employee_dimension_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "employee_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_gender", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "courtesy_title", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_first_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_middle_initial", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_last_name", + "nullable": true, + 
"description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_age", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "hire_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_street_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_region", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "job_title", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "reports_to", + "nullable": 
true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "salaried_flag", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "annual_salary", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "hourly_rate", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vacation_days", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Projections" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 
0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(warehouse_dimension.warehouse_key)", + "Projection_size": "2 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "warehouse_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.warehouse_dimension_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "warehouse_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "warehouse_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=20)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "warehouse_address", + "nullable": true, + 
"description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "warehouse_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=60)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "warehouse_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "warehouse_region", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Projections" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(shipping_dimension.shipping_key)", + "Projection_size": "1 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "shipping_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.shipping_dimension_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "shipping_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ship_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=30)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ship_mode", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=10)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ship_carrier", + "nullable": true, + "description": "", + "type": { + "type": 
{ + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=20)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Projections" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(inventory_fact.date_key, inventory_fact.product_key, inventory_fact.product_version, inventory_fact.warehouse_key, inventory_fact.qty_in_stock)", + "Projection_size": "2564 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "inventory_fact_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "public.inventory_fact_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "date_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "reports_to", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { @@ -5193,7 +6726,7 @@ "isPartOfKey": false }, { - "fieldPath": "salaried_flag", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { @@ -5206,7 +6739,7 @@ "isPartOfKey": false }, { - "fieldPath": "annual_salary", + "fieldPath": "warehouse_key", "nullable": true, "description": "", "type": { @@ -5219,7 +6752,7 @@ "isPartOfKey": false }, { - "fieldPath": "hourly_rate", + "fieldPath": "qty_in_stock", "nullable": true, "description": "", "type": { @@ -5227,20 +6760,20 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vacation_days", + "fieldPath": "inventory_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + 
"nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false } @@ -5252,12 +6785,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -5269,12 +6803,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -5285,7 +6820,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", "type": "TRANSFORMED" } ] @@ -5293,12 +6828,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5317,12 +6853,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": 
"no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5341,12 +6878,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -5356,13 +6894,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5373,22 +6912,22 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(warehouse_dimension.warehouse_key)", - "projection_size": "2 KB", + "Projection_Type": "is_aggregate_projection, has_expressions", + "Is_Segmented": "True", + "Segmentation_key": "hash(readings.meter_id)", + "Projection_size": "0 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "warehouse_dimension_super", + "name": "readings_topk", "description": "Vertica physically 
stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.warehouse_dimension_super", + "schemaName": "public.readings_topk", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5407,7 +6946,7 @@ }, "fields": [ { - "fieldPath": "warehouse_key", + "fieldPath": "meter_id", "nullable": true, "description": "", "type": { @@ -5420,67 +6959,28 @@ "isPartOfKey": false }, { - "fieldPath": "warehouse_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=20)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_address", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_city", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=60)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_state", + "fieldPath": "recent_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "warehouse_region", + "fieldPath": "recent_value", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false } @@ -5492,12 +6992,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -5509,12 +7010,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -5525,7 +7027,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "type": "TRANSFORMED" } ] @@ -5533,12 +7035,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5557,12 +7060,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5581,12 +7085,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -5596,13 +7101,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5613,22 +7119,22 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(shipping_dimension.shipping_key)", - "projection_size": "1 KB", + "Projection_Type": "is_aggregate_projection, has_expressions", + "Is_Segmented": "True", + "Segmentation_key": "hash(clicks.page_id, (clicks.click_time)::date)", + "Projection_size": "0 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": 
"shipping_dimension_super", + "name": "clicks_agg", "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.shipping_dimension_super", + "schemaName": "public.clicks_agg", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5647,7 +7153,7 @@ }, "fields": [ { - "fieldPath": "shipping_key", + "fieldPath": "page_id", "nullable": true, "description": "", "type": { @@ -5658,45 +7164,6 @@ "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false - }, - { - "fieldPath": "ship_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=30)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "ship_mode", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=10)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "ship_carrier", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=20)", - "recursive": false, - "isPartOfKey": false } ] } @@ -5706,12 +7173,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "changeType": "UPSERT", 
"aspectName": "subTypes", "aspect": { @@ -5723,12 +7191,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -5739,7 +7208,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "type": "TRANSFORMED" } ] @@ -5747,12 +7216,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5771,12 +7241,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5795,12 +7266,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -5810,13 +7282,14 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5828,21 +7301,21 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", - "Segmentation_key": "hash(inventory_fact.date_key, inventory_fact.product_key, inventory_fact.product_version, inventory_fact.warehouse_key, inventory_fact.qty_in_stock)", - "projection_size": "2566 KB", + "Is_Segmented": "True", + "Segmentation_key": "hash(phrases.phrase)", + "Projection_size": "0 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "inventory_fact_super", + "name": "phrases_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.inventory_fact_super", + "schemaName": "public.phrases_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5861,80 +7334,15 @@ }, "fields": [ { - "fieldPath": "date_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_version", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "qty_in_stock", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "inventory_date", + "fieldPath": "phrase", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=128)", "recursive": false, 
"isPartOfKey": false } @@ -5946,12 +7354,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -5963,12 +7372,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -5979,7 +7389,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", "type": "TRANSFORMED" } ] @@ -5987,12 +7397,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -6011,7 +7422,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6035,7 +7447,8 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6050,7 +7463,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6065,7 +7479,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6082,7 +7497,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6097,7 +7513,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6117,7 +7534,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6141,7 +7559,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6156,7 +7575,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6172,7 +7592,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.393181+00:00" + "create_time": "2023-10-13 11:22:37.879951+00:00", + "table_size": "2 KB" }, "name": "store_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -6441,7 +7862,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6458,7 +7880,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6482,7 +7905,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6506,7 +7930,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6521,7 +7946,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6537,7 +7963,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.404717+00:00" + "create_time": "2023-10-13 11:22:37.890717+00:00", + "table_size": "8646 KB" }, "name": "store_orders_fact", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -6819,7 +8246,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6836,7 +8264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6860,7 +8289,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6884,7 +8314,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6899,7 +8330,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6915,7 +8347,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.396731+00:00" + "create_time": "2023-10-13 11:22:37.883186+00:00", + "table_size": "225060 KB" }, "name": "store_sales_fact", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -7171,7 +8604,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7188,7 +8622,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7212,7 +8647,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7236,7 +8672,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7251,7 +8688,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7269,11 +8707,11 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(store_dimension.store_key)", - "projection_size": "2 KB", + "Projection_size": "2 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "store_dimension_super", @@ -7543,7 +8981,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7560,7 +8999,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7584,7 +9024,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": 
"vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7608,7 +9049,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7632,7 +9074,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7647,7 +9090,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7665,11 +9109,11 @@ "customProperties": { "ROS_Count": "2", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(store_sales_fact.date_key, store_sales_fact.product_key, store_sales_fact.product_version, store_sales_fact.store_key, store_sales_fact.promotion_key, store_sales_fact.customer_key, store_sales_fact.employee_key, store_sales_fact.pos_transaction_number)", - "projection_size": "225089 KB", + "Projection_size": "225060 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "store_sales_fact_super", @@ -7926,7 +9370,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7943,7 +9388,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7967,7 +9413,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -7991,7 +9438,8 @@ }, "systemMetadata": { 
"lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8015,7 +9463,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8030,7 +9479,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8048,11 +9498,11 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(store_orders_fact.product_key, store_orders_fact.product_version, store_orders_fact.store_key, store_orders_fact.vendor_key, store_orders_fact.employee_key, store_orders_fact.order_number, store_orders_fact.date_ordered, store_orders_fact.date_shipped)", - "projection_size": "8648 KB", + "Projection_size": "8646 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "store_orders_fact_super", @@ -8335,7 +9785,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8352,7 +9803,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8376,7 +9828,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8400,7 +9853,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + 
"lastRunId": "no-run-id-provided" } }, { @@ -8424,7 +9878,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8439,7 +9894,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8454,7 +9910,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8471,7 +9928,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8486,7 +9944,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8506,7 +9965,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8530,7 +9990,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8545,7 +10006,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8561,7 +10023,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.415595+00:00" + "create_time": "2023-10-13 11:22:37.900841+00:00", + "table_size": "6 KB" }, "name": "call_center_dimension", "description": "References the properties of a native table in Vertica. 
Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -8752,7 +10215,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8769,7 +10233,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8793,7 +10258,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8817,7 +10283,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8832,7 +10299,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8848,7 +10316,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.412266+00:00" + "create_time": "2023-10-13 11:22:37.897788+00:00", + "table_size": "9 KB" }, "name": "online_page_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -8961,7 +10430,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -8978,7 +10448,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9002,7 +10473,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9026,7 +10498,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9041,7 +10514,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9057,7 +10531,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-07-26 06:37:53.419260+00:00" + "create_time": "2023-10-13 11:22:37.903963+00:00", + "table_size": "182356 KB" }, "name": "online_sales_fact", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -9352,7 +10827,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9369,7 +10845,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9393,7 +10870,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9417,7 +10895,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9432,7 +10911,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9450,11 +10930,11 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(online_page_dimension.online_page_key)", - "projection_size": "9 KB", + "Projection_size": "9 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "online_page_dimension_super", @@ -9568,7 +11048,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9585,7 +11066,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9609,7 +11091,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": 
"vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9633,7 +11116,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9657,7 +11141,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9672,7 +11157,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9690,11 +11176,11 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(call_center_dimension.call_center_key)", - "projection_size": "6 KB", + "Projection_size": "6 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "call_center_dimension_super", @@ -9886,7 +11372,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9903,7 +11390,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9927,7 +11415,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9951,7 +11440,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9975,7 +11465,8 @@ }, "systemMetadata": { 
"lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -9990,7 +11481,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -10008,11 +11500,11 @@ "customProperties": { "ROS_Count": "1", "Projection_Type": "is_super_projection", - "is_segmented": "True", + "Is_Segmented": "True", "Segmentation_key": "hash(online_sales_fact.sale_date_key, online_sales_fact.ship_date_key, online_sales_fact.product_key, online_sales_fact.product_version, online_sales_fact.customer_key, online_sales_fact.call_center_key, online_sales_fact.online_page_key, online_sales_fact.shipping_key)", - "projection_size": "182385 KB", + "Projection_size": "182356 KB", "Partition_Key": "Not Available", - "Partition_Size": "0", + "Number_Of_Partitions": "0", "Projection_Cached": "False" }, "name": "online_sales_fact_super", @@ -10308,7 +11800,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -10325,7 +11818,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -10349,7 +11843,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -10373,7 +11868,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00" + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml 
b/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml index ebd800ee09ff5..a182e54bd53c7 100644 --- a/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml +++ b/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml @@ -5,6 +5,13 @@ source: database: Vmart username: dbadmin password: abc123 + include_tables: true + include_views: true + include_projections: true + include_models: true + include_view_lineage: true + include_projection_lineage: true + sink: type: file diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py b/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py new file mode 100644 index 0000000000000..7be8b667a500b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py @@ -0,0 +1,55 @@ +from datahub.api.entities.datacontract.data_quality_assertion import ( + DataQualityAssertion, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + AssertionValueChangeTypeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, +) + + +def test_parse_sql_assertion(): + assertion_urn = "urn:li:assertion:a" + entity_urn = "urn:li:dataset:d" + statement = "SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL" + + d = { + "type": "custom_sql", + "sql": statement, + "operator": {"type": "between", "min": 5, "max": 10}, + } + + assert DataQualityAssertion.parse_obj(d).generate_mcp( + assertion_urn, entity_urn + ) == [ + MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + 
aspect=AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=SqlAssertionInfoClass( + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + entity=entity_urn, + statement="SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL", + operator=AssertionStdOperatorClass.BETWEEN, + parameters=AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value="5", + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value="10", + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + ), + ) + ] diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..812566143014b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,106 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + 
"upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No 
newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..17f4d10728268 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,120 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + 
"confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json new file mode 100644 index 0000000000000..c828373c73080 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json @@ -0,0 +1,41 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream1%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + } + }, + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream2%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py new file mode 100644 index 0000000000000..e8485106c6a81 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -0,0 +1,261 @@ +from typing import List, Optional +from unittest.mock import MagicMock + +import 
pytest + +import datahub.metadata.schema_classes as models +from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.sink.file import write_metadata_file +from tests.test_helpers import mce_helpers + +platform = "platform" +system_metadata = models.SystemMetadataClass(lastObserved=1643871600000, runId="run-id") + + +def make_lineage_aspect( + dataset_name: str, + upstreams: List[str], + timestamp: int = 0, + columns: List[str] = [], + include_cll: bool = False, +) -> models.UpstreamLineageClass: + """ + Generates dataset properties and upstream lineage aspects + with simple column to column lineage between current dataset and all upstreams + """ + + dataset_urn = make_dataset_urn(platform, dataset_name) + return models.UpstreamLineageClass( + upstreams=[ + models.UpstreamClass( + dataset=upstream_urn, + type=models.DatasetLineageTypeClass.TRANSFORMED, + auditStamp=models.AuditStampClass( + time=timestamp, actor="urn:li:corpuser:unknown" + ), + ) + for upstream_urn in upstreams + ], + fineGrainedLineages=[ + models.FineGrainedLineageClass( + upstreamType=models.FineGrainedLineageUpstreamTypeClass.FIELD_SET, + downstreamType=models.FineGrainedLineageDownstreamTypeClass.FIELD, + upstreams=[ + make_schema_field_urn(upstream_urn, col) + for upstream_urn in upstreams + ], + downstreams=[make_schema_field_urn(dataset_urn, col)], + ) + for col in columns + ] + if include_cll + else None, + ) + + +def base_table_lineage_aspect() -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + ) + + +def base_cll_aspect(timestamp: int = 0) -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + 
upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + timestamp=timestamp, + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + +def test_incremental_table_lineage(tmp_path, pytestconfig): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_table_lineage.json" + golden_file = test_resources_dir / "incremental_table_lineage_golden.json" + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_table_lineage_aspect() + + processed_wus = auto_incremental_lineage( + graph=None, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +def test_incremental_table_lineage_empty_upstreams(tmp_path, pytestconfig): + + urn = make_dataset_urn(platform, "dataset1") + aspect = make_lineage_aspect( + "dataset1", + upstreams=[], + ) + + processed_wus = auto_incremental_lineage( + graph=None, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + assert [wu.metadata for wu in processed_wus] == [] + + +@pytest.mark.parametrize( + "gms_aspect,current_aspect,output_aspect", + [ + # emitting CLL upstreamLineage over table level upstreamLineage + [ + base_table_lineage_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting upstreamLineage for the first time + [ + None, + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage + [ + base_cll_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage but with earlier timestamp + [ 
+ base_cll_aspect(), # default timestamp is 0 + base_cll_aspect(timestamp=1643871600000), + base_cll_aspect(timestamp=1643871600000), + ], + ], +) +def test_incremental_column_level_lineage( + gms_aspect: Optional[models.UpstreamLineageClass], + current_aspect: models.UpstreamLineageClass, + output_aspect: models.UpstreamLineageClass, +) -> None: + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = gms_aspect + dataset_urn = make_dataset_urn(platform, "dataset1") + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=current_aspect, + systemMetadata=system_metadata, + ).as_workunit() + ], + ) + + wu: MetadataWorkUnit = next(iter(processed_wus)) + aspect = wu.get_aspect_of_type(models.UpstreamLineageClass) + assert aspect == output_aspect + + +def test_incremental_column_lineage_less_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_cll_less_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_less_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[make_dataset_urn(platform, name) for name in ["upstream1"]], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +def 
test_incremental_column_lineage_more_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_cll_more_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_more_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) + for name in ["upstream1", "upstream2", "upstream3"] + ], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) diff --git a/metadata-ingestion/tests/unit/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py similarity index 86% rename from metadata-ingestion/tests/unit/test_source_helpers.py rename to metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index b6ec6ebce240c..b667af8bb41e9 100644 --- a/metadata-ingestion/tests/unit/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -16,6 +16,7 @@ from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, auto_empty_dataset_usage_statistics, + auto_lowercase_urns, auto_status_aspect, auto_workunit, ) @@ -275,6 +276,75 @@ def test_auto_browse_path_v2_legacy_browse_path(telemetry_ping_mock): assert paths["platform,dataset-2,PROD)"] == _make_browse_path_entries(["something"]) +def 
test_auto_lowercase_aspects(): + mcws = auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn=make_dataset_urn( + "bigquery", "myProject.mySchema.myTable", "PROD" + ), + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", "myProject.mySchema.myTable", "PROD" + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-Public-Data.Covid19_Aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + + expected = [ + *list( + auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.myschema.mytable,PROD)", + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", + "myProject.mySchema.myTable", + "PROD", + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + ), + ] + assert list(auto_lowercase_urns(mcws)) == expected + + @patch("datahub.ingestion.api.source_helpers.telemetry.telemetry_instance.ping") def test_auto_browse_path_v2_container_over_legacy_browse_path(telemetry_ping_mock): structure = {"a": {"b": ["c"]}} diff --git a/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py b/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py index cbd5be9e7d832..4a69deb572fbd 100644 --- 
a/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py +++ b/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py @@ -1,14 +1,14 @@ import tempfile from typing import List, Type -import avro.schema import pandas as pd import ujson from avro import schema as avro_schema from avro.datafile import DataFileWriter from avro.io import DatumWriter -from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet +from datahub.ingestion.source.schema_inference import csv_tsv, json, parquet +from datahub.ingestion.source.schema_inference.avro import AvroInferrer from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BooleanTypeClass, NumberTypeClass, @@ -123,7 +123,7 @@ def test_infer_schema_avro(): file.seek(0) - fields = avro.AvroInferrer().infer_schema(file) + fields = AvroInferrer().infer_schema(file) fields.sort(key=lambda x: x.fieldPath) assert_field_paths_match(fields, expected_field_paths_avro) diff --git a/metadata-ingestion/tests/unit/serde/test_serde.py b/metadata-ingestion/tests/unit/serde/test_serde.py index d116f1f5473fa..d2d6a0bdda5b9 100644 --- a/metadata-ingestion/tests/unit/serde/test_serde.py +++ b/metadata-ingestion/tests/unit/serde/test_serde.py @@ -238,7 +238,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -252,7 +252,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -267,13 +267,13 @@ def test_missing_optional_simple() -> None: def test_missing_optional_in_union() -> None: # This one doesn't contain any optional fields and should work fine. 
revised_json = json.loads( - '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' + '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' ) revised = models.DataHubPolicyInfoClass.from_obj(revised_json) # This one is missing the optional filters.allResources field. 
original_json = json.loads( - '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' + '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' ) original = models.DataHubPolicyInfoClass.from_obj(original_json) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index e50d944ce72e3..d610b0a83f229 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -12,7 +12,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col5" + "column": "col5", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -24,7 +30,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -36,7 +48,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -48,7 +66,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col3" + "column": "col3", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index 78591286feb50..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 0e93d31fbb6a6..41ae0885941b0 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index 78591286feb50..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index 17a801a63e3ff..26f8f8f59a3ff 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -10,7 +10,13 @@ { "downstream": { "table": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "something" + "column": "something", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index fd8a586ac74ac..83365c09f69c2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { @@ -27,7 +33,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json index 4773974545bfa..cf31b71cb50f6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json @@ -4,5 +4,58 @@ "out_tables": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)" ], - "column_lineage": null + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INTEGER" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "month", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "total_cost", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "area", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + } + ] } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index 1ca56840531e4..8a6b60d0f1bde 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -10,7 +10,9 @@ { "downstream": { "table": 
"urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Department" + "column": "Department", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -22,14 +24,22 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Employees" + "column": "Employees", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "NUMBER" }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Salary" + "column": "Salary", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json index e241bdd08e243..eecb2265eaec5 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "orderkey" + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "custkey" + "column": "custkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "orderstatus" + "column": "orderstatus", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -56,7 +80,13 @@ { "downstream": { "table": null, - "column": "totalprice" + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -68,7 +98,13 @@ { "downstream": { "table": null, - "column": "orderdate" + "column": "orderdate", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -80,7 +116,13 @@ { "downstream": { "table": null, - "column": "orderpriority" + "column": "orderpriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -92,7 +134,13 @@ { "downstream": { "table": null, - "column": "clerk" + "column": "clerk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -104,7 +152,13 @@ { "downstream": { "table": null, - "column": "shippriority" + "column": "shippriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -116,7 +170,13 @@ { "downstream": { "table": null, - "column": "comment" + "column": "comment", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json index d7264fd2db6b2..326db47e7ab33 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json @@ -18,21 +18,27 @@ { "downstream": 
{ "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "i_item_desc" + "column": "i_item_desc", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "w_warehouse_name" + "column": "w_warehouse_name", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "d_week_seq" + "column": "d_week_seq", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -44,7 +50,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "no_promo" + "column": "no_promo", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -56,7 +68,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "promo" + "column": "promo", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -68,7 +86,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "total_cnt" + "column": "total_cnt", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json index 10f5ee20b0c1f..b5fd5eebeb1b1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json @@ -9,21 +9,27 @@ { "downstream": { 
"table": null, - "column": "a" + "column": "a", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "b" + "column": "b", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "c" + "column": "c", + "column_type": null, + "native_column_type": null }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json index 9f6eeae46c294..a67c944822138 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)" + "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 109de96180422..2424fcda34752 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "NUMERIC" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -32,7 +40,9 @@ { "downstream": { "table": null, - "column": "min_metric" + "column": 
"min_metric", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json index 2340b2e95b0d0..902aa010c8afc 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json @@ -9,14 +9,26 @@ { "downstream": { "table": null, - "column": "label" + "column": "label", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" }, "upstreams": [] }, { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json index 326c07d332c26..6ea88f45847ce 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json @@ -8,7 +8,9 @@ { "downstream": { "table": null, - "column": "max_col" + "column": "max_col", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json index 3e02314d6e8c3..67e9fd2d21a0e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json @@ -9,7 +9,9 @@ { "downstream": { "table": null, - "column": "COL1" + "column": "COL1", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -21,7 
+23,9 @@ { "downstream": { "table": null, - "column": "COL3" + "column": "COL3", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index c12ad23b2f03b..8dd2633eff612 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "NUMERIC" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json index 64cd80e9a2d69..a876824127ec1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_price_category" + "column": "total_price_category", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_price_success" + "column": "total_price_success", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git 
a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json new file mode 100644 index 0000000000000..7545e2b3269dc --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json @@ -0,0 +1,63 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(20, 0)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_orderkey" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_int", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_float", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(16, 4)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json index 7b22a46757e39..84e6b053000f1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json 
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "total_min" + "column": "total_min", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json index c912d99a3a8a3..39c94cf83c561 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json @@ -10,7 +10,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "Total_Agg" + "column": "Total_Agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "TOTAL_MIN" + "column": "TOTAL_MIN", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -46,7 +64,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json index 2af308ec60623..dbf5b1b9a4453 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "user_fk" + "column": "user_fk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(38, 0)" }, "upstreams": [ { @@ -23,7 +29,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "email" + "column": "email", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR(16777216)" }, "upstreams": [ { @@ -35,7 +47,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "last_purchase_date" + "column": "last_purchase_date", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -47,7 +65,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_amount" + "column": "lifetime_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -59,7 +83,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_count" + "column": "lifetime_purchase_count", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -71,7 +101,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "average_purchase_amount" + "column": "average_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json new file mode 100644 index 0000000000000..e2baa34e7fe28 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json @@ -0,0 +1,56 @@ +{ + 
"query_type": "UPDATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col2" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)", + "column": "col2" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json new file mode 100644 index 0000000000000..b41ed61b37cdb --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json @@ -0,0 +1,35 @@ +{ + "query_type": "UPDATE", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json new file mode 100644 index 0000000000000..ee80285d87f60 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -0,0 +1,40 @@ +{ + "query_type": "CREATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "PatientId", + "column_type": null, + "native_column_type": "INTEGER()" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "column": "PatientId" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "BMI", + "column_type": null, + "native_column_type": "FLOAT()" + }, + "upstreams": [ + { + "table": 
"urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)", + "column": "BMI" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 2a965a9bb1e61..dfc5b486abd35 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -3,6 +3,7 @@ import pytest from datahub.testing.check_sql_parser_result import assert_sql_result +from datahub.utilities.sqlglot_lineage import _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" @@ -608,4 +609,162 @@ def test_snowflake_default_normalization(): ) +def test_snowflake_column_cast(): + assert_sql_result( + """ +SELECT + o.o_orderkey::NUMBER(20,0) as orderkey, + CAST(o.o_totalprice AS INT) as total_cast_int, + CAST(o.o_totalprice AS NUMBER(16,4)) as total_cast_float +FROM snowflake_sample_data.tpch_sf1.orders o +LIMIT 10 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_column_cast.json", + ) + + # TODO: Add a test for setting platform_instance or env + + +def test_teradata_default_normalization(): + assert_sql_result( + """ +create table demo_user.test_lineage2 as + ( + select + ppd.PatientId, + ppf.bmi + from + demo_user.pima_patient_features ppf + join demo_user.pima_patient_diagnoses ppd on + ppd.PatientId = ppf.PatientId + ) with data; +""", + dialect="teradata", + default_schema="dbc", + platform_instance="myteradata", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)": { + "HasDiabetes": "INTEGER()", + "PatientId": "INTEGER()", + }, + 
"urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)": { + "Age": "INTEGER()", + "BMI": "FLOAT()", + "BloodP": "INTEGER()", + "DiPedFunc": "FLOAT()", + "NumTimesPrg": "INTEGER()", + "PatientId": "INTEGER()", + "PlGlcConc": "INTEGER()", + "SkinThick": "INTEGER()", + "TwoHourSerIns": "INTEGER()", + }, + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)": { + "BMI": "FLOAT()", + "PatientId": "INTEGER()", + }, + }, + expected_file=RESOURCE_DIR / "test_teradata_default_normalization.json", + ) + + +def test_snowflake_update_hardcoded(): + assert_sql_result( + """ +UPDATE snowflake_sample_data.tpch_sf1.orders +SET orderkey = 1, totalprice = 2 +WHERE orderkey = 3 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_hardcoded.json", + ) + + +def test_update_from_select(): + assert _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT == {"returning", "this"} + + +def test_snowflake_update_from_table(): + # Can create these tables with the following SQL: + """ + -- Create or replace my_table + CREATE OR REPLACE TABLE my_table ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table1 + CREATE OR REPLACE TABLE table1 ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table2 + CREATE OR REPLACE TABLE table2 ( + id INT IDENTITY PRIMARY KEY, + col2 VARCHAR(50) + ); + + -- Insert data into my_table + INSERT INTO my_table (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table1 + INSERT INTO table1 (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table2 + INSERT INTO table2 (col2) + VALUES ('bar'), + ('qux'); + """ + + assert_sql_result( + """ +UPDATE 
my_table +SET + col1 = t1.col1 || t1.col2, + col2 = t1.col1 || t2.col2 +FROM table1 t1 +JOIN table2 t2 ON t1.id = t2.id +WHERE my_table.id = t1.id; +""", + dialect="snowflake", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_from_table.json", + ) diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py index 2558f6a46715e..23dd7dd5a6e45 100644 --- a/metadata-ingestion/tests/unit/test_athena_source.py +++ b/metadata-ingestion/tests/unit/test_athena_source.py @@ -3,14 +3,17 @@ import pytest from freezegun import freeze_time +from sqlalchemy import types +from sqlalchemy_bigquery import STRUCT from datahub.ingestion.api.common import PipelineContext -from src.datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.sql.athena import CustomAthenaRestDialect +from datahub.utilities.sqlalchemy_type_converter import MapType FROZEN_TIME = "2020-04-14 07:00:00" -@pytest.mark.integration def test_athena_config_query_location_old_plus_new_value_not_allowed(): from datahub.ingestion.source.sql.athena import AthenaConfig @@ -25,7 +28,6 @@ def test_athena_config_query_location_old_plus_new_value_not_allowed(): ) -@pytest.mark.integration def test_athena_config_staging_dir_is_set_as_query_result(): from datahub.ingestion.source.sql.athena 
import AthenaConfig @@ -48,7 +50,6 @@ def test_athena_config_staging_dir_is_set_as_query_result(): assert config.json() == expected_config.json() -@pytest.mark.integration def test_athena_uri(): from datahub.ingestion.source.sql.athena import AthenaConfig @@ -59,9 +60,12 @@ def test_athena_uri(): "work_group": "test-workgroup", } ) - assert ( - config.get_sql_alchemy_url() - == "awsathena+rest://@athena.us-west-1.amazonaws.com:443/?s3_staging_dir=s3%3A%2F%2Fquery-result-location%2F&work_group=test-workgroup&catalog_name=awsdatacatalog&duration_seconds=3600" + assert config.get_sql_alchemy_url() == ( + "awsathena+rest://@athena.us-west-1.amazonaws.com:443" + "?catalog_name=awsdatacatalog" + "&duration_seconds=3600" + "&s3_staging_dir=s3%3A%2F%2Fquery-result-location%2F" + "&work_group=test-workgroup" ) @@ -104,7 +108,7 @@ def test_athena_get_table_properties(): mock_cursor = mock.MagicMock() mock_inspector = mock.MagicMock() mock_inspector.engine.raw_connection().cursor.return_value = mock_cursor - mock_cursor._get_table_metadata.return_value = AthenaTableMetadata( + mock_cursor.get_table_metadata.return_value = AthenaTableMetadata( response=table_metadata ) @@ -126,3 +130,81 @@ def test_athena_get_table_properties(): } assert location == make_s3_urn("s3://testLocation", "PROD") + + +def test_get_column_type_simple_types(): + assert isinstance( + CustomAthenaRestDialect()._get_column_type(type_="int"), types.Integer + ) + assert isinstance( + CustomAthenaRestDialect()._get_column_type(type_="string"), types.String + ) + assert isinstance( + CustomAthenaRestDialect()._get_column_type(type_="boolean"), types.BOOLEAN + ) + assert isinstance( + CustomAthenaRestDialect()._get_column_type(type_="long"), types.BIGINT + ) + assert isinstance( + CustomAthenaRestDialect()._get_column_type(type_="double"), types.FLOAT + ) + + +def test_get_column_type_array(): + result = CustomAthenaRestDialect()._get_column_type(type_="array<string>") + + assert isinstance(result, types.ARRAY) +
assert isinstance(result.item_type, types.String) + + +def test_get_column_type_map(): + result = CustomAthenaRestDialect()._get_column_type(type_="map<string,int>") + + assert isinstance(result, MapType) + assert isinstance(result.types[0], types.String) + assert isinstance(result.types[1], types.Integer) + + +def test_column_type_struct(): + + result = CustomAthenaRestDialect()._get_column_type(type_="struct<test:string>") + + assert isinstance(result, STRUCT) + assert isinstance(result._STRUCT_fields[0], tuple) + assert result._STRUCT_fields[0][0] == "test" + assert isinstance(result._STRUCT_fields[0][1], types.String) + + +def test_column_type_complex_combination(): + + result = CustomAthenaRestDialect()._get_column_type( + type_="struct<id:string,name:string,choices:array<struct<id:string,label:string>>>" + ) + + assert isinstance(result, STRUCT) + + assert isinstance(result._STRUCT_fields[0], tuple) + assert result._STRUCT_fields[0][0] == "id" + assert isinstance(result._STRUCT_fields[0][1], types.String) + + assert isinstance(result._STRUCT_fields[1], tuple) + assert result._STRUCT_fields[1][0] == "name" + assert isinstance(result._STRUCT_fields[1][1], types.String) + + assert isinstance(result._STRUCT_fields[2], tuple) + assert result._STRUCT_fields[2][0] == "choices" + assert isinstance(result._STRUCT_fields[2][1], types.ARRAY) + + assert isinstance(result._STRUCT_fields[2][1].item_type, STRUCT) + + assert isinstance(result._STRUCT_fields[2][1].item_type._STRUCT_fields[0], tuple) + assert result._STRUCT_fields[2][1].item_type._STRUCT_fields[0][0] == "id" + assert isinstance( + result._STRUCT_fields[2][1].item_type._STRUCT_fields[0][1], types.String + ) + + assert isinstance(result._STRUCT_fields[2][1].item_type._STRUCT_fields[1], tuple) + assert result._STRUCT_fields[2][1].item_type._STRUCT_fields[1][0] == "label" + assert isinstance( + result._STRUCT_fields[2][1].item_type._STRUCT_fields[1][1], types.String + ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index
e9e91361f49f4..4cfa5c48d2377 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -53,6 +53,59 @@ def test_bigquery_uri_on_behalf(): assert config.get_sql_alchemy_url() == "bigquery://test-project-on-behalf" +def test_bigquery_dataset_pattern(): + config = BigQueryV2Config.parse_obj( + { + "dataset_pattern": { + "allow": [ + "test-dataset", + "test-project.test-dataset", + ".*test-dataset", + ], + "deny": [ + "^test-dataset-2$", + "project\\.second_dataset", + ], + }, + } + ) + assert config.dataset_pattern.allow == [ + r".*\.test-dataset", + r"test-project.test-dataset", + r".*test-dataset", + ] + assert config.dataset_pattern.deny == [ + r"^.*\.test-dataset-2$", + r"project\.second_dataset", + ] + + config = BigQueryV2Config.parse_obj( + { + "dataset_pattern": { + "allow": [ + "test-dataset", + "test-project.test-dataset", + ".*test-dataset", + ], + "deny": [ + "^test-dataset-2$", + "project\\.second_dataset", + ], + }, + "match_fully_qualified_names": False, + } + ) + assert config.dataset_pattern.allow == [ + r"test-dataset", + r"test-project.test-dataset", + r".*test-dataset", + ] + assert config.dataset_pattern.deny == [ + r"^test-dataset-2$", + r"project\.second_dataset", + ] + + def test_bigquery_uri_with_credential(): expected_credential_json = { "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", @@ -765,11 +818,14 @@ def test_gen_view_dataset_workunits( ("project.dataset.table_20231215", "project.dataset.table", "20231215"), ("project.dataset.table_2023", "project.dataset.table_2023", None), # incorrectly handled special case where dataset itself is a sharded table if full name is specified - ("project.dataset.20231215", "project.dataset.20231215", None), + ("project.dataset.20231215", "project.dataset.20231215", "20231215"), + ("project1.dataset2.20231215", "project1.dataset2.20231215", "20231215"), # Cases with Just the table name as input ("table", 
"table", None), - ("table20231215", "table20231215", None), + ("table20231215", "table", "20231215"), ("table_20231215", "table", "20231215"), + ("table2_20231215", "table2", "20231215"), + ("table220231215", "table220231215", None), ("table_1624046611000_name", "table_1624046611000_name", None), ("table_1624046611000", "table_1624046611000", None), # Special case where dataset itself is a sharded table @@ -801,7 +857,6 @@ def test_get_table_and_shard_default( ("project.dataset.2023", "project.dataset.2023", None), # Cases with Just the table name as input ("table", "table", None), - ("table20231215", "table20231215", None), ("table_20231215", "table", "20231215"), ("table_2023", "table", "2023"), ("table_1624046611000_name", "table_1624046611000_name", None), @@ -842,7 +897,7 @@ def test_get_table_and_shard_custom_shard_pattern( "project.dataset.table_1624046611000_name", ), ("project.dataset.table_1624046611000", "project.dataset.table_1624046611000"), - ("project.dataset.table20231215", "project.dataset.table20231215"), + ("project.dataset.table20231215", "project.dataset.table"), ("project.dataset.table_*", "project.dataset.table"), ("project.dataset.table_2023*", "project.dataset.table"), ("project.dataset.table_202301*", "project.dataset.table"), diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py index 4cf42da4395f9..44fd840f28d59 100644 --- a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py +++ b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py @@ -144,10 +144,10 @@ def test_bigquery_table_sanitasitation(): assert new_table_ref.dataset == "dataset-4567" table_ref = BigQueryTableRef( - BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20222110") + BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20221210") ) new_table_identifier = table_ref.table_identifier - assert new_table_identifier.table == "foo_20222110" + assert 
new_table_identifier.table == "foo_20221210" assert new_table_identifier.is_sharded_table() assert new_table_identifier.get_table_display_name() == "foo" assert new_table_identifier.project_id == "project-1234" diff --git a/metadata-ingestion/tests/unit/test_clickhouse_source.py b/metadata-ingestion/tests/unit/test_clickhouse_source.py index de7e7d66f2129..1b2ffb70c8d19 100644 --- a/metadata-ingestion/tests/unit/test_clickhouse_source.py +++ b/metadata-ingestion/tests/unit/test_clickhouse_source.py @@ -26,9 +26,7 @@ def test_clickhouse_uri_native(): "scheme": "clickhouse+native", } ) - assert ( - config.get_sql_alchemy_url() == "clickhouse+native://user:password@host:1111/" - ) + assert config.get_sql_alchemy_url() == "clickhouse+native://user:password@host:1111" def test_clickhouse_uri_native_secure(): diff --git a/metadata-ingestion/tests/unit/test_redshift_config.py b/metadata-ingestion/tests/unit/test_redshift_config.py new file mode 100644 index 0000000000000..8a165e7f5f3fe --- /dev/null +++ b/metadata-ingestion/tests/unit/test_redshift_config.py @@ -0,0 +1,6 @@ +from datahub.ingestion.source.redshift.config import RedshiftConfig + + +def test_incremental_lineage_default_to_false(): + config = RedshiftConfig(host_port="localhost:5439", database="test") + assert config.incremental_lineage is False diff --git a/metadata-ingestion/tests/unit/test_redshift_lineage.py b/metadata-ingestion/tests/unit/test_redshift_lineage.py index c7d6ac18e044c..db5af3a71efb9 100644 --- a/metadata-ingestion/tests/unit/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/test_redshift_lineage.py @@ -1,6 +1,8 @@ +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor from datahub.ingestion.source.redshift.report import RedshiftReport +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef def 
test_get_sources_from_query(): @@ -10,14 +12,20 @@ def test_get_sources_from_query(): test_query = """ select * from my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table_name(): @@ -27,14 +35,20 @@ def test_get_sources_from_query_with_only_table_name(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) def test_get_sources_from_query_with_database(): @@ -44,14 +58,20 @@ def test_get_sources_from_query_with_database(): test_query = """ select * from test.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - 
assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_non_default_database(): @@ -61,14 +81,20 @@ def test_get_sources_from_query_with_non_default_database(): test_query = """ select * from test2.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test2.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test2.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table(): @@ -78,27 +104,48 @@ def test_get_sources_from_query_with_only_table(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) -def test_get_sources_from_query_with_four_part_table_should_throw_exception(): +def test_cll(): config = RedshiftConfig(host_port="localhost:5439", database="test") report = RedshiftReport() test_query = """ - select * from database.schema.my_table.test + select a,b,c from db.public.customer inner join db.public.order on db.public.customer.id = 
db.public.order.customer_id """ - lineage_extractor = RedshiftLineageExtractor(config, report) - try: - lineage_extractor._get_sources_from_query(db_name="test", query=test_query) - except ValueError: - pass - - assert f"{test_query} should have thrown a ValueError exception but it didn't" + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + _, cll = lineage_extractor._get_sources_from_query(db_name="db", query=test_query) + + assert cll == [ + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="a"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="b"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="c"), + upstreams=[], + logic=None, + ), + ] diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py index 1c26ca2487e5c..888a7c0441554 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_source.py +++ b/metadata-ingestion/tests/unit/test_snowflake_source.py @@ -179,10 +179,12 @@ def test_snowflake_uri_default_authentication(): } ) - assert ( - config.get_sql_alchemy_url() - == "snowflake://user:password@acctname/?authenticator=SNOWFLAKE&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" + assert config.get_sql_alchemy_url() == ( + "snowflake://user:password@acctname" + "?application=acryl_datahub" + "&authenticator=SNOWFLAKE" + "&role=sysadmin" + "&warehouse=COMPUTE_WH" ) @@ -198,10 +200,12 @@ def test_snowflake_uri_external_browser_authentication(): } ) - assert ( - config.get_sql_alchemy_url() - == "snowflake://user@acctname/?authenticator=EXTERNALBROWSER&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" + assert config.get_sql_alchemy_url() == ( + "snowflake://user@acctname" + "?application=acryl_datahub" + "&authenticator=EXTERNALBROWSER" + "&role=sysadmin" + 
"&warehouse=COMPUTE_WH" ) @@ -219,10 +223,12 @@ def test_snowflake_uri_key_pair_authentication(): } ) - assert ( - config.get_sql_alchemy_url() - == "snowflake://user@acctname/?authenticator=SNOWFLAKE_JWT&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" + assert config.get_sql_alchemy_url() == ( + "snowflake://user@acctname" + "?application=acryl_datahub" + "&authenticator=SNOWFLAKE_JWT" + "&role=sysadmin" + "&warehouse=COMPUTE_WH" ) diff --git a/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py new file mode 100644 index 0000000000000..0384e1f918881 --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py @@ -0,0 +1,20 @@ +from collections import defaultdict +from datetime import datetime +from typing import Dict + +from datahub.utilities.ratelimiter import RateLimiter + + +def test_rate_is_limited(): + MAX_CALLS_PER_SEC = 5 + TOTAL_CALLS = 18 + actual_calls: Dict[float, int] = defaultdict(lambda: 0) + + ratelimiter = RateLimiter(max_calls=MAX_CALLS_PER_SEC, period=1) + for _ in range(TOTAL_CALLS): + with ratelimiter: + actual_calls[datetime.now().replace(microsecond=0).timestamp()] += 1 + + assert len(actual_calls) == round(TOTAL_CALLS / MAX_CALLS_PER_SEC) + assert all(calls <= MAX_CALLS_PER_SEC for calls in actual_calls.values()) + assert sum(actual_calls.values()) == TOTAL_CALLS diff --git a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py new file mode 100644 index 0000000000000..6c719d351c4c2 --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py @@ -0,0 +1,93 @@ +from typing import no_type_check + +from sqlalchemy import types +from sqlalchemy_bigquery import STRUCT + +from datahub.metadata.schema_classes import ( + ArrayTypeClass, + MapTypeClass, + NullTypeClass, + NumberTypeClass, + RecordTypeClass, +) +from 
datahub.utilities.sqlalchemy_type_converter import ( + MapType, + get_schema_fields_for_sqlalchemy_column, +) + + +def test_get_avro_schema_for_sqlalchemy_column(): + schema_fields = get_schema_fields_for_sqlalchemy_column( + column_name="test", column_type=types.INTEGER() + ) + assert len(schema_fields) == 1 + assert schema_fields[0].fieldPath == "[version=2.0].[type=int].test" + assert schema_fields[0].type.type == NumberTypeClass() + assert schema_fields[0].nativeDataType == "INTEGER" + assert schema_fields[0].nullable is True + + schema_fields = get_schema_fields_for_sqlalchemy_column( + column_name="test", column_type=types.String(), nullable=False + ) + assert len(schema_fields) == 1 + assert schema_fields[0].fieldPath == "[version=2.0].[type=string].test" + assert schema_fields[0].type.type == NumberTypeClass() + assert schema_fields[0].nativeDataType == "VARCHAR" + assert schema_fields[0].nullable is False + + +def test_get_avro_schema_for_sqlalchemy_array_column(): + schema_fields = get_schema_fields_for_sqlalchemy_column( + column_name="test", column_type=types.ARRAY(types.FLOAT()) + ) + assert len(schema_fields) == 1 + assert ( + schema_fields[0].fieldPath + == "[version=2.0].[type=struct].[type=array].[type=float].test" + ) + assert schema_fields[0].type.type == ArrayTypeClass(nestedType=["float"]) + assert schema_fields[0].nativeDataType == "array" + + +def test_get_avro_schema_for_sqlalchemy_map_column(): + schema_fields = get_schema_fields_for_sqlalchemy_column( + column_name="test", column_type=MapType(types.String(), types.BOOLEAN()) + ) + assert len(schema_fields) == 1 + assert ( + schema_fields[0].fieldPath + == "[version=2.0].[type=struct].[type=map].[type=boolean].test" + ) + assert schema_fields[0].type.type == MapTypeClass( + keyType="string", valueType="boolean" + ) + assert schema_fields[0].nativeDataType == "MapType(String(), BOOLEAN())" + + +def test_get_avro_schema_for_sqlalchemy_struct_column() -> None: + + schema_fields = 
get_schema_fields_for_sqlalchemy_column( + column_name="test", column_type=STRUCT(("test", types.INTEGER())) + ) + assert len(schema_fields) == 2 + assert ( + schema_fields[0].fieldPath == "[version=2.0].[type=struct].[type=struct].test" + ) + assert schema_fields[0].type.type == RecordTypeClass() + assert schema_fields[0].nativeDataType == "STRUCT" + + assert ( + schema_fields[1].fieldPath + == "[version=2.0].[type=struct].[type=struct].test.[type=int].test" + ) + assert schema_fields[1].type.type == NumberTypeClass() + assert schema_fields[1].nativeDataType == "INTEGER" + + +@no_type_check +def test_get_avro_schema_for_sqlalchemy_unknown_column(): + schema_fields = get_schema_fields_for_sqlalchemy_column("invalid", "test") + assert len(schema_fields) == 1 + assert schema_fields[0].type.type == NullTypeClass() + assert schema_fields[0].fieldPath == "[version=2.0].[type=null]" + assert schema_fields[0].nativeDataType == "test" diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 95de3cdb3c526..e6210f1f073f6 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -30,7 +30,7 @@ dependencies { implementation(externalDependency.kafkaAvroSerializer) { exclude group: "org.apache.avro" } - implementation externalDependency.avro_1_7 + implementation externalDependency.avro constraints { implementation('commons-collections:commons-collections:3.2.2') { because 'Vulnerability Issue' diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java index ee0d459aaa7d3..6212e57470be4 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java 
@@ -16,12 +16,14 @@ class AvroSerializer { private final Schema _recordSchema; private final Schema _genericAspectSchema; + private final Schema _changeTypeEnumSchema; private final EventFormatter _eventFormatter; public AvroSerializer() throws IOException { _recordSchema = new Schema.Parser() .parse(this.getClass().getClassLoader().getResourceAsStream("MetadataChangeProposal.avsc")); _genericAspectSchema = this._recordSchema.getField("aspect").schema().getTypes().get(1); + _changeTypeEnumSchema = this._recordSchema.getField("changeType").schema(); _eventFormatter = new EventFormatter(EventFormatter.Format.PEGASUS_JSON); } @@ -43,7 +45,7 @@ public GenericRecord serialize(MetadataChangeProposal mcp) throws IOException { genericRecord.put("aspect", genericAspect); genericRecord.put("aspectName", mcp.getAspectName()); genericRecord.put("entityType", mcp.getEntityType()); - genericRecord.put("changeType", mcp.getChangeType()); + genericRecord.put("changeType", new GenericData.EnumSymbol(_changeTypeEnumSchema, mcp.getChangeType())); return genericRecord; } } \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md index daea8d438679c..29b82aa3e68f5 100644 --- a/metadata-integration/java/datahub-protobuf/README.md +++ b/metadata-integration/java/datahub-protobuf/README.md @@ -1,6 +1,6 @@ # Protobuf Schemas -The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. In addition, you can supply the root message in cases where a single protobuf source file includes multiple non-nested messages. +The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. You can supply a file with multiple nested messages to be processed. 
If you have a file with multiple non-nested messages, you will need to separate them out into different files or supply the root message, as otherwise we will only process the first one. ## Supported Features diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java index 42884241d9f7c..d890c373f1299 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java @@ -259,7 +259,9 @@ private FieldDescriptorProto getNestedTypeFields(List pathList, Descrip messageType = messageType.getNestedType(value); } - if (pathList.get(pathSize - 2) == DescriptorProto.FIELD_FIELD_NUMBER) { + if (pathList.get(pathSize - 2) == DescriptorProto.FIELD_FIELD_NUMBER + && pathList.get(pathSize - 1) != DescriptorProto.RESERVED_RANGE_FIELD_NUMBER + && pathList.get(pathSize - 1) != DescriptorProto.RESERVED_NAME_FIELD_NUMBER) { return messageType.getField(pathList.get(pathSize - 1)); } else { return null; diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index ad54cf6524398..740fed61f13d5 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -8,9 +8,9 @@ configurations { dependencies { implementation project(':entity-registry') api project(':metadata-utils') - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-events:mxe-registration') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-utils-avro') api project(':metadata-models') api project(':metadata-service:restli-client') api project(':metadata-service:configuration') diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 13a7d16b723a7..ceaf37a1289d9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -256,13 +256,13 @@ public ScrollResult getCachedScrollResults( cacheAccess.stop(); if (result == null) { Timer.Context cacheMiss = MetricUtils.timer(this.getClass(), "scroll_cache_miss").time(); - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); cache.put(cacheKey, toJsonString(result)); cacheMiss.stop(); MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); } } else { - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); } return result; } @@ -328,7 +328,8 @@ private ScrollResult getRawScrollResults( @Nullable final String scrollId, @Nullable final String keepAlive, final int count, - final boolean fulltext) { + final boolean fulltext, + @Nullable final SearchFlags searchFlags) { if (fulltext) { return entitySearchService.fullTextScroll( entities, @@ -337,7 +338,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } else { return entitySearchService.structuredScroll(entities, input, @@ -345,7 +347,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index ef5a555e95ba8..024cf2b0abec2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -175,23 +175,26 @@ public List getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) @Nonnull @Override public ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(true); return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new SearchFlags().setFulltext(true)); + flags); } @Nonnull @Override public ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); - return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new 
SearchFlags().setFulltext(false)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(false); + return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, flags); } public Optional raw(@Nonnull String indexName, @Nullable String jsonQuery) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 004b2e0a2adc4..35cef71edd953 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; +import com.linkedin.metadata.search.utils.ESUtils; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,15 +32,6 @@ public static Map getPartialNgramConfigWithOverrides(Map KEYWORD_TYPE_MAP = ImmutableMap.of(TYPE, KEYWORD); - // Field Types - public static final String BOOLEAN = "boolean"; - public static final String DATE = "date"; - public static final String DOUBLE = "double"; - public static final String LONG = "long"; - public static final String OBJECT = "object"; - public static final String TEXT = "text"; - public static final String TOKEN_COUNT = "token_count"; - // Subfields public static final String DELIMITED = "delimited"; public static final String LENGTH = "length"; @@ -74,7 +66,7 @@ public static Map getMappings(@Nonnull final EntitySpec entitySp private static Map getMappingsForUrn() { Map subFields = new HashMap<>(); subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, 
URN_ANALYZER, SEARCH_ANALYZER, URN_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER) @@ -85,13 +77,13 @@ private static Map getMappingsForUrn() { ) )); return ImmutableMap.builder() - .put(TYPE, KEYWORD) + .put(TYPE, ESUtils.KEYWORD_FIELD_TYPE) .put(FIELDS, subFields) .build(); } private static Map getMappingsForRunId() { - return ImmutableMap.builder().put(TYPE, KEYWORD).build(); + return ImmutableMap.builder().put(TYPE, ESUtils.KEYWORD_FIELD_TYPE).build(); } private static Map getMappingsForField(@Nonnull final SearchableFieldSpec searchableFieldSpec) { @@ -104,23 +96,23 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.TEXT || fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { mappingForField.putAll(getMappingsForSearchText(fieldType)); } else if (fieldType == FieldType.BROWSE_PATH) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, SLASH_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.BROWSE_PATH_V2) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, UNIT_SEPARATOR_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_V2_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.URN || fieldType == FieldType.URN_PARTIAL) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(ANALYZER, URN_ANALYZER); mappingForField.put(SEARCH_ANALYZER, URN_SEARCH_ANALYZER); mappingForField.put(SEARCH_QUOTE_ANALYZER, 
CUSTOM_QUOTE_ANALYZER); @@ -135,13 +127,13 @@ private static Map getMappingsForField(@Nonnull final Searchable subFields.put(KEYWORD, KEYWORD_TYPE_MAP); mappingForField.put(FIELDS, subFields); } else if (fieldType == FieldType.BOOLEAN) { - mappingForField.put(TYPE, BOOLEAN); + mappingForField.put(TYPE, ESUtils.BOOLEAN_FIELD_TYPE); } else if (fieldType == FieldType.COUNT) { - mappingForField.put(TYPE, LONG); + mappingForField.put(TYPE, ESUtils.LONG_FIELD_TYPE); } else if (fieldType == FieldType.DATETIME) { - mappingForField.put(TYPE, DATE); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, OBJECT); + mappingForField.put(TYPE, ESUtils.OBJECT_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } @@ -149,10 +141,10 @@ private static Map getMappingsForField(@Nonnull final Searchable searchableFieldSpec.getSearchableAnnotation() .getHasValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, BOOLEAN))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.BOOLEAN_FIELD_TYPE))); searchableFieldSpec.getSearchableAnnotation() .getNumValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, LONG))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.LONG_FIELD_TYPE))); mappings.putAll(getMappingsForFieldNameAliases(searchableFieldSpec)); return mappings; @@ -160,7 +152,7 @@ private static Map getMappingsForField(@Nonnull final Searchable private static Map getMappingsForKeyword() { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); // Add keyword subfield without lowercase filter mappingForField.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP)); @@ -169,7 +161,7 @@ private static Map getMappingsForKeyword() 
{ private static Map getMappingsForSearchText(FieldType fieldType) { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); Map subFields = new HashMap<>(); if (fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { @@ -186,14 +178,14 @@ private static Map getMappingsForSearchText(FieldType fieldType) String fieldName = entry.getKey(); String analyzerName = entry.getValue(); subFields.put(fieldName, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, analyzerName )); } } } subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, TEXT_ANALYZER, SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER)); @@ -206,7 +198,7 @@ private static Map getMappingsForSearchText(FieldType fieldType) private static Map getMappingsForSearchScoreField( @Nonnull final SearchScoreFieldSpec searchScoreFieldSpec) { return ImmutableMap.of(searchScoreFieldSpec.getSearchScoreAnnotation().getFieldName(), - ImmutableMap.of(TYPE, DOUBLE)); + ImmutableMap.of(TYPE, ESUtils.DOUBLE_FIELD_TYPE)); } private static Map getMappingsForFieldNameAliases(@Nonnull final SearchableFieldSpec searchableFieldSpec) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 5fcc10b7af5cf..49571a60d5f21 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -202,7 +202,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi if (!finalSearchFlags.isSkipHighlighting()) { 
searchSourceBuilder.highlighter(_highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); if (finalSearchFlags.isGetSuggestions()) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -242,8 +242,10 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi BoolQueryBuilder filterQuery = getFilterQuery(filter); searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); - searchSourceBuilder.highlighter(getHighlights()); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + if (!finalSearchFlags.isSkipHighlighting()) { + searchSourceBuilder.highlighter(_highlights); + } + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -270,7 +272,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -301,7 +303,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr searchSourceBuilder.size(size); ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9a7d9a1b4c420..53765acb8e29e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; @@ -49,7 +52,28 @@ public class ESUtils { public static final int MAX_RESULT_SIZE = 10000; public static final String OPAQUE_ID_HEADER = "X-Opaque-Id"; public static final String HEADER_VALUE_DELIMITER = "|"; - public static final String KEYWORD_TYPE = "keyword"; + + // Field types + public static final String KEYWORD_FIELD_TYPE = "keyword"; + public static final String BOOLEAN_FIELD_TYPE = "boolean"; + public static final String DATE_FIELD_TYPE = "date"; + public static final String DOUBLE_FIELD_TYPE = "double"; + public static final String LONG_FIELD_TYPE = "long"; + public static final String OBJECT_FIELD_TYPE = "object"; + public static final String TEXT_FIELD_TYPE = "text"; + public static final String TOKEN_COUNT_FIELD_TYPE = "token_count"; + // End of field types + + public static final Set FIELD_TYPES_STORED_AS_KEYWORD = Set.of( + SearchableAnnotation.FieldType.KEYWORD, + SearchableAnnotation.FieldType.TEXT, + SearchableAnnotation.FieldType.TEXT_PARTIAL, + SearchableAnnotation.FieldType.WORD_GRAM); + public static final Set FIELD_TYPES_STORED_AS_TEXT = Set.of( + SearchableAnnotation.FieldType.BROWSE_PATH, + SearchableAnnotation.FieldType.BROWSE_PATH_V2, + 
SearchableAnnotation.FieldType.URN, + SearchableAnnotation.FieldType.URN_PARTIAL); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -174,6 +198,25 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); } + public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { + if (FIELD_TYPES_STORED_AS_KEYWORD.contains(fieldType)) { + return KEYWORD_FIELD_TYPE; + } else if (FIELD_TYPES_STORED_AS_TEXT.contains(fieldType)) { + return TEXT_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.BOOLEAN) { + return BOOLEAN_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.COUNT) { + return LONG_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.DATETIME) { + return DATE_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.OBJECT) { + return OBJECT_FIELD_TYPE; + } else { + log.warn("FieldType {} has no mappings implemented", fieldType); + return null; + } + } + /** * Populates source field of search query with the sort order as per the criterion provided. 
* @@ -189,14 +232,39 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion * @param sortCriterion {@link SortCriterion} to be applied to the search results */ public static void buildSortOrder(@Nonnull SearchSourceBuilder searchSourceBuilder, - @Nullable SortCriterion sortCriterion) { + @Nullable SortCriterion sortCriterion, List entitySpecs) { if (sortCriterion == null) { searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); } else { + Optional fieldTypeForDefault = Optional.empty(); + for (EntitySpec entitySpec : entitySpecs) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); + for (SearchableFieldSpec fieldSpec : fieldSpecs) { + SearchableAnnotation annotation = fieldSpec.getSearchableAnnotation(); + if (annotation.getFieldName().equals(sortCriterion.getField()) + || annotation.getFieldNameAliases().contains(sortCriterion.getField())) { + fieldTypeForDefault = Optional.of(fieldSpec.getSearchableAnnotation().getFieldType()); + break; + } + } + if (fieldTypeForDefault.isPresent()) { + break; + } + } + if (fieldTypeForDefault.isEmpty()) { + log.warn("Sort criterion field " + sortCriterion.getField() + " was not found in any entity spec to be searched"); + } final SortOrder esSortOrder = (sortCriterion.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) ? 
SortOrder.ASC : SortOrder.DESC; - searchSourceBuilder.sort(new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder).unmappedType(KEYWORD_TYPE)); + FieldSortBuilder sortBuilder = new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder); + if (fieldTypeForDefault.isPresent()) { + String esFieldtype = getElasticTypeForFieldType(fieldTypeForDefault.get()); + if (esFieldtype != null) { + sortBuilder.unmappedType(esFieldtype); + } + } + searchSourceBuilder.sort(sortBuilder); } if (sortCriterion == null || !sortCriterion.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)) { searchSourceBuilder.sort(new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index a496fc427138e..3e8f83a531b59 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -169,7 +169,7 @@ public List getIndexSizes() { List res = new ArrayList<>(); try { String indicesPattern = _indexConvention.getAllTimeseriesAspectIndicesPattern(); - Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", indicesPattern + "/_stats")); + Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", "/" + indicesPattern + "/_stats")); JsonNode body = new ObjectMapper().readTree(r.getEntity().getContent()); body.get("indices").fields().forEachRemaining(entry -> { TimeseriesIndexSizeResult elemResult = new TimeseriesIndexSizeResult(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index 
461a146022446..696e3b62834bd 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -47,8 +47,10 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import org.junit.Assert; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.action.search.SearchRequest; import org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -108,6 +110,7 @@ abstract public class LineageServiceTestBase extends AbstractTestNGSpringContext private GraphService _graphService; private CacheManager _cacheManager; private LineageSearchService _lineageSearchService; + private RestHighLevelClient _searchClientSpy; private static final String ENTITY_NAME = "testEntity"; private static final Urn TEST_URN = TestEntityUtil.getTestEntityUrn(); @@ -162,10 +165,11 @@ private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + _searchClientSpy = spy(getSearchClient()); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClientSpy, _indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); - ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); + ESBrowseDAO browseDAO = 
new ESBrowseDAO(_entityRegistry, _searchClientSpy, _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); + ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClientSpy, _indexConvention, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -246,9 +250,15 @@ public void testSearchService() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); + Mockito.reset(_searchClientSpy); searchResult = searchAcrossLineage(null, TEST1); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + // Verify that highlighting was turned off in the query + ArgumentCaptor searchRequestCaptor = ArgumentCaptor.forClass(SearchRequest.class); + Mockito.verify(_searchClientSpy, times(1)).search(searchRequestCaptor.capture(), any()); + SearchRequest capturedRequest = searchRequestCaptor.getValue(); + assertNull(capturedRequest.source().highlighter()); clearCache(false); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 1660504810296..69dd5c80bef1d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -22,12 +22,15 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import 
com.linkedin.metadata.search.AggregationMetadata; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; +import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.r2.RemoteInvocationException; import org.junit.Assert; import org.opensearch.client.RequestOptions; @@ -36,6 +39,9 @@ import org.opensearch.client.indices.AnalyzeResponse; import org.opensearch.client.indices.GetMappingsRequest; import org.opensearch.client.indices.GetMappingsResponse; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -54,11 +60,7 @@ import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; -import static io.datahubproject.test.search.SearchTestUtils.autocomplete; -import static io.datahubproject.test.search.SearchTestUtils.scroll; -import static io.datahubproject.test.search.SearchTestUtils.search; -import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static io.datahubproject.test.search.SearchTestUtils.searchStructured; +import static io.datahubproject.test.search.SearchTestUtils.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -174,6 +176,48 @@ public void testSearchFieldConfig() throws IOException { } } + @Test + public void testGetSortOrder() { + String dateFieldName = 
"lastOperationTime"; + List entityNamesToTestSearch = List.of("dataset", "chart", "corpgroup"); + List entitySpecs = entityNamesToTestSearch.stream().map( + name -> getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); + SearchSourceBuilder builder = new SearchSourceBuilder(); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.DESCENDING).setField(dateFieldName); + ESUtils.buildSortOrder(builder, sortCriterion, entitySpecs); + List> sorts = builder.sorts(); + assertEquals(sorts.size(), 2); // sort by last modified and then by urn + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(dateFieldName)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.DESC); + assertEquals(fieldSortBuilder.unmappedType(), "date"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + + // Test alias field + String entityNameField = "_entityName"; + SearchSourceBuilder nameBuilder = new SearchSourceBuilder(); + SortCriterion nameCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField(entityNameField); + ESUtils.buildSortOrder(nameBuilder, nameCriterion, entitySpecs); + sorts = nameBuilder.sorts(); + assertEquals(sorts.size(), 2); + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(entityNameField)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.ASC); + assertEquals(fieldSortBuilder.unmappedType(), "keyword"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + } + @Test public void testDatasetHasTags() throws IOException { GetMappingsRequest req = new GetMappingsRequest() @@ -1454,6 +1498,16 @@ public void testColumnExactMatch() { "Expected table with 
column name exact match first"); } + @Test + public void testSortOrdering() { + String query = "unit_data"; + SortCriterion criterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); + SearchResult result = getSearchService().searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, criterion, 0, + 100, new SearchFlags().setFulltext(true).setSkipCache(true), null); + assertTrue(result.getEntities().size() > 2, + String.format("%s - Expected search results to have at least two results", query)); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient().indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 90c6c523c588f..0ea035a10f91d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -97,6 +97,30 @@ public void testDatasetFieldsAndHighlights() { ), "unexpected lineage fields in highlights: " + highlightFields); } + @Test + public void testSearchRequestHandlerHighlightingTurnedOff() { + SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequest searchRequest = requestHandler.getSearchRequest("testQuery", null, null, 0, + 10, new SearchFlags().setFulltext(false).setSkipHighlighting(true), null); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertEquals(sourceBuilder.from(), 0); + assertEquals(sourceBuilder.size(), 10); + // Filters + Collection aggBuilders = sourceBuilder.aggregations().getAggregatorFactories(); + // Expect 2 aggregations: textFieldOverride and _index + 
assertEquals(aggBuilders.size(), 2); + for (AggregationBuilder aggBuilder : aggBuilders) { + if (aggBuilder.getName().equals("textFieldOverride")) { + TermsAggregationBuilder filterPanelBuilder = (TermsAggregationBuilder) aggBuilder; + assertEquals(filterPanelBuilder.field(), "textFieldOverride.keyword"); + } else if (!aggBuilder.getName().equals("_entityType")) { + fail("Found unexepected aggregation: " + aggBuilder.getName()); + } + } + // Highlights should not be present + assertNull(sourceBuilder.highlighter()); + } + @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index cc60ba8679e1f..1362a0f69eff2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -45,6 +45,7 @@ import com.linkedin.timeseries.GroupingBucket; import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; +import com.linkedin.timeseries.TimeseriesIndexSizeResult; import org.opensearch.client.RestHighLevelClient; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; @@ -884,4 +885,23 @@ public void testCountByFilterAfterDelete() throws InterruptedException { _elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, urnAndTimeFilter); assertEquals(count, 0L); } + + @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) + public void testGetIndexSizes() { + List result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + //CHECKSTYLE:OFF + 
/* + Example result: + {aspectName=testentityprofile, sizeMb=52.234, + indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} + {aspectName=testentityprofile, sizeMb=0.208, + indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} + */ + // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there + //CHECKSTYLE:ON + assertTrue(result.size() > 0); + assertTrue( + result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals( + "es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1"))); + } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java index 67e1ee368f513..34aa6978f742f 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -5,7 +5,9 @@ import java.time.Duration; public interface SearchTestContainer { - String SEARCH_JAVA_OPTS = "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456"; + + String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456"; + Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min GenericContainer startContainer(); diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index d36fd0de40d03..fcb8b62e4ac9d 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -21,9 +21,9 @@ dependencies { implementation project(':ingestion-scheduler') implementation project(':metadata-utils') implementation project(":entity-registry") - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation 
project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':datahub-graphql-core') implementation externalDependency.elasticSearchRest diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index 0bca55e0e5f92..97eec9fcff051 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -17,9 +17,9 @@ dependencies { } implementation project(':metadata-utils') implementation project(':metadata-events:mxe-schemas') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-io') implementation project(':metadata-service:restli-client') implementation spec.product.pegasus.restliClient diff --git a/metadata-jobs/pe-consumer/build.gradle b/metadata-jobs/pe-consumer/build.gradle index 1899a4de15635..81e8b8c9971f0 100644 --- a/metadata-jobs/pe-consumer/build.gradle +++ b/metadata-jobs/pe-consumer/build.gradle @@ -10,9 +10,9 @@ configurations { dependencies { avro project(path: ':metadata-models', configuration: 'avroSchema') implementation project(':li-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation(project(':metadata-service:factories')) { exclude group: 'org.neo4j.test' } diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index 53e7765152aef..bd8052283e168 100644 --- a/metadata-models/build.gradle +++ 
b/metadata-models/build.gradle @@ -23,6 +23,7 @@ dependencies { } } api project(':li-utils') + api project(path: ':li-utils', configuration: "dataTemplate") dataModel project(':li-utils') compileOnly externalDependency.lombok diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index ae2a58028057b..e161270145a88 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -32,6 +32,11 @@ record AssertionInfo includes CustomProperties, ExternalReference { */ VOLUME + /** + * A raw SQL-statement based assertion + */ + SQL + /** * A schema or structural assertion. * @@ -56,7 +61,12 @@ record AssertionInfo includes CustomProperties, ExternalReference { volumeAssertion: optional VolumeAssertionInfo /** - * An schema Assertion definition. This field is populated when the type is DATASET_SCHEMA + * A SQL Assertion definition. This field is populated when the type is SQL. + */ + sqlAssertion: optional SqlAssertionInfo + + /** + * An schema Assertion definition. This field is populated when the type is DATA_SCHEMA */ schemaAssertion: optional SchemaAssertionInfo @@ -67,4 +77,9 @@ record AssertionInfo includes CustomProperties, ExternalReference { * the platform where it was ingested from. 
*/ source: optional AssertionSource + + /** + * An optional human-readable description of the assertion + */ + description: optional string } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl new file mode 100644 index 0000000000000..f6ce738252f35 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl @@ -0,0 +1,67 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a SQL Assertion +*/ +record SqlAssertionInfo { + /** + * The type of the SQL assertion being monitored. + */ + @Searchable = {} + type: enum SqlAssertionType { + /** + * A SQL Metric Assertion, e.g. one based on a numeric value returned by an arbitrary SQL query. + */ + METRIC + /** + * A SQL assertion that is evaluated against the CHANGE in a metric assertion + * over time. + */ + METRIC_CHANGE + } + + /** + * The entity targeted by this SQL check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * The SQL statement to be executed when evaluating the assertion (or computing the metric). + * This should be a valid and complete statement, executable by itself. + * + * Usually this should be a SELECT query statement. + */ + statement: string + + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + * This value is required if the type is METRIC_CHANGE. + */ + changeType: optional AssertionValueChangeType + + /** + * The operator you'd like to apply to the result of the SQL query. + * + * Note that at this time, only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. 
+ */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl index b3e237202fc2f..f777b5d6e12e7 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl @@ -37,10 +37,10 @@ record DataHubIngestionSourceInfo { * Parameters associated with the Ingestion Source */ config: record DataHubIngestionSourceConfig { - /** - * The JSON recipe to use for ingestion - */ - recipe: string + /** + * The JSON recipe to use for ingestion + */ + recipe: string /** * The PyPI version of the datahub CLI to use when executing a recipe @@ -56,5 +56,10 @@ record DataHubIngestionSourceInfo { * Whether or not to run this ingestion source in debug mode */ debugMode: optional boolean + + /** + * Extra arguments for the ingestion run. + */ + extraArgs: optional map[string, string] } } \ No newline at end of file diff --git a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java index f9cf1b01e1762..d3c5ba822ac04 100644 --- a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java +++ b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java @@ -29,4 +29,6 @@ public class AuthenticationConfiguration { * The lifespan of a UI session token. 
*/ private long sessionTokenDurationMs; + + private TokenServiceConfiguration tokenService; } diff --git a/metadata-service/auth-config/src/main/java/com/datahub/authentication/TokenServiceConfiguration.java b/metadata-service/auth-config/src/main/java/com/datahub/authentication/TokenServiceConfiguration.java new file mode 100644 index 0000000000000..0a606f0f06d92 --- /dev/null +++ b/metadata-service/auth-config/src/main/java/com/datahub/authentication/TokenServiceConfiguration.java @@ -0,0 +1,15 @@ +package com.datahub.authentication; + +import lombok.Data; + + +@Data +/** + * Configurations for DataHub token service + */ +public class TokenServiceConfiguration { + private String signingKey; + private String salt; + private String issuer; + private String signingAlgorithm; +} diff --git a/metadata-service/auth-filter/build.gradle b/metadata-service/auth-filter/build.gradle index 2dd07ef10274c..61e9015adc942 100644 --- a/metadata-service/auth-filter/build.gradle +++ b/metadata-service/auth-filter/build.gradle @@ -14,4 +14,6 @@ dependencies { annotationProcessor externalDependency.lombok testImplementation externalDependency.mockito + testImplementation externalDependency.testng + testImplementation externalDependency.springBootTest } \ No newline at end of file diff --git a/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthTestConfiguration.java b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthTestConfiguration.java new file mode 100644 index 0000000000000..05ca428283a6c --- /dev/null +++ b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthTestConfiguration.java @@ -0,0 +1,79 @@ +package com.datahub.auth.authentication; + +import com.datahub.auth.authentication.filter.AuthenticationFilter; +import com.datahub.authentication.AuthenticationConfiguration; +import com.datahub.authentication.AuthenticatorConfiguration; +import com.datahub.authentication.TokenServiceConfiguration; 
+import com.datahub.authentication.token.StatefulTokenService; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.config.AuthPluginConfiguration; +import com.linkedin.metadata.config.DataHubConfiguration; +import com.linkedin.metadata.config.PluginConfiguration; +import com.linkedin.metadata.entity.EntityService; +import java.util.List; +import java.util.Map; +import javax.servlet.ServletException; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.DependsOn; + +import static org.mockito.Mockito.*; + +@Configuration +public class AuthTestConfiguration { + + + @Bean + public EntityService entityService() { + return mock(EntityService.class); + } + + @Bean("dataHubTokenService") + public StatefulTokenService statefulTokenService(ConfigurationProvider configurationProvider, EntityService entityService) { + TokenServiceConfiguration tokenServiceConfiguration = configurationProvider.getAuthentication().getTokenService(); + return new StatefulTokenService( + tokenServiceConfiguration.getSigningKey(), + tokenServiceConfiguration.getSigningAlgorithm(), + tokenServiceConfiguration.getIssuer(), + entityService, + tokenServiceConfiguration.getSalt() + ); + } + + @Bean + public ConfigurationProvider configurationProvider() { + ConfigurationProvider configurationProvider = new ConfigurationProvider(); + AuthenticationConfiguration authenticationConfiguration = new AuthenticationConfiguration(); + authenticationConfiguration.setEnabled(true); + configurationProvider.setAuthentication(authenticationConfiguration); + DataHubConfiguration dataHubConfiguration = new DataHubConfiguration(); + PluginConfiguration pluginConfiguration = new PluginConfiguration(); + AuthPluginConfiguration authPluginConfiguration = new AuthPluginConfiguration(); + authenticationConfiguration.setSystemClientId("__datahub_system"); + 
authenticationConfiguration.setSystemClientSecret("JohnSnowKnowsNothing"); + TokenServiceConfiguration tokenServiceConfiguration = new TokenServiceConfiguration(); + tokenServiceConfiguration.setIssuer("datahub-metadata-service"); + tokenServiceConfiguration.setSigningKey("WnEdIeTG/VVCLQqGwC/BAkqyY0k+H8NEAtWGejrBI94="); + tokenServiceConfiguration.setSalt("ohDVbJBvHHVJh9S/UA4BYF9COuNnqqVhr9MLKEGXk1O="); + tokenServiceConfiguration.setSigningAlgorithm("HS256"); + authenticationConfiguration.setTokenService(tokenServiceConfiguration); + AuthenticatorConfiguration authenticator = new AuthenticatorConfiguration(); + authenticator.setType("com.datahub.authentication.authenticator.DataHubTokenAuthenticator"); + authenticator.setConfigs(Map.of("signingKey", "WnEdIeTG/VVCLQqGwC/BAkqyY0k+H8NEAtWGejrBI94=", + "salt", "ohDVbJBvHHVJh9S/UA4BYF9COuNnqqVhr9MLKEGXk1O=")); + List authenticators = List.of(authenticator); + authenticationConfiguration.setAuthenticators(authenticators); + authPluginConfiguration.setPath(""); + pluginConfiguration.setAuth(authPluginConfiguration); + dataHubConfiguration.setPlugin(pluginConfiguration); + configurationProvider.setDatahub(dataHubConfiguration); + return configurationProvider; + } + + @Bean + // TODO: Constructor injection + @DependsOn({"configurationProvider", "dataHubTokenService", "entityService"}) + public AuthenticationFilter authenticationFilter() throws ServletException { + return new AuthenticationFilter(); + } +} diff --git a/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java new file mode 100644 index 0000000000000..2ac65bf09c912 --- /dev/null +++ b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java @@ -0,0 +1,53 @@ +package com.datahub.auth.authentication; + +import 
com.datahub.auth.authentication.filter.AuthenticationFilter; +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.token.StatefulTokenService; +import com.datahub.authentication.token.TokenException; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import javax.servlet.FilterChain; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +import static com.datahub.authentication.AuthenticationConstants.*; +import static org.mockito.Mockito.*; + + +@ContextConfiguration(classes = { AuthTestConfiguration.class }) +public class AuthenticationFilterTest extends AbstractTestNGSpringContextTests { + + @Autowired + AuthenticationFilter _authenticationFilter; + + @Autowired + StatefulTokenService _statefulTokenService; + + @Test + public void testExpiredToken() throws ServletException, IOException, TokenException { + _authenticationFilter.init(null); + HttpServletRequest servletRequest = mock(HttpServletRequest.class); + HttpServletResponse servletResponse = mock(HttpServletResponse.class); + FilterChain filterChain = mock(FilterChain.class); + Actor actor = new Actor(ActorType.USER, "datahub"); +// String token = _statefulTokenService.generateAccessToken(TokenType.SESSION, actor, 0L, System.currentTimeMillis(), "token", +// "token", actor.toUrnStr()); + // Token generated 9/11/23, invalid for all future dates + String token = "eyJhbGciOiJIUzI1NiJ9.eyJhY3RvclR5cGUiOiJVU0VSIZCI6ImRhdGFodWIiLCJ0eXBlIjoiU0VTU0lPTiIsInZlcnNpb24iOiIxIiwian" + + 
"RpIjoiMmI0MzZkZDAtYjEwOS00N2UwLWJmYTEtMzM2ZmU4MTU4MDE1Iiwic3ViIjoiZGF0YWh1YiIsImV4cCI6MTY5NDU0NzA2OCwiaXNzIjoiZGF" + + "0YWh1Yi1tZXRhZGF0YS1zZXJ2aWNlIn0.giqx7J5a9mxuubG6rXdAMoaGlcII-fqY-W82Wm7OlLI"; + when(servletRequest.getHeaderNames()).thenReturn(Collections.enumeration(List.of(AUTHORIZATION_HEADER_NAME))); + when(servletRequest.getHeader(AUTHORIZATION_HEADER_NAME)) + .thenReturn("Bearer " + token); + + _authenticationFilter.doFilter(servletRequest, servletResponse, filterChain); + verify(servletResponse, times(1)).sendError(eq(HttpServletResponse.SC_UNAUTHORIZED), anyString()); + } +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java index d62c37160f816..f8eca541e1efb 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java @@ -82,7 +82,7 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request } @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { if (this.authorizers.isEmpty()) { return null; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index f653ccf72cf54..e30fb93109915 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -8,6 +8,8 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.policy.DataHubPolicyInfo; + +import java.net.URISyntaxException; import 
java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -17,6 +19,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -51,11 +55,12 @@ public enum AuthorizationMode { // Maps privilege name to the associated set of policies for fast access. // Not concurrent data structure because writes are always against the entire thing. private final Map> _policyCache = new HashMap<>(); // Shared Policy Cache. + private final ReadWriteLock _lockPolicyCache = new ReentrantReadWriteLock(); private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1); private final PolicyRefreshRunnable _policyRefreshRunnable; private final PolicyEngine _policyEngine; - private ResourceSpecResolver _resourceSpecResolver; + private EntitySpecResolver _entitySpecResolver; private AuthorizationMode _mode; public static final String ALL = "ALL"; @@ -69,14 +74,14 @@ public DataHubAuthorizer( _systemAuthentication = Objects.requireNonNull(systemAuthentication); _mode = Objects.requireNonNull(mode); _policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient)); - _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache); + _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, _lockPolicyCache); _refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS); } @Override public void init(@Nonnull Map authorizerConfig, @Nonnull AuthorizerContext ctx) { // Pass. No static config. 
- _resourceSpecResolver = Objects.requireNonNull(ctx.getResourceSpecResolver()); + _entitySpecResolver = Objects.requireNonNull(ctx.getEntitySpecResolver()); } public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request) { @@ -86,30 +91,43 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, null); } - Optional resolvedResourceSpec = request.getResourceSpec().map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve); - // 1. Fetch the policies relevant to the requested privilege. - final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); + _lockPolicyCache.readLock().lock(); + try { + // 1. Fetch the policies relevant to the requested privilege. + final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); - // 2. Evaluate each policy. - for (DataHubPolicyInfo policy : policiesToEvaluate) { - if (isRequestGranted(policy, request, resolvedResourceSpec)) { - // Short circuit if policy has granted privileges to this actor. - return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, - String.format("Granted by policy with type: %s", policy.getType())); + // 2. Evaluate each policy. + for (DataHubPolicyInfo policy : policiesToEvaluate) { + if (isRequestGranted(policy, request, resolvedResourceSpec)) { + // Short circuit if policy has granted privileges to this actor. 
+ return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, + String.format("Granted by policy with type: %s", policy.getType())); + } } + return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); + } finally { + _lockPolicyCache.readLock().unlock(); } - return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); } - public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { + public List getGrantedPrivileges(final String actor, final Optional resourceSpec) { + + _lockPolicyCache.readLock().lock(); + try { + // 1. Fetch all policies + final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); - // 1. Fetch all policies - final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); + Urn actorUrn = UrnUtils.getUrn(actor); + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor)); - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - return _policyEngine.getGrantedPrivileges(policiesToEvaluate, UrnUtils.getUrn(actorUrn), resolvedResourceSpec); + return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec); + } finally { + _lockPolicyCache.readLock().unlock(); + } } /** @@ -118,37 +136,43 @@ public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { - // Step 1: Find policies granting the privilege. 
- final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + final Optional resourceSpec) { final List authorizedUsers = new ArrayList<>(); final List authorizedGroups = new ArrayList<>(); boolean allUsers = false; boolean allGroups = false; - // Step 2: For each policy, determine whether the resource is a match. - for (DataHubPolicyInfo policy : policiesToEvaluate) { - if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) { - // Policy is not active, skip. - continue; - } + _lockPolicyCache.readLock().lock(); + try { + // Step 1: Find policies granting the privilege. + final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - // Step 3: For each matching policy, add actors that are authorized. - authorizedUsers.addAll(matchingActors.getUsers()); - authorizedGroups.addAll(matchingActors.getGroups()); - if (matchingActors.allUsers()) { - allUsers = true; - } - if (matchingActors.allGroups()) { - allGroups = true; + + // Step 2: For each policy, determine whether the resource is a match. + for (DataHubPolicyInfo policy : policiesToEvaluate) { + if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) { + // Policy is not active, skip. + continue; + } + + final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec); + + // Step 3: For each matching policy, add actors that are authorized. 
+ authorizedUsers.addAll(matchingActors.getUsers()); + authorizedGroups.addAll(matchingActors.getGroups()); + if (matchingActors.allUsers()) { + allUsers = true; + } + if (matchingActors.allGroups()) { + allGroups = true; + } } + } finally { + _lockPolicyCache.readLock().unlock(); } - // Step 4: Return all authorized users and groups. return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, allUsers, allGroups); } @@ -180,19 +204,36 @@ private boolean isSystemRequest(final AuthorizationRequest request, final Authen /** * Returns true if a policy grants the requested privilege for a given actor and resource. */ - private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { + private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { if (AuthorizationMode.ALLOW_ALL.equals(mode())) { return true; } + + Optional actorUrn = getUrnFromRequestActor(request.getActorUrn()); + if (actorUrn.isEmpty()) { + return false; + } + + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve( + new EntitySpec(actorUrn.get().getEntityType(), request.getActorUrn())); final PolicyEngine.PolicyEvaluationResult result = _policyEngine.evaluatePolicy( policy, - request.getActorUrn(), + resolvedActorSpec, request.getPrivilege(), resourceSpec ); return result.isGranted(); } + private Optional getUrnFromRequestActor(String actor) { + try { + return Optional.of(Urn.createFromString(actor)); + } catch (URISyntaxException e) { + log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actor)); + return Optional.empty(); + } + } + /** * A {@link Runnable} used to periodically fetch a new instance of the policies Cache. 
* @@ -206,6 +247,7 @@ static class PolicyRefreshRunnable implements Runnable { private final Authentication _systemAuthentication; private final PolicyFetcher _policyFetcher; private final Map> _policyCache; + private final ReadWriteLock _lockPolicyCache; @Override public void run() { @@ -231,10 +273,13 @@ public void run() { "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. start: {}, count: {}", start, count, e); return; } - synchronized (_policyCache) { - _policyCache.clear(); - _policyCache.putAll(newCache); - } + } + _lockPolicyCache.writeLock().lock(); + try { + _policyCache.clear(); + _policyCache.putAll(newCache); + } finally { + _lockPolicyCache.writeLock().unlock(); } log.debug(String.format("Successfully fetched %s policies.", total)); } catch (Exception e) { diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java new file mode 100644 index 0000000000000..65b0329a9c4f2 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java @@ -0,0 +1,41 @@ +package com.datahub.authorization; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.GroupMembershipFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; +import com.google.common.collect.ImmutableList; +import 
com.linkedin.entity.client.EntityClient; +import com.linkedin.util.Pair; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + + +public class DefaultEntitySpecResolver implements EntitySpecResolver { + private final List _entityFieldResolverProviders; + + public DefaultEntitySpecResolver(Authentication systemAuthentication, EntityClient entityClient) { + _entityFieldResolverProviders = + ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), + new DomainFieldResolverProvider(entityClient, systemAuthentication), + new OwnerFieldResolverProvider(entityClient, systemAuthentication), + new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication), + new GroupMembershipFieldResolverProvider(entityClient, systemAuthentication)); + } + + @Override + public ResolvedEntitySpec resolve(EntitySpec entitySpec) { + return new ResolvedEntitySpec(entitySpec, getFieldResolvers(entitySpec)); + } + + private Map getFieldResolvers(EntitySpec entitySpec) { + return _entityFieldResolverProviders.stream() + .flatMap(resolver -> resolver.getFieldTypes().stream().map(fieldType -> Pair.of(fieldType, resolver))) + .collect(Collectors.toMap(Pair::getKey, pair -> pair.getValue().getFieldResolver(entitySpec))); + } +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java deleted file mode 100644 index cd4e0b0967829..0000000000000 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.datahub.authorization; - -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; -import com.datahub.authentication.Authentication; -import 
com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; -import com.google.common.collect.ImmutableList; -import com.linkedin.entity.client.EntityClient; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - - -public class DefaultResourceSpecResolver implements ResourceSpecResolver { - private final List _resourceFieldResolverProviders; - - public DefaultResourceSpecResolver(Authentication systemAuthentication, EntityClient entityClient) { - _resourceFieldResolverProviders = - ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), - new DomainFieldResolverProvider(entityClient, systemAuthentication), - new OwnerFieldResolverProvider(entityClient, systemAuthentication)); - } - - @Override - public ResolvedResourceSpec resolve(ResourceSpec resourceSpec) { - return new ResolvedResourceSpec(resourceSpec, getFieldResolvers(resourceSpec)); - } - - private Map getFieldResolvers(ResourceSpec resourceSpec) { - return _resourceFieldResolverProviders.stream() - .collect(Collectors.toMap(ResourceFieldResolverProvider::getFieldType, - hydrator -> hydrator.getFieldResolver(resourceSpec))); - } -} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java index 76ed18e2baf78..0dbb9cd132f8a 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java @@ -26,7 +26,7 @@ private FilterUtils() { * Creates new PolicyMatchCriterion with field and value, using EQUAL PolicyMatchCondition. 
*/ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values) { + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values) { return newCriterion(field, values, PolicyMatchCondition.EQUALS); } @@ -34,7 +34,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchCriterion with field, value and PolicyMatchCondition. */ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values, + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values, @Nonnull PolicyMatchCondition policyMatchCondition) { return new PolicyMatchCriterion().setField(field.name()) .setValues(new StringArray(values)) @@ -45,7 +45,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchFilter from a map of Criteria by removing null-valued Criteria and using EQUAL PolicyMatchCondition (default). 
*/ @Nonnull - public static PolicyMatchFilter newFilter(@Nullable Map> params) { + public static PolicyMatchFilter newFilter(@Nullable Map> params) { if (params == null) { return EMPTY_FILTER; } @@ -61,7 +61,7 @@ public static PolicyMatchFilter newFilter(@Nullable Map values) { + public static PolicyMatchFilter newFilter(@Nonnull EntityFieldType field, @Nonnull List values) { return newFilter(Collections.singletonMap(field, values)); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java index 6a36fac7de4e0..f8c017ea74e1f 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java @@ -1,7 +1,6 @@ package com.datahub.authorization; import com.datahub.authentication.Authentication; -import com.google.common.collect.ImmutableSet; import com.linkedin.common.Owner; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; @@ -11,8 +10,6 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.GroupMembership; -import com.linkedin.identity.NativeGroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -23,7 +20,7 @@ import com.linkedin.policy.PolicyMatchCriterion; import com.linkedin.policy.PolicyMatchCriterionArray; import com.linkedin.policy.PolicyMatchFilter; -import java.net.URISyntaxException; + import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -34,6 +31,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ 
-49,37 +47,22 @@ public class PolicyEngine { public PolicyEvaluationResult evaluatePolicy( final DataHubPolicyInfo policy, - final String actorStr, + final ResolvedEntitySpec resolvedActorSpec, final String privilege, - final Optional resource) { - try { - // Currently Actor must be an urn. Consider whether this contract should be pushed up. - final Urn actor = Urn.createFromString(actorStr); - return evaluatePolicy(policy, actor, privilege, resource); - } catch (URISyntaxException e) { - log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actorStr)); - return PolicyEvaluationResult.DENIED; - } - } - - public PolicyEvaluationResult evaluatePolicy( - final DataHubPolicyInfo policy, - final Urn actor, - final String privilege, - final Optional resource) { + final Optional resource) { final PolicyEvaluationContext context = new PolicyEvaluationContext(); log.debug("Evaluating policy {}", policy.getDisplayName()); // If the privilege is not in scope, deny the request. 
- if (!isPrivilegeMatch(privilege, policy.getPrivileges(), context)) { + if (!isPrivilegeMatch(privilege, policy.getPrivileges())) { log.debug("Policy denied based on irrelevant privileges {} for {}", policy.getPrivileges(), privilege); return PolicyEvaluationResult.DENIED; } // If policy is not applicable, deny the request - if (!isPolicyApplicable(policy, actor, resource, context)) { - log.debug("Policy does not applicable for actor {} and resource {}", actor, resource); + if (!isPolicyApplicable(policy, resolvedActorSpec, resource, context)) { + log.debug("Policy does not applicable for actor {} and resource {}", resolvedActorSpec.getSpec().getEntity(), resource); return PolicyEvaluationResult.DENIED; } @@ -89,7 +72,7 @@ public PolicyEvaluationResult evaluatePolicy( public PolicyActors getMatchingActors( final DataHubPolicyInfo policy, - final Optional resource) { + final Optional resource) { final List users = new ArrayList<>(); final List groups = new ArrayList<>(); boolean allUsers = false; @@ -126,8 +109,8 @@ public PolicyActors getMatchingActors( private boolean isPolicyApplicable( final DataHubPolicyInfo policy, - final Urn actor, - final Optional resource, + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource, final PolicyEvaluationContext context ) { @@ -137,25 +120,21 @@ private boolean isPolicyApplicable( } // If the resource is not in scope, deny the request. - if (!isResourceMatch(policy.getType(), policy.getResources(), resource, context)) { + if (!isResourceMatch(policy.getType(), policy.getResources(), resource)) { return false; } // If the actor does not match, deny the request. 
- if (!isActorMatch(actor, policy.getActors(), resource, context)) { - return false; - } - - return true; + return isActorMatch(resolvedActorSpec, policy.getActors(), resource, context); } public List getGrantedPrivileges( final List policies, - final Urn actor, - final Optional resource) { + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource) { PolicyEvaluationContext context = new PolicyEvaluationContext(); return policies.stream() - .filter(policy -> isPolicyApplicable(policy, actor, resource, context)) + .filter(policy -> isPolicyApplicable(policy, resolvedActorSpec, resource, context)) .flatMap(policy -> policy.getPrivileges().stream()) .distinct() .collect(Collectors.toList()); @@ -168,9 +147,8 @@ public List getGrantedPrivileges( * If the policy is of type "METADATA", the resourceSpec parameter will be matched against the * resource filter defined on the policy. */ - public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { - return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec, - new PolicyEvaluationContext()); + public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { + return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec); } /** @@ -178,8 +156,7 @@ public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optio */ private boolean isPrivilegeMatch( final String requestPrivilege, - final List policyPrivileges, - final PolicyEvaluationContext context) { + final List policyPrivileges) { return policyPrivileges.contains(requestPrivilege); } @@ -189,8 +166,7 @@ private boolean isPrivilegeMatch( private boolean isResourceMatch( final String policyType, final @Nullable DataHubResourceFilter policyResourceFilter, - final Optional requestResource, - final PolicyEvaluationContext context) { + final Optional requestResource) { if (PoliciesConfig.PLATFORM_POLICY_TYPE.equals(policyType)) { // Currently, 
platform policies have no associated resource. return true; @@ -199,7 +175,7 @@ private boolean isResourceMatch( // No resource defined on the policy. return true; } - if (!requestResource.isPresent()) { + if (requestResource.isEmpty()) { // Resource filter present in policy, but no resource spec provided. log.debug("Resource filter present in policy, but no resource spec provided."); return false; @@ -218,31 +194,31 @@ private PolicyMatchFilter getFilter(DataHubResourceFilter policyResourceFilter) } PolicyMatchCriterionArray criteria = new PolicyMatchCriterionArray(); if (policyResourceFilter.hasType()) { - criteria.add(new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_TYPE.name()) + criteria.add(new PolicyMatchCriterion().setField(EntityFieldType.TYPE.name()) .setValues(new StringArray(Collections.singletonList(policyResourceFilter.getType())))); } if (policyResourceFilter.hasType() && policyResourceFilter.hasResources() && !policyResourceFilter.isAllResources()) { criteria.add( - new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_URN.name()).setValues(policyResourceFilter.getResources())); + new PolicyMatchCriterion().setField(EntityFieldType.URN.name()).setValues(policyResourceFilter.getResources())); } return new PolicyMatchFilter().setCriteria(criteria); } - private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedResourceSpec resource) { + private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedEntitySpec resource) { return filter.getCriteria().stream().allMatch(criterion -> checkCriterion(criterion, resource)); } - private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedResourceSpec resource) { - ResourceFieldType resourceFieldType; + private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedEntitySpec resource) { + EntityFieldType entityFieldType; try { - resourceFieldType = ResourceFieldType.valueOf(criterion.getField().toUpperCase()); + 
entityFieldType = EntityFieldType.valueOf(criterion.getField().toUpperCase()); } catch (IllegalArgumentException e) { log.error("Unsupported field type {}", criterion.getField()); return false; } - Set fieldValues = resource.getFieldValues(resourceFieldType); + Set fieldValues = resource.getFieldValues(entityFieldType); return criterion.getValues() .stream() .anyMatch(filterValue -> checkCondition(fieldValues, filterValue, criterion.getCondition())); @@ -257,46 +233,51 @@ private boolean checkCondition(Set fieldValues, String filterValue, Poli } /** + * Returns true if the actor portion of a DataHub policy matches a the actor being evaluated, false otherwise. * Returns true if the actor portion of a DataHub policy matches a the actor being evaluated, false otherwise. */ private boolean isActorMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional resourceSpec, + final Optional resourceSpec, final PolicyEvaluationContext context) { // 1. If the actor is a matching "User" in the actor filter, return true immediately. - if (isUserMatch(actor, actorFilter)) { + if (isUserMatch(resolvedActorSpec, actorFilter)) { return true; } // 2. If the actor is in a matching "Group" in the actor filter, return true immediately. - if (isGroupMatch(actor, actorFilter, context)) { + if (isGroupMatch(resolvedActorSpec, actorFilter, context)) { return true; } // 3. If the actor is the owner, either directly or indirectly via a group, return true immediately. - if (isOwnerMatch(actor, actorFilter, resourceSpec, context)) { + if (isOwnerMatch(resolvedActorSpec, actorFilter, resourceSpec, context)) { return true; } // 4. If the actor is in a matching "Role" in the actor filter, return true immediately. 
- return isRoleMatch(actor, actorFilter, context); + return isRoleMatch(resolvedActorSpec, actorFilter, context); } - private boolean isUserMatch(final Urn actor, final DataHubActorFilter actorFilter) { + private boolean isUserMatch(final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter) { // If the actor is a matching "User" in the actor filter, return true immediately. return actorFilter.isAllUsers() || (actorFilter.hasUsers() && Objects.requireNonNull(actorFilter.getUsers()) - .stream() - .anyMatch(user -> user.equals(actor))); + .stream().map(Urn::toString) + .anyMatch(user -> user.equals(resolvedActorSpec.getSpec().getEntity()))); } - private boolean isGroupMatch(final Urn actor, final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { + private boolean isGroupMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, + final PolicyEvaluationContext context) { // If the actor is in a matching "Group" in the actor filter, return true immediately. if (actorFilter.isAllGroups() || actorFilter.hasGroups()) { - final Set groups = resolveGroups(actor, context); - return actorFilter.isAllGroups() || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) - .stream() + final Set groups = resolveGroups(resolvedActorSpec, context); + return (actorFilter.isAllGroups() && !groups.isEmpty()) + || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) + .stream().map(Urn::toString) .anyMatch(groups::contains)); } // If there are no groups on the policy, return false for the group match. 
@@ -304,24 +285,24 @@ private boolean isGroupMatch(final Urn actor, final DataHubActorFilter actorFilt } private boolean isOwnerMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional requestResource, + final Optional requestResource, final PolicyEvaluationContext context) { // If the policy does not apply to owners, or there is no resource to own, return false immediately. - if (!actorFilter.isResourceOwners() || !requestResource.isPresent()) { + if (!actorFilter.isResourceOwners() || requestResource.isEmpty()) { return false; } List ownershipTypes = actorFilter.getResourceOwnersTypes(); - return isActorOwner(actor, requestResource.get(), ownershipTypes, context); + return isActorOwner(resolvedActorSpec, requestResource.get(), ownershipTypes, context); } - private Set getOwnersForType(ResourceSpec resourceSpec, List ownershipTypes) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private Set getOwnersForType(EntitySpec resourceSpec, List ownershipTypes) { + Urn entityUrn = UrnUtils.getUrn(resourceSpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, - Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); + Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); if (response == null || !response.getAspects().containsKey(Constants.OWNERSHIP_ASPECT_NAME)) { return Collections.emptySet(); } @@ -338,50 +319,56 @@ private Set getOwnersForType(ResourceSpec resourceSpec, List owners return ownersStream.map(owner -> owner.getOwner().toString()).collect(Collectors.toSet()); } - private boolean isActorOwner(Urn actor, ResolvedResourceSpec resourceSpec, List ownershipTypes, PolicyEvaluationContext context) { + private boolean isActorOwner( + final ResolvedEntitySpec resolvedActorSpec, + ResolvedEntitySpec resourceSpec, List ownershipTypes, + 
PolicyEvaluationContext context) { Set owners = this.getOwnersForType(resourceSpec.getSpec(), ownershipTypes); - if (isUserOwner(actor, owners)) { - return true; - } - final Set groups = resolveGroups(actor, context); - if (isGroupOwner(groups, owners)) { + if (isUserOwner(resolvedActorSpec, owners)) { return true; } - return false; + final Set groups = resolveGroups(resolvedActorSpec, context); + + return isGroupOwner(groups, owners); } - private boolean isUserOwner(Urn actor, Set owners) { - return owners.contains(actor.toString()); + private boolean isUserOwner(final ResolvedEntitySpec resolvedActorSpec, Set owners) { + return owners.contains(resolvedActorSpec.getSpec().getEntity()); } - private boolean isGroupOwner(Set groups, Set owners) { - return groups.stream().anyMatch(group -> owners.contains(group.toString())); + private boolean isGroupOwner(Set groups, Set owners) { + return groups.stream().anyMatch(owners::contains); } - private boolean isRoleMatch(final Urn actor, final DataHubActorFilter actorFilter, + private boolean isRoleMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { // Can immediately return false if the actor filter does not have any roles if (!actorFilter.hasRoles()) { return false; } // If the actor has a matching "Role" in the actor filter, return true immediately. 
- Set actorRoles = resolveRoles(actor, context); + Set actorRoles = resolveRoles(resolvedActorSpec, context); return Objects.requireNonNull(actorFilter.getRoles()) .stream() .anyMatch(actorRoles::contains); } - private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { + private Set resolveRoles(final ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.roles != null) { return context.roles; } + String actor = resolvedActorSpec.getSpec().getEntity(); + Set roles = new HashSet<>(); final EnvelopedAspectMap aspectMap; try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actor); + Urn actorUrn = Urn.createFromString(actor); + final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actorUrn), + Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actorUrn); if (corpUser == null || !corpUser.hasAspects()) { return roles; } @@ -403,62 +390,25 @@ private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { return roles; } - private Set resolveGroups(Urn actor, PolicyEvaluationContext context) { + private Set resolveGroups(ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.groups != null) { return context.groups; } - Set groups = new HashSet<>(); - final EnvelopedAspectMap aspectMap; - - try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication) - .get(actor); - if (corpUser == null || !corpUser.hasAspects()) { - return groups; - } - aspectMap = corpUser.getAspects(); - } catch (Exception e) { - throw new RuntimeException(String.format("Failed to fetch %s and %s for urn %s", GROUP_MEMBERSHIP_ASPECT_NAME, - 
NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, actor), e); - } - - Optional maybeGroupMembership = resolveGroupMembership(aspectMap); - maybeGroupMembership.ifPresent(groupMembership -> groups.addAll(groupMembership.getGroups())); - - Optional maybeNativeGroupMembership = resolveNativeGroupMembership(aspectMap); - maybeNativeGroupMembership.ifPresent( - nativeGroupMembership -> groups.addAll(nativeGroupMembership.getNativeGroups())); + Set groups = resolvedActorSpec.getGroupMembership(); context.setGroups(groups); // Cache the groups. return groups; } - // TODO: Optimization - Cache the group membership. Refresh periodically. - private Optional resolveGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of(new GroupMembership(aspectMap.get(GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - - private Optional resolveNativeGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of( - new NativeGroupMembership(aspectMap.get(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - /** * Class used to store state across a single Policy evaluation. 
*/ static class PolicyEvaluationContext { - private Set groups; + private Set groups; private Set roles; - public void setGroups(Set groups) { + public void setGroups(Set groups) { this.groups = groups; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java new file mode 100644 index 0000000000000..cbb237654e969 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -0,0 +1,70 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.datahub.authorization.FieldResolver; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.metadata.Constants.*; + +/** + * Provides field resolver for data platform instance given entitySpec + */ +@Slf4j +@RequiredArgsConstructor +public class DataPlatformInstanceFieldResolverProvider implements EntityFieldResolverProvider { + + private final EntityClient _entityClient; + private final Authentication _systemAuthentication; + + @Override + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DATA_PLATFORM_INSTANCE); + } + + @Override + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return
FieldResolver.getResolverFromFunction(entitySpec, this::getDataPlatformInstance); + } + + private FieldResolver.FieldValue getDataPlatformInstance(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); + // In the case that the entity is a platform instance, the associated platform instance entity is the instance itself + if (entityUrn.getEntityType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME)) { + return FieldResolver.FieldValue.builder() + .values(Collections.singleton(entityUrn.toString())) + .build(); + } + + EnvelopedAspect dataPlatformInstanceAspect; + try { + EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, + Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME), _systemAuthentication); + if (response == null || !response.getAspects().containsKey(DATA_PLATFORM_INSTANCE_ASPECT_NAME)) { + return FieldResolver.emptyFieldValue(); + } + dataPlatformInstanceAspect = response.getAspects().get(DATA_PLATFORM_INSTANCE_ASPECT_NAME); + } catch (Exception e) { + log.error("Error while retrieving platform instance aspect for urn {}", entityUrn, e); + return FieldResolver.emptyFieldValue(); + } + DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataPlatformInstanceAspect.getValue().data()); + if (dataPlatformInstance.getInstance() == null) { + return FieldResolver.emptyFieldValue(); + } + return FieldResolver.FieldValue.builder() + .values(Collections.singleton(Objects.requireNonNull(dataPlatformInstance.getInstance()).toString())) + .build(); + } +} \ No newline at end of file diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java index 68c1dd4f644e5..15d821b75c0bd 100644 --- 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.domain.DomainProperties; @@ -14,6 +14,7 @@ import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -27,23 +28,23 @@ /** - * Provides field resolver for domain given resourceSpec + * Provides field resolver for domain given entitySpec */ @Slf4j @RequiredArgsConstructor -public class DomainFieldResolverProvider implements ResourceFieldResolverProvider { +public class DomainFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.DOMAIN; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DOMAIN); } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getDomains); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getDomains); } private Set getBatchedParentDomains(@Nonnull final Set urns) { @@ -78,8 +79,8 @@ private Set getBatchedParentDomains(@Nonnull final Set urns) { return parentUrns; } - private FieldResolver.FieldValue 
getDomains(ResourceSpec resourceSpec) { - final Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getDomains(EntitySpec entitySpec) { + final Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); // In the case that the entity is a domain, the associated domain is the domain itself if (entityUrn.getEntityType().equals(DOMAIN_ENTITY_NAME)) { return FieldResolver.FieldValue.builder() diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java new file mode 100644 index 0000000000000..227d403a9cd1d --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java @@ -0,0 +1,24 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import java.util.List; + + +/** + * Base class for defining a class that provides the field resolver for the given field type + */ +public interface EntityFieldResolverProvider { + + /** + * List of fields that this hydrator is hydrating. 
+ * @return + */ + List getFieldTypes(); + + /** + * Return resolver for fetching the field values given the entity + */ + FieldResolver getFieldResolver(EntitySpec entitySpec); +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java index 58e3d78ce8c3b..addac84c68b18 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java @@ -1,22 +1,25 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** - * Provides field resolver for entity type given resourceSpec + * Provides field resolver for entity type given entitySpec */ -public class EntityTypeFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityTypeFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_TYPE; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.TYPE, EntityFieldType.RESOURCE_TYPE); } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getType())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return 
FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getType())); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java index b9d98f1dcbac0..32960de687839 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java @@ -1,22 +1,25 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** - * Provides field resolver for entity urn given resourceSpec + * Provides field resolver for entity urn given entitySpec */ -public class EntityUrnFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityUrnFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_URN; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.URN, EntityFieldType.RESOURCE_URN); } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getResource())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getEntity())); } } diff --git 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java new file mode 100644 index 0000000000000..b1202d9f4bbd3 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java @@ -0,0 +1,79 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.metadata.Constants; +import com.linkedin.identity.GroupMembership; +import java.util.Collections; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import static com.linkedin.metadata.Constants.GROUP_MEMBERSHIP_ASPECT_NAME; +import static com.linkedin.metadata.Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME; + + +/** + * Provides field resolver for group membership given entitySpec + */ +@Slf4j +@RequiredArgsConstructor +public class GroupMembershipFieldResolverProvider implements EntityFieldResolverProvider { + + private final EntityClient _entityClient; + private final Authentication _systemAuthentication; + + @Override + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.GROUP_MEMBERSHIP); + } + + @Override + public FieldResolver getFieldResolver(EntitySpec
entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getGroupMembership); + } + + private FieldResolver.FieldValue getGroupMembership(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); + EnvelopedAspect groupMembershipAspect; + EnvelopedAspect nativeGroupMembershipAspect; + List groups = new ArrayList<>(); + try { + EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, + ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication); + if (response == null + || !(response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME) + || response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME))) { + return FieldResolver.emptyFieldValue(); + } + if (response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME)) { + groupMembershipAspect = response.getAspects().get(Constants.GROUP_MEMBERSHIP_ASPECT_NAME); + GroupMembership groupMembership = new GroupMembership(groupMembershipAspect.getValue().data()); + groups.addAll(groupMembership.getGroups()); + } + if (response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { + nativeGroupMembershipAspect = response.getAspects().get(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME); + NativeGroupMembership nativeGroupMembership = new NativeGroupMembership(nativeGroupMembershipAspect.getValue().data()); + groups.addAll(nativeGroupMembership.getNativeGroups()); + } + } catch (Exception e) { + log.error("Error while retrieving group membership aspect for urn {}", entityUrn, e); + return FieldResolver.emptyFieldValue(); + } + return FieldResolver.FieldValue.builder() + .values(groups.stream().map(Urn::toString).collect(Collectors.toSet())) + .build(); + } +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java 
b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java index 20ec6a09377c8..3c27f9e6ce8d7 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -12,33 +12,34 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; /** - * Provides field resolver for owners given resourceSpec + * Provides field resolver for owners given entitySpec */ @Slf4j @RequiredArgsConstructor -public class OwnerFieldResolverProvider implements ResourceFieldResolverProvider { +public class OwnerFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.OWNER; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.OWNER); } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getOwners); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, 
this::getOwners); } - private FieldResolver.FieldValue getOwners(ResourceSpec resourceSpec) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getOwners(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java deleted file mode 100644 index 4ba4200f8035e..0000000000000 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.datahub.authorization.fieldresolverprovider; - -import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; - - -/** - * Base class for defining a class that provides the field resolver for the given field type - */ -public interface ResourceFieldResolverProvider { - - /** - * Field that this hydrator is hydrating - */ - ResourceFieldType getFieldType(); - - /** - * Return resolver for fetching the field values given the resource - */ - FieldResolver getFieldResolver(ResourceSpec resourceSpec); -} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 2e48123fb1813..24ecfa6fefc85 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -158,7 +158,7 @@ public void 
testSystemAuthentication() throws Exception { // Validate that the System Actor is authorized, even if there is no policy. - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( new Actor(ActorType.USER, DATAHUB_SYSTEM_CLIENT_ID).toUrnStr(), @@ -172,7 +172,7 @@ public void testSystemAuthentication() throws Exception { @Test public void testAuthorizeGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -186,7 +186,7 @@ public void testAuthorizeGranted() throws Exception { @Test public void testAuthorizeNotGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -203,7 +203,7 @@ public void testAllowAllMode() throws Exception { _dataHubAuthorizer.setMode(DataHubAuthorizer.AuthorizationMode.ALLOW_ALL); - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -219,7 +219,7 @@ public void testAllowAllMode() throws Exception { public void testInvalidateCache() throws Exception { // First make sure that the default policies are as expected. 
- ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -250,7 +250,7 @@ public void testInvalidateCache() throws Exception { public void testAuthorizedActorsActivePolicy() throws Exception { final AuthorizedActors actors = _dataHubAuthorizer.authorizedActors("EDIT_ENTITY_TAGS", // Should be inside the active policy. - Optional.of(new ResourceSpec("dataset", "urn:li:dataset:1"))); + Optional.of(new EntitySpec("dataset", "urn:li:dataset:1"))); assertTrue(actors.isAllUsers()); assertTrue(actors.isAllGroups()); @@ -272,7 +272,7 @@ public void testAuthorizedActorsActivePolicy() throws Exception { @Test public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -285,7 +285,7 @@ public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -298,7 +298,7 @@ public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithoutPrivilegeIsDenied() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -334,7 +334,7 @@ private DataHubPolicyInfo createDataHubPolicyInfo(boolean active, List p 
resourceFilter.setType("dataset"); if (domain != null) { - resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(ResourceFieldType.DOMAIN, Collections.singletonList(domain.toString())))); + resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(EntityFieldType.DOMAIN, Collections.singletonList(domain.toString())))); } dataHubPolicyInfo.setResources(resourceFilter); @@ -398,6 +398,6 @@ private Map createDomainPropertiesBatchResponse(@Nullable f } private AuthorizerContext createAuthorizerContext(final Authentication systemAuthentication, final EntityClient entityClient) { - return new AuthorizerContext(Collections.emptyMap(), new DefaultResourceSpecResolver(systemAuthentication, entityClient)); + return new AuthorizerContext(Collections.emptyMap(), new DefaultEntitySpecResolver(systemAuthentication, entityClient)); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java index 99d8fee309d91..be8c948f8ef89 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java @@ -11,15 +11,12 @@ import com.linkedin.common.OwnershipType; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.CorpUserInfo; -import com.linkedin.identity.GroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.policy.DataHubActorFilter; @@ -45,22 +42,19 @@ public class PolicyEngineTest { private 
static final String AUTHORIZED_PRINCIPAL = "urn:li:corpuser:datahub"; private static final String UNAUTHORIZED_PRINCIPAL = "urn:li:corpuser:unauthorized"; - private static final String AUTHORIZED_GROUP = "urn:li:corpGroup:authorizedGroup"; - private static final String RESOURCE_URN = "urn:li:dataset:test"; - private static final String DOMAIN_URN = "urn:li:domain:domain1"; - private static final String OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__technical_owner"; - private static final String OTHER_OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__data_steward"; private EntityClient _entityClient; private PolicyEngine _policyEngine; private Urn authorizedUserUrn; + private ResolvedEntitySpec resolvedAuthorizedUserSpec; private Urn unauthorizedUserUrn; + private ResolvedEntitySpec resolvedUnauthorizedUserSpec; private Urn resourceUrn; @BeforeMethod @@ -68,29 +62,34 @@ public void setupTest() throws Exception { _entityClient = Mockito.mock(EntityClient.class); _policyEngine = new PolicyEngine(Mockito.mock(Authentication.class), _entityClient); - // Init mocks. - EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedUserUrn = Urn.createFromString(AUTHORIZED_PRINCIPAL); + resolvedAuthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, AUTHORIZED_PRINCIPAL, + Collections.emptySet(), Collections.emptySet(), Collections.singleton(AUTHORIZED_GROUP)); + unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); + resolvedUnauthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, UNAUTHORIZED_PRINCIPAL); + resourceUrn = Urn.createFromString(RESOURCE_URN); + + // Init role membership mocks. 
+ EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedEntityResponse.setUrn(authorizedUserUrn); Map authorizedEntityResponseMap = Collections.singletonMap(authorizedUserUrn, authorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), any(), - any())).thenReturn(authorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(authorizedEntityResponseMap); EntityResponse unauthorizedEntityResponse = createUnauthorizedEntityResponse(); - unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); unauthorizedEntityResponse.setUrn(unauthorizedUserUrn); Map unauthorizedEntityResponseMap = Collections.singletonMap(unauthorizedUserUrn, unauthorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), any(), - any())).thenReturn(unauthorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(unauthorizedEntityResponseMap); + // Init ownership type mocks. 
EntityResponse entityResponse = new EntityResponse(); EnvelopedAspectMap envelopedAspectMap = new EnvelopedAspectMap(); envelopedAspectMap.put(OWNERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new com.linkedin.entity.Aspect(createOwnershipAspect(true, true).data()))); entityResponse.setAspects(envelopedAspectMap); - resourceUrn = Urn.createFromString(RESOURCE_URN); Map mockMap = mock(Map.class); when(_entityClient.batchGetV2(any(), eq(Collections.singleton(resourceUrn)), eq(Collections.singleton(OWNERSHIP_ASPECT_NAME)), any())).thenReturn(mockMap); @@ -120,9 +119,9 @@ public void testEvaluatePolicyInactivePolicyState() { resourceFilter.setAllResources(true); resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -149,9 +148,9 @@ public void testEvaluatePolicyPrivilegeFilterNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_OWNERS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_OWNERS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -176,7 +175,8 @@ public void testEvaluatePlatformPolicyPrivilegeFilterMatch() throws Exception { 
dataHubPolicyInfo.setActors(actorFilter); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "MANAGE_POLICIES", Optional.empty()); + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "MANAGE_POLICIES", + Optional.empty()); assertTrue(result.isGranted()); // Verify no network calls @@ -208,10 +208,10 @@ public void testEvaluatePolicyActorFilterUserMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert Authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); @@ -245,10 +245,10 @@ public void testEvaluatePolicyActorFilterUserNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. 
PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, "urn:li:corpuser:test", "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, buildEntityResolvers(CORP_USER_ENTITY_NAME, "urn:li:corpuser:test"), "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); @@ -270,7 +270,7 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -282,16 +282,15 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Verify we are only calling for group during these requests. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -307,7 +306,7 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -319,16 +318,15 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -357,17 +355,17 @@ public void testEvaluatePolicyActorFilterRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult authorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(authorizedResult.isGranted()); // Verify we are only calling for roles during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), + eq(Collections.singleton(authorizedUserUrn)), any(), any()); } @Test @@ -396,10 +394,10 @@ public void testEvaluatePolicyActorFilterNoRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. 
PolicyEngine.PolicyEvaluationResult unauthorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(unauthorizedResult.isGranted()); @@ -431,16 +429,16 @@ public void testEvaluatePolicyActorFilterAllUsersMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result2.isGranted()); @@ -470,24 +468,21 @@ public void testEvaluatePolicyActorFilterAllGroupsMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. 
PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); - assertTrue(result2.isGranted()); + assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -519,17 +514,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersMatch() throws Except when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. 
PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure no calls for group membership. - verify(_entityClient, times(0)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - eq(null), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -562,13 +557,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeMatch() throws Ex when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -601,13 +600,16 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeNoMatch() throws when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), 
Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -639,17 +641,17 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersMatch() throws Excep when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -673,16 +675,15 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersNoMatch() throws Exc resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -706,10 +707,10 @@ public void testEvaluatePolicyResourceFilterAllResourcesMatch() throws Exception resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -738,9 +739,9 @@ public void testEvaluatePolicyResourceFilterAllResourcesNoMatch() throws Excepti resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. + ResolvedEntitySpec resourceSpec = buildEntityResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -773,9 +774,9 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchLegacy() throws resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -801,13 +802,13 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatch() throws Excep final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, 
Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -833,14 +834,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatch() throws Exc final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -866,14 +867,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchDomain() throws final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN)); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -899,14 +900,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatchDomain() thro final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec 
resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -933,7 +934,7 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter1 = new DataHubResourceFilter(); resourceFilter1.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo1.setResources(resourceFilter1); @@ -954,8 +955,8 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter2 = new DataHubResourceFilter(); resourceFilter2.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo2.setResources(resourceFilter2); // Policy 3, match dataset type and owner (legacy resource filter) @@ -981,25 +982,25 @@ public void testGetGrantedPrivileges() throws Exception { final List policies = ImmutableList.of(dataHubPolicyInfo1, dataHubPolicyInfo2, dataHubPolicyInfo3); - 
assertEquals(_policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.empty()), + assertEquals(_policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.empty()), Collections.emptyList()); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Everything matches + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Everything matches assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", "PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Resource doesn't match + resourceSpec = buildEntityResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), 
Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1")); final EntityResponse entityResponse = new EntityResponse(); @@ -1008,16 +1009,16 @@ public void testGetGrantedPrivileges() throws Exception { entityResponse.setAspects(aspectMap); when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Is owner + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Is owner assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", "PRIVILEGE_2_1", "PRIVILEGE_2_2", "PRIVILEGE_3")); - resourceSpec = buildResourceResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Resource type doesn't match + resourceSpec = buildEntityResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource type doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), Collections.emptyList()); } @@ -1050,9 +1051,9 @@ public void testGetMatchingActorsResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - 
ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), - Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), + Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec)); assertTrue(actors.allUsers()); @@ -1101,8 +1102,8 @@ public void testGetMatchingActorsNoResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec)); assertFalse(actors.allUsers()); @@ -1155,21 +1156,6 @@ private EntityResponse createAuthorizedEntityResponse() throws URISyntaxExceptio final EntityResponse entityResponse = new EntityResponse(); final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); - final CorpUserInfo userInfo = new CorpUserInfo(); - userInfo.setActive(true); - userInfo.setFullName("Data Hub"); - userInfo.setFirstName("Data"); - userInfo.setLastName("Hub"); - userInfo.setEmail("datahub@gmail.com"); - userInfo.setTitle("Admin"); - aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data()))); - - final GroupMembership groupsAspect = new GroupMembership(); - final UrnArray groups = new UrnArray(); - groups.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); - groupsAspect.setGroups(groups); - aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new 
Aspect(groupsAspect.data()))); - final RoleMembership rolesAspect = new RoleMembership(); final UrnArray roles = new UrnArray(); roles.add(Urn.createFromString("urn:li:dataHubRole:admin")); @@ -1184,21 +1170,6 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept final EntityResponse entityResponse = new EntityResponse(); final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); - final CorpUserInfo userInfo = new CorpUserInfo(); - userInfo.setActive(true); - userInfo.setFullName("Unauthorized User"); - userInfo.setFirstName("Unauthorized"); - userInfo.setLastName("User"); - userInfo.setEmail("Unauth"); - userInfo.setTitle("Engineer"); - aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data()))); - - final GroupMembership groupsAspect = new GroupMembership(); - final UrnArray groups = new UrnArray(); - groups.add(Urn.createFromString("urn:li:corpGroup:unauthorizedGroup")); - groupsAspect.setGroups(groups); - aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(groupsAspect.data()))); - final RoleMembership rolesAspect = new RoleMembership(); final UrnArray roles = new UrnArray(); roles.add(Urn.createFromString("urn:li:dataHubRole:reader")); @@ -1209,17 +1180,18 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept return entityResponse; } - public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn) { - return buildResourceResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet()); + public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn) { + return buildEntityResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); } - public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn, Set owners, - Set domains) { - return new ResolvedResourceSpec(new 
ResourceSpec(entityType, entityUrn), - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, - FieldResolver.getResolverFromValues(Collections.singleton(entityType)), ResourceFieldType.RESOURCE_URN, - FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), ResourceFieldType.OWNER, - FieldResolver.getResolverFromValues(owners), ResourceFieldType.DOMAIN, - FieldResolver.getResolverFromValues(domains))); + public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn, Set owners, + Set domains, Set groups) { + return new ResolvedEntitySpec(new EntitySpec(entityType, entityUrn), + ImmutableMap.of(EntityFieldType.TYPE, + FieldResolver.getResolverFromValues(Collections.singleton(entityType)), EntityFieldType.URN, + FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), EntityFieldType.OWNER, + FieldResolver.getResolverFromValues(owners), EntityFieldType.DOMAIN, + FieldResolver.getResolverFromValues(domains), EntityFieldType.GROUP_MEMBERSHIP, + FieldResolver.getResolverFromValues(groups))); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java new file mode 100644 index 0000000000000..5c7d87f1c05a9 --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -0,0 +1,193 @@ +package com.datahub.authorization.fieldresolverprovider; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static 
org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.Set; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataPlatformInstanceFieldResolverProviderTest { + + private static final String DATA_PLATFORM_INSTANCE_URN = + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)"; + private static final String RESOURCE_URN = + "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.testDataset,PROD)"; + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private DataPlatformInstanceFieldResolverProvider dataPlatformInstanceFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + dataPlatformInstanceFieldResolverProvider = + new DataPlatformInstanceFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnDataPlatformInstanceType() { + 
assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldTypes().get(0)); + } + + @Test + public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() { + var resourceSpec = new EntitySpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(resourceSpec); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verifyZeroInteractions(entityClientMock); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceHasNoDataPlatformInstance() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + 
eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenDataPlatformInstanceHasNoInstance() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatform = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatform.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void 
shouldReturnFieldValueWithDataPlatformInstanceOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatformInstance = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")) + .setInstance(Urn.createFromString(DATA_PLATFORM_INSTANCE_URN)); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatformInstance.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java new file mode 100644 index 0000000000000..af547f14cd3fc --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java @@ -0,0 +1,212 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableList; 
+import com.google.common.collect.ImmutableSet; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.GroupMembership; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.r2.RemoteInvocationException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.util.Set; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class GroupMembershipFieldResolverProviderTest { + + private static final String CORPGROUP_URN = "urn:li:corpGroup:groupname"; + private static final String NATIVE_CORPGROUP_URN = "urn:li:corpGroup:nativegroupname"; + private static final String RESOURCE_URN = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)"; + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private GroupMembershipFieldResolverProvider groupMembershipFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + groupMembershipFieldResolverProvider = + new GroupMembershipFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnGroupsMembershipType() { + assertEquals(EntityFieldType.GROUP_MEMBERSHIP, 
groupMembershipFieldResolverProvider.getFieldTypes().get(0)); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceDoesNotBelongToAnyGroup() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + 
eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithOnlyGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithOnlyNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = 
mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithGroupsAndNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + 
eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN, NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} \ No newline at end of file diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 4dfd96ac75c6c..5d72e24748072 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -25,6 +25,8 @@ authentication: # Key used to sign new tokens. signingKey: ${DATAHUB_TOKEN_SERVICE_SIGNING_KEY:WnEdIeTG/VVCLQqGwC/BAkqyY0k+H8NEAtWGejrBI94=} salt: ${DATAHUB_TOKEN_SERVICE_SALT:ohDVbJBvHHVJh9S/UA4BYF9COuNnqqVhr9MLKEGXk1O=} + issuer: ${DATAHUB_TOKEN_SERVICE_ISSUER:datahub-metadata-service} + signingAlgorithm: ${DATAHUB_TOKEN_SERVICE_SIGNING_ALGORITHM:HS256} # The max duration of a UI session in milliseconds. Defaults to 1 day. sessionTokenDurationMs: ${SESSION_TOKEN_DURATION_MS:86400000} @@ -276,6 +278,10 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. 
Deprecating in favor of running through SystemUpdate + policies: + file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json} + # eg for local file + # file: "file:///datahub/datahub-gms/resources/custom-policies.json" servlets: waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index bf50a0c7b6473..b90257870a8b2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -2,12 +2,12 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.DefaultResourceSpecResolver; +import com.datahub.authorization.DefaultEntitySpecResolver; import com.datahub.plugins.PluginConstant; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpecResolver; +import com.datahub.authorization.EntitySpecResolver; import com.datahub.plugins.common.PluginConfig; import com.datahub.plugins.common.PluginPermissionManager; import com.datahub.plugins.common.PluginType; @@ -64,7 +64,7 @@ public class AuthorizerChainFactory { @Scope("singleton") @Nonnull protected AuthorizerChain getInstance() { - final ResourceSpecResolver resolver = initResolver(); + final EntitySpecResolver resolver = initResolver(); // Extract + initialize customer authorizers from application configs. 
final List authorizers = new ArrayList<>(initCustomAuthorizers(resolver)); @@ -79,11 +79,11 @@ protected AuthorizerChain getInstance() { return new AuthorizerChain(authorizers, dataHubAuthorizer); } - private ResourceSpecResolver initResolver() { - return new DefaultResourceSpecResolver(systemAuthentication, entityClient); + private EntitySpecResolver initResolver() { + return new DefaultEntitySpecResolver(systemAuthentication, entityClient); } - private List initCustomAuthorizers(ResourceSpecResolver resolver) { + private List initCustomAuthorizers(EntitySpecResolver resolver) { final List customAuthorizers = new ArrayList<>(); Path pluginBaseDirectory = Paths.get(configurationProvider.getDatahub().getPlugin().getAuth().getPath()); @@ -99,7 +99,7 @@ private List initCustomAuthorizers(ResourceSpecResolver resolver) { return customAuthorizers; } - private void registerAuthorizer(List customAuthorizers, ResourceSpecResolver resolver, Config config) { + private void registerAuthorizer(List customAuthorizers, EntitySpecResolver resolver, Config config) { PluginConfigFactory authorizerPluginPluginConfigFactory = new PluginConfigFactory(config); // Load only Authorizer configuration from plugin config factory List authorizers = diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java index 6b2a61882be90..d47e1a0a73401 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java @@ -23,10 +23,10 @@ public class DataHubTokenServiceFactory { @Value("${authentication.tokenService.salt:}") private String saltingKey; - @Value("${elasticsearch.tokenService.signingAlgorithm:HS256}") + @Value("${authentication.tokenService.signingAlgorithm:HS256}") private 
String signingAlgorithm; - @Value("${elasticsearch.tokenService.issuer:datahub-metadata-service}") + @Value("${authentication.tokenService.issuer:datahub-metadata-service}") private String issuer; /** diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index c490f00021201..3a761bd12647e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -31,6 +31,7 @@ import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; + import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -41,6 +42,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; +import org.springframework.core.io.Resource; @Configuration @@ -89,13 +91,16 @@ public class BootstrapManagerFactory { @Value("${bootstrap.backfillBrowsePathsV2.enabled}") private Boolean _backfillBrowsePathsV2Enabled; + @Value("${bootstrap.policies.file}") + private Resource _policiesResource; + @Bean(name = "bootstrapManager") @Scope("singleton") @Nonnull protected BootstrapManager createInstance() { final IngestRootUserStep ingestRootUserStep = new IngestRootUserStep(_entityService); final IngestPoliciesStep ingestPoliciesStep = - new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer); + new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer, _policiesResource); final IngestRolesStep ingestRolesStep = new 
IngestRolesStep(_entityService, _entityRegistry); final IngestDataPlatformsStep ingestDataPlatformsStep = new IngestDataPlatformsStep(_entityService); final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep = diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index 87dcfd736da40..cf29645214466 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -25,6 +25,7 @@ import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.policy.DataHubPolicyInfo; + import java.io.IOException; import java.net.URISyntaxException; import java.util.Collections; @@ -35,7 +36,8 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + import static com.linkedin.metadata.Constants.*; @@ -52,6 +54,8 @@ public class IngestPoliciesStep implements BootstrapStep { private final EntitySearchService _entitySearchService; private final SearchDocumentTransformer _searchDocumentTransformer; + private final Resource _policiesResource; + @Override public String name() { return "IngestPoliciesStep"; @@ -66,10 +70,10 @@ public void execute() throws IOException, URISyntaxException { .maxStringLength(maxSize).build()); // 0. Execute preflight check to see whether we need to ingest policies - log.info("Ingesting default access policies..."); + log.info("Ingesting default access policies from: {}...", _policiesResource); // 1. Read from the file into JSON. 
- final JsonNode policiesObj = mapper.readTree(new ClassPathResource("./boot/policies.json").getFile()); + final JsonNode policiesObj = mapper.readTree(_policiesResource.getFile()); if (!policiesObj.isArray()) { throw new RuntimeException( diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java index ade49c876f168..207c2284e2673 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java @@ -45,8 +45,7 @@ import io.datahubproject.openapi.util.OpenApiEntitiesUtil; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.linkedin.metadata.models.EntitySpec; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.metadata.authorization.PoliciesConfig; import com.google.common.collect.ImmutableList; import com.datahub.authorization.AuthUtil; @@ -377,7 +376,7 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List sort, @Valid SortOrder sortOrder, @Valid String query) { Authentication authentication = AuthenticationContext.getAuthentication(); - EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); + com.linkedin.metadata.models.EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); checkScrollAuthorized(authentication, entitySpec); // TODO multi-field sort @@ -410,12 +409,12 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List> resourceSpecs = List.of(Optional.of(new ResourceSpec(entitySpec.getName(), ""))); + List> resourceSpecs 
= List.of(Optional.of(new EntitySpec(entitySpec.getName(), ""))); if (_restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizationChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java index 6439e2f31f7b0..898f768cf999a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java @@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -93,8 +93,8 @@ public ResponseEntity getEntities( ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE.getType()) ))); - List> resourceSpecs = entityUrns.stream() - .map(urn -> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = entityUrns.stream() + .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); @@ -175,8 +175,8 @@ public ResponseEntity> deleteEntities( .map(URLDecoder::decode) .map(UrnUtils::getUrn).collect(Collectors.toSet()); - List> resourceSpecs = entityUrns.stream() - .map(urn 
-> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = entityUrns.stream() + .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { UnauthorizedException unauthorizedException = new UnauthorizedException(actorUrnStr + " is unauthorized to delete entities."); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java index 1e37170f37b3b..4641fed3a8610 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java @@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -131,8 +131,8 @@ public ResponseEntity getRelationships( // Re-using GET_ENTITY_PRIVILEGE here as it doesn't make sense to split the privileges between these APIs. 
))); - List> resourceSpecs = - Collections.singletonList(Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString()))); + List> resourceSpecs = + Collections.singletonList(Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString()))); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get relationships."); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java index 5a0ce2e314e1b..fbde9e8072002 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java @@ -6,7 +6,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -67,7 +67,7 @@ public ResponseEntity> getTimeline( Urn urn = Urn.createFromString(rawUrn); Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - ResourceSpec resourceSpec = new ResourceSpec(urn.getEntityType(), rawUrn); + EntitySpec resourceSpec = new EntitySpec(urn.getEntityType(), rawUrn); DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup( ImmutableList.of(new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.GET_TIMELINE_PRIVILEGE.getType())))); if (restApiAuthorizationEnabled && 
!AuthUtil.isAuthorized(_authorizerChain, actorUrnStr, Optional.of(resourceSpec), orGroup)) { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 2b3e84e2df20f..21dc5a4c8a0d6 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -5,7 +5,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -27,7 +27,6 @@ import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -378,11 +377,11 @@ public static GenericAspect convertGenericAspect(@Nonnull io.datahubproject.open public static boolean authorizeProposals(List proposals, EntityService entityService, Authorizer authorizer, String actorUrnStr, DisjunctivePrivilegeGroup orGroup) { - List> resourceSpecs = proposals.stream() + List> resourceSpecs = proposals.stream() .map(proposal -> { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = 
entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); Urn entityUrn = EntityKeyUtils.getUrnFromProposal(proposal, entitySpec.getKeyAspectSpec()); - return Optional.of(new ResourceSpec(proposal.getEntityType(), entityUrn.toString())); + return Optional.of(new EntitySpec(proposal.getEntityType(), entityUrn.toString())); }) .collect(Collectors.toList()); return AuthUtil.isAuthorizedForResources(authorizer, actorUrnStr, resourceSpecs, orGroup); @@ -513,7 +512,7 @@ public static RollbackRunResultDto mapRollbackRunResult(RollbackRunResult rollba } public static UpsertAspectRequest createStatusRemoval(Urn urn, EntityService entityService) { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); if (entitySpec == null || !entitySpec.getAspectSpecMap().containsKey(STATUS_ASPECT_NAME)) { throw new IllegalArgumentException("Entity type is not valid for soft deletes: " + urn.getEntityType()); } diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java index b6bc282f10b65..442ac1b0d287b 100644 --- a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.PluginConstant; import com.datahub.plugins.auth.authorization.Authorizer; import java.io.BufferedReader; 
@@ -74,7 +74,7 @@ public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) { } @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { return new AuthorizedActors("ALL", null, null, true, true); } } diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index cb307863748c3..de6fb6690e693 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -48,7 +48,7 @@ dependencies { implementation externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx - compileOnly externalDependency.lombok + implementation externalDependency.lombok implementation externalDependency.neo4jJavaDriver implementation externalDependency.opentelemetryAnnotations diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 936c8bb67e645..af76af90ce77f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -3,7 +3,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -20,7 +20,6 @@ import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; 
import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.restli.RestliUtil; @@ -123,7 +122,7 @@ public Task get(@Nonnull String urnStr, @QueryParam("aspect") @Option Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get aspect for " + urn); } final VersionedAspect aspect = _entityService.getVersionedAspect(urn, aspectName, version); @@ -154,7 +153,7 @@ public Task getTimeseriesAspectValues( Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_TIMESERIES_ASPECT_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get timeseries aspect for " + urn); } GetTimeseriesAspectValuesResponse response = new GetTimeseriesAspectValuesResponse(); @@ -193,11 +192,11 @@ public Task ingestProposal( } Authentication authentication = AuthenticationContext.getAuthentication(); - EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); Urn urn = 
EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to modify entity " + urn); } String actorUrnStr = authentication.getActor().toUrnStr(); @@ -249,7 +248,7 @@ public Task getCount(@ActionParam(PARAM_ASPECT) @Nonnull String aspectN Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), - (ResourceSpec) null)) { + (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get aspect counts."); } return _entityService.getCountAspect(aspectName, urnLike); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 3ff22fb767676..9bab846d1bdcc 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import 
com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -123,9 +123,9 @@ public Task rollback(@ActionParam("runId") @Nonnull String run List aspectRowsToDelete; aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete, 0, ESUtils.MAX_RESULT_SIZE); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index f6dedfb9a07c6..3ee98b3244718 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -3,7 +3,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -173,7 +173,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if 
(Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -198,8 +198,8 @@ public Task> batchGet(@Nonnull Set urnStrs, for (final String urnStr : urnStrs) { urns.add(Urn.createFromString(urnStr)); } - List> resourceSpecs = urns.stream() - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -242,7 +242,7 @@ public Task ingest(@ActionParam(PARAM_ENTITY) @Nonnull Entity entity, final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entity " + urn); } @@ -273,10 +273,10 @@ public Task batchIngest(@ActionParam(PARAM_ENTITIES) @Nonnull Entity[] ent Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - List> resourceSpecs = Arrays.stream(entities) + List> resourceSpecs = Arrays.stream(entities) .map(Entity::getValue) 
.map(com.datahub.util.ModelUtils::getUrnFromSnapshotUnion) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), resourceSpecs)) { @@ -322,7 +322,7 @@ public Task search(@ActionParam(PARAM_ENTITY) @Nonnull String enti @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -347,7 +347,7 @@ public Task searchAcrossEntities(@ActionParam(PARAM_ENTITIES) @Opt @ActionParam(PARAM_COUNT) int count, @ActionParam(PARAM_SEARCH_FLAGS) @Optional SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -391,7 +391,7 @@ public Task searchAcrossLineage(@ActionParam(PARAM_URN) @No @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) throws URISyntaxException { Authentication auth = 
AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -443,7 +443,7 @@ public Task list(@ActionParam(PARAM_ENTITY) @Nonnull String entityNa Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -462,7 +462,7 @@ public Task autocomplete(@ActionParam(PARAM_ENTITY) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -479,7 +479,7 @@ public Task browse(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) 
null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -497,7 +497,7 @@ public Task getBrowsePaths( Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity: " + urn); } @@ -546,9 +546,9 @@ public Task deleteEntities(@ActionParam("registryId") @Optiona log.info("found {} rows to delete...", stringifyRowCount(aspectRowsToDelete.size())); response.setAspectsAffected(aspectRowsToDelete.size()); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -590,7 +590,7 @@ public Task deleteEntity(@ActionParam(PARAM_URN) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new 
RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity: " + urnStr); } @@ -638,7 +638,7 @@ private Long deleteTimeseriesAspects(@Nonnull Urn urn, @Nullable Long startTimeM Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urn); } @@ -678,7 +678,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urnStr); } @@ -695,7 +695,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonnull Boolean value) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to enable and disable write mode."); } @@ -712,7 +712,7 @@ public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonn public Task 
getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String entityName) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -725,7 +725,7 @@ public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String public Task batchGetTotalEntityCount(@ActionParam(PARAM_ENTITIES) @Nonnull String[] entityNames) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -739,7 +739,7 @@ public Task listUrns(@ActionParam(PARAM_ENTITY) @Nonnull String @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) int count) throws URISyntaxException { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -757,10 +757,10 @@ public Task applyRetention(@ActionParam(PARAM_START) @Optional @Nullable 
@ActionParam(PARAM_URN) @Optional @Nullable String urn ) { Authentication auth = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.APPLY_RETENTION_PRIVILEGE), resourceSpec)) { @@ -781,7 +781,7 @@ public Task filter(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -799,7 +799,7 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr) thro Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized get entity: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 7efb93c0f50e6..0c3e93273b863 100644 --- 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; @@ -68,7 +68,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -96,8 +96,8 @@ public Task> batchGet(@Nonnull Set urnStrs, urns.add(Urn.createFromString(urnStr)); } Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = urns.stream() - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index fd5c3507b5408..05b7e6b3ff24b 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; @@ -65,9 +65,9 @@ public Task> batchGetVersioned( @QueryParam(PARAM_ENTITY_TYPE) @Nonnull String entityType, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) { Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = versionedUrnStrs.stream() + List> resourceSpecs = versionedUrnStrs.stream() .map(versionedUrn -> UrnUtils.getUrn(versionedUrn.getUrn())) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index 
313d16333f9e9..4a8e74c89039a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.EntityRelationship; import com.linkedin.common.EntityRelationshipArray; @@ -107,7 +107,7 @@ public Task get(@QueryParam("urn") @Nonnull String rawUrn, Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + rawUrn); } @@ -142,7 +142,7 @@ public UpdateResponse delete(@QueryParam("urn") @Nonnull String rawUrn) throws E Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete 
entity: " + rawUrn); } @@ -162,7 +162,7 @@ public Task getLineage(@ActionParam(PARAM_URN) @Nonnull Str Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 188e5ae18ee8f..12586b66495a9 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -37,10 +37,10 @@ public static String restoreIndices( @Nonnull EntityService entityService ) { Authentication authentication = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), 
resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, authorizer, ImmutableList.of(PoliciesConfig.RESTORE_INDICES_PRIVILEGE), diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index f36841bb4abae..a8018074497c4 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -3,7 +3,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.entity.Entity; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -54,7 +54,7 @@ public Task producePlatformEvent( @ActionParam("event") @Nonnull PlatformEvent event) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to produce platform events."); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java 
index 5c3b90a84aec1..9949556c99b81 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -82,13 +82,13 @@ public static RestLiServiceException invalidArgumentsException(@Nullable String } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nonnull final List> resources) { + @Nonnull final List privileges, @Nonnull final List> resources) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorizedForResources(authorizer, authentication.getActor().toUrnStr(), resources, orGroup); } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nullable final ResourceSpec resource) { + @Nonnull final List privileges, @Nullable final EntitySpec resource) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorized(authorizer, authentication.getActor().toUrnStr(), java.util.Optional.ofNullable(resource), orGroup); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index be70cf9c494ef..02d413301f3b4 100644 --- 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; @@ -125,7 +125,7 @@ public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregat return RestliUtil.toTask(() -> { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entities."); } @@ -323,7 +323,7 @@ public Task query(@ActionParam(PARAM_RESOURCE) @Nonnull String Urn resourceUrn = UrnUtils.getUrn(resource); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } @@ -383,7 +383,7 @@ public Task queryRange(@ActionParam(PARAM_RESOURCE) @Nonnull S Urn resourceUrn = UrnUtils.getUrn(resource); if 
(Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index 22c62af324c12..b6af3d330d185 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -9,9 +9,9 @@ dependencies { implementation externalDependency.jsonPatch implementation project(':entity-registry') implementation project(':metadata-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-models') implementation project(':metadata-service:restli-client') implementation project(':metadata-service:configuration') diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a46b58aabfb0b..64f59780b887f 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -188,11 +188,12 @@ BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nullable * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search 
hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Gets a list of documents that match given search request. The results are aggregated and filters are applied to the @@ -204,11 +205,12 @@ ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String inpu * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Max result size returned by the underlying search backend diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index 410596cc30cbe..18cb48bfcf1f0 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -64,7 +64,8 @@ "GET_TIMELINE_PRIVILEGE", "PRODUCE_PLATFORM_EVENT_PRIVILEGE", "MANAGE_DATA_PRODUCTS", - "MANAGE_GLOBAL_OWNERSHIP_TYPES" + 
"MANAGE_GLOBAL_OWNERSHIP_TYPES", + "DELETE_ENTITY" ], "displayName":"Root User - Edit and View All Resources", "description":"Grants full edit and view privileges for all resources to root 'datahub' root user.", @@ -263,7 +264,8 @@ "GET_ENTITY_PRIVILEGE", "GET_TIMELINE_PRIVILEGE", "PRODUCE_PLATFORM_EVENT_PRIVILEGE", - "MANAGE_DATA_PRODUCTS" + "MANAGE_DATA_PRODUCTS", + "DELETE_ENTITY" ], "displayName":"Admins - Metadata Policy", "description":"Admins have all metadata privileges.", diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 1c1c368611488..7bc6aa2d43442 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 + api externalDependency.avro implementation externalDependency.commonsLang api externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx @@ -16,8 +16,8 @@ dependencies { api project(':li-utils') api project(':entity-registry') - api project(':metadata-events:mxe-avro-1.7') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-avro') + api project(':metadata-events:mxe-utils-avro') implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/settings.gradle b/settings.gradle index d6777b07b3fb3..52de461383b5e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -20,10 +20,10 @@ include 'metadata-service:openapi-analytics-servlet' include 'metadata-service:plugin' include 'metadata-service:plugin:src:test:sample-test-plugins' include 'metadata-dao-impl:kafka-producer' -include 'metadata-events:mxe-avro-1.7' +include 'metadata-events:mxe-avro' include 'metadata-events:mxe-registration' include 'metadata-events:mxe-schemas' -include 'metadata-events:mxe-utils-avro-1.7' +include 'metadata-events:mxe-utils-avro' include 'metadata-ingestion' include 'metadata-jobs:mae-consumer' include 'metadata-jobs:mce-consumer' diff 
--git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 4aa64c512f684..48f3564e6cd97 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -2,28 +2,29 @@ import urllib import pytest -import requests_wrapper as requests import tenacity from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation -from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdOperatorClass, - AssertionTypeClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, - PartitionTypeClass, -) -from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info +from datahub.metadata.com.linkedin.pegasus2avro.assertion import \ + AssertionStdAggregation +from datahub.metadata.schema_classes import (AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdOperatorClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + PartitionSpecClass, + PartitionTypeClass) + +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest, wait_for_healthcheck_util) restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index 
b9d2143d13ec7..550f0062d5a39 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,9 +1,10 @@ import time import pytest -import requests_wrapper as requests -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -51,7 +52,9 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): # /prod -- There should be one entity get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod"], "start": 0, "count": 100 } }, + "variables": { + "input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100} + }, } response = frontend_session.post( @@ -67,12 +70,19 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] print(browse) - assert browse["entities"] == [{ "urn": TEST_DATASET_3_URN }] + assert browse["entities"] == [{"urn": TEST_DATASET_3_URN}] # /prod/kafka1 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka1"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka1"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -88,16 +98,27 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 3, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }, { "urn": TEST_DATASET_3_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka1"], "totalNumEntities": 0 } + "total": 3, + "entities": [ + {"urn": TEST_DATASET_1_URN}, 
+ {"urn": TEST_DATASET_2_URN}, + {"urn": TEST_DATASET_3_URN}, + ], + "groups": [], + "metadata": {"path": ["prod", "kafka1"], "totalNumEntities": 0}, } # /prod/kafka2 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka2"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka2"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -113,10 +134,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 2, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka2"], "totalNumEntities": 0 } + "total": 2, + "entities": [{"urn": TEST_DATASET_1_URN}, {"urn": TEST_DATASET_2_URN}], + "groups": [], + "metadata": {"path": ["prod", "kafka2"], "totalNumEntities": 0}, } - - diff --git a/smoke-test/tests/cli/datahub-cli.py b/smoke-test/tests/cli/datahub-cli.py index 1d0080bdd9d48..c3db6028efceb 100644 --- a/smoke-test/tests/cli/datahub-cli.py +++ b/smoke-test/tests/cli/datahub-cli.py @@ -1,8 +1,11 @@ import json -import pytest from time import sleep -from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity + +import pytest +from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type, + post_entity) from datahub.cli.ingest_cli import get_session_and_host, rollback + from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync ingested_dataset_run_id = "" @@ -24,24 +27,46 @@ def test_setup(): session, gms_host = get_session_and_host() - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in 
get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) - ingested_dataset_run_id = ingest_file_via_rest("tests/cli/cli_test_data.json").config.run_id + ingested_dataset_run_id = ingest_file_via_rest( + "tests/cli/cli_test_data.json" + ).config.run_id print("Setup ingestion id: " + ingested_dataset_run_id) - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) yield # Clean up rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True})) - session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True} + ), + ) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True} + ), + ) - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) @pytest.mark.dependency() @@ -49,9 +74,7 @@ def test_rollback_editable(): global ingested_dataset_run_id global ingested_editable_run_id platform = "urn:li:dataPlatform:kafka" 
- dataset_name = ( - "test-rollback" - ) + dataset_name = "test-rollback" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -59,23 +82,38 @@ def test_rollback_editable(): print("Ingested dataset id:", ingested_dataset_run_id) # Assert that second data ingestion worked - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) # Make editable change - ingested_editable_run_id = ingest_file_via_rest("tests/cli/cli_editable_test_data.json").config.run_id + ingested_editable_run_id = ingest_file_via_rest( + "tests/cli/cli_editable_test_data.json" + ).config.run_id print("ingested editable id:", ingested_editable_run_id) # Assert that second data ingestion worked - assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # rollback ingestion 1 rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False} + ), + ) # Allow async MCP processor to handle ingestions & rollbacks wait_for_writes_to_sync() # EditableDatasetProperties should still be part of the entity that was soft deleted. 
- assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # But first ingestion aspects should not be present - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, typed=False + ) diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 16925d26f6983..17c8924fb0998 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,13 +1,11 @@ import pytest import tenacity from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import ( - delete_urns_from_file, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, -) +from datahub.metadata.schema_classes import (KafkaSchemaClass, + SchemaMetadataClass) + +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 4288a61b7a0c1..106da7cd8d71e 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -1,21 +1,22 @@ import json import logging +import sys import tempfile import time -import sys from json import JSONDecodeError from typing import Any, Dict, List, Optional -from click.testing import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import pre_json_transform 
from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass + +import requests_wrapper as requests from tests.aspect_generators.timeseries.dataset_profile_gen import \ gen_dataset_profiles -from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync -import requests_wrapper as requests +from tests.utils import (get_strftime_from_timestamp_millis, + wait_for_writes_to_sync) logger = logging.getLogger(__name__) @@ -33,6 +34,7 @@ def sync_elastic() -> None: wait_for_writes_to_sync() + def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".json") as aspect_file: aspect_text: str = json.dumps(pre_json_transform(dataset_profile.to_obj())) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index 61e7a5a65b494..e962b1a5cafd6 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -2,14 +2,14 @@ import time from typing import Any, Dict, List, Optional -from click.testing import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import post_json_transform from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync + import requests_wrapper as requests +from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 405e061c016f9..7b986d3be0444 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -1,6 +1,7 @@ import 
json import sys import tempfile +import time from typing import Any, Dict, Iterable, List import yaml @@ -8,7 +9,7 @@ from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub from datahub.ingestion.graph.client import DataHubGraph, get_default_graph -import time + import requests_wrapper as requests from tests.utils import wait_for_writes_to_sync diff --git a/smoke-test/tests/conftest.py b/smoke-test/tests/conftest.py index eed7a983197ef..57b92a2db1c19 100644 --- a/smoke-test/tests/conftest.py +++ b/smoke-test/tests/conftest.py @@ -2,8 +2,8 @@ import pytest -from tests.utils import wait_for_healthcheck_util, get_frontend_session from tests.test_result_msg import send_message +from tests.utils import get_frontend_session, wait_for_healthcheck_util # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -28,5 +28,5 @@ def test_healthchecks(wait_for_healthchecks): def pytest_sessionfinish(session, exitstatus): - """ whole test run finishes. 
""" + """whole test run finishes.""" send_message(exitstatus) diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 15993733c592b..607835bf3649c 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -1,10 +1,16 @@ -import time +import logging import os import subprocess +import time _ELASTIC_BUFFER_WRITES_TIME_IN_SEC: int = 1 USE_STATIC_SLEEP: bool = bool(os.getenv("USE_STATIC_SLEEP", False)) -ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int(os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5)) +ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( + os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) +) + +logger = logging.getLogger(__name__) + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: @@ -30,7 +36,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: lag_zero = True if not lag_zero: - logger.warning(f"Exiting early from waiting for elastic to catch up due to a timeout. Current lag is {lag_values}") + logger.warning( + f"Exiting early from waiting for elastic to catch up due to a timeout. 
Current lag is {lag_values}" + ) else: # we want to sleep for an additional period of time for Elastic writes buffer to clear - time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) \ No newline at end of file + time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 575e3def6cf23..227645a87d30a 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=False) @@ -225,6 +227,7 @@ def test_update_container(frontend_session, ingest_cleanup_data): "ownerUrn": new_owner, "resourceUrn": container_urn, "ownerEntityType": "CORP_USER", + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner" } }, } diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js index e0d2bf240d74d..aeceaf99be889 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js @@ -4,42 +4,47 @@ const glossaryParentGroup = "Cypress"; describe("glossary sidebar navigation test", () => { it("create term and term parent group, move and delete term group", () => { - //create a new term group and term, move term to the group + + // Create a new term group and term, move term to the group cy.loginWithCredentials(); cy.goToGlossaryList(); - cy.clickOptionWithText("Add Term Group"); + cy.clickOptionWithTestId("add-term-group-button"); cy.waitTextVisible("Create Term Group"); - cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(glossaryTermGroup); - cy.get(".ant-modal-footer > 
button:last-child").click(); - cy.get('*[class^="GlossaryBrowser"]').contains(glossaryTermGroup).should("be.visible"); - cy.clickOptionWithText("Add Term"); + cy.enterTextInTestId("create-glossary-entity-modal-name", glossaryTermGroup); + cy.clickOptionWithTestId("glossary-entity-modal-create-button"); + cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).should("be.visible"); + cy.clickOptionWithTestId("add-term-button"); + cy.waitTextVisible("Created Term Group!"); cy.waitTextVisible("Create Glossary Term"); - cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(glossaryTerm); - cy.get(".ant-modal-footer > button:last-child").click(); - cy.get('*[class^="GlossaryBrowser"]').contains(glossaryTerm).click(); - cy.waitTextVisible("No documentation yet"); + cy.enterTextInTestId("create-glossary-entity-modal-name", glossaryTerm); + cy.clickOptionWithTestId("glossary-entity-modal-create-button").wait(3000); + cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTerm).click().wait(3000); cy.openThreeDotDropdown(); - cy.clickOptionWithText("Move"); - cy.get('[role="dialog"]').contains(glossaryTermGroup).click({force: true}); - cy.get('[role="dialog"]').contains(glossaryTermGroup).should("be.visible"); - cy.get("button").contains("Move").click(); + cy.clickOptionWithTestId("entity-menu-move-button") + cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryTermGroup).click({force: true}); + cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryTermGroup).should("be.visible"); + cy.clickOptionWithTestId("glossary-entity-modal-move-button"); cy.waitTextVisible("Moved Glossary Term!"); - //ensure the new term is under the parent term group in the navigation sidebar - cy.get('*[class^="GlossaryBrowser"]').contains(glossaryTermGroup).click(); + + // Ensure the new term is under the parent term group in the navigation sidebar + 
cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).click(); cy.get('*[class^="GlossaryEntitiesList"]').contains(glossaryTerm).should("be.visible"); - //move a term group from the root level to be under a parent term group + + // Move a term group from the root level to be under a parent term group cy.goToGlossaryList(); cy.clickOptionWithText(glossaryTermGroup); cy.openThreeDotDropdown(); cy.clickOptionWithText("Move"); - cy.get('[role="dialog"]').contains(glossaryParentGroup).click({force: true}); - cy.get('[role="dialog"]').contains(glossaryParentGroup).should("be.visible"); - cy.get("button").contains("Move").click(); + cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryParentGroup).click({force: true}); + cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryParentGroup).should("be.visible"); + cy.clickOptionWithTestId("glossary-entity-modal-move-button"); cy.waitTextVisible("Moved Term Group!"); - //ensure it is no longer on the sidebar navigator at the top level but shows up under the new parent - cy.get('*[class^="GlossaryBrowser"]').contains(glossaryParentGroup).click(); + + // Ensure it is no longer on the sidebar navigator at the top level but shows up under the new parent + cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryParentGroup).click(); cy.get('*[class^="GlossaryEntitiesList"]').contains(glossaryTermGroup).should("be.visible"); - //delete a term group + + // Delete a term group cy.goToGlossaryList(); cy.clickOptionWithText(glossaryParentGroup); cy.clickOptionWithText(glossaryTermGroup); @@ -50,7 +55,8 @@ describe("glossary sidebar navigation test", () => { cy.clickOptionWithText(glossaryTermGroup).wait(3000); cy.deleteFromDropdown(); cy.waitTextVisible("Deleted Term Group!"); - //ensure it is no longer in the sidebar navigator + + // Ensure it is no longer in the sidebar navigator cy.ensureTextNotPresent(glossaryTerm); cy.ensureTextNotPresent(glossaryTermGroup); }); 
diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js new file mode 100644 index 0000000000000..315aa7b22b9da --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js @@ -0,0 +1,80 @@ +const test_dataset = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const first_degree = [ + "urn:li:chart:(looker,cypress_baz1)", + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)", + "urn:li:mlFeature:(cypress-test-2,some-cypress-feature-1)" +]; +const second_degree = [ + "urn:li:chart:(looker,cypress_baz2)", + "urn:li:dashboard:(looker,cypress_baz)", + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "urn:li:mlPrimaryKey:(cypress-test-2,some-cypress-feature-2)" +]; +const third_degree_plus = [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)", + "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_456)", + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created_no_tag,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)" +]; +const downloadCsvFile = (filename) => { + cy.get('[data-testid="three-dot-menu"]').click(); + cy.get('[data-testid="download-as-csv-menu-item"]').click(); + cy.get('[data-testid="download-as-csv-input"]').clear().type(filename); + cy.get('[data-testid="csv-modal-download-button"]').click().wait(5000); + cy.ensureTextNotPresent("Creating CSV to download"); +}; + +describe("download lineage results to .csv file", () => { + + it("download and verify lineage results for 1st, 2nd and 3+ degree of dependencies", () => { + cy.loginWithCredentials(); + 
cy.goToDataset(test_dataset,"SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); + + // Verify 1st degree of dependencies + cy.contains(/1 - 3 of 3/); + downloadCsvFile("first_degree_results.csv"); + let first_degree_csv = cy.readFile('cypress/downloads/first_degree_results.csv'); + first_degree.forEach(function (urn) { + first_degree_csv.should('contain', urn) + }); + second_degree.forEach(function (urn) { + first_degree_csv.should('not.contain', urn) + }); + third_degree_plus.forEach(function (urn) { + first_degree_csv.should('not.contain', urn); + }); + + // Verify 1st and 2nd degree of dependencies + cy.get('[data-testid="facet-degree-2"]').click().wait(5000); + cy.contains(/1 - 7 of 7/); + downloadCsvFile("second_degree_results.csv"); + let second_degree_csv = cy.readFile('cypress/downloads/second_degree_results.csv'); + first_degree.forEach(function (urn) { + second_degree_csv.should('contain', urn) + }); + second_degree.forEach(function (urn) { + second_degree_csv.should('contain', urn) + }); + third_degree_plus.forEach(function (urn) { + second_degree_csv.should('not.contain', urn); + }); + + // Verify 1st 2nd and 3+ degree of dependencies(Verify multi page download) + cy.get('[data-testid="facet-degree-3+"]').click().wait(5000); + cy.contains(/1 - 10 of 13/); + downloadCsvFile("third_plus_degree_results.csv"); + let third_degree_csv = cy.readFile('cypress/downloads/third_plus_degree_results.csv'); + first_degree.forEach(function (urn) { + third_degree_csv.should('contain', urn) + }); + second_degree.forEach(function (urn) { + third_degree_csv.should('contain', urn) + }); + third_degree_plus.forEach(function (urn) { + third_degree_csv.should('contain', urn); + }); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js new file mode 100644 index 0000000000000..37ca62c8d1229 --- /dev/null +++ 
b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js @@ -0,0 +1,68 @@ +import { aliasQuery } from "../utils"; +const DATASET_ENTITY_TYPE = 'dataset'; +const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; +const DOWNSTREAM_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const upstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)-Upstream"] text'; +const downstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)-Downstream"] text'; + +const verifyColumnPathModal = (from, to) => { + cy.get('[data-testid="entity-paths-modal"]').contains(from).should("be.visible"); + cy.get('[data-testid="entity-paths-modal"]').contains(to).should("be.visible"); +}; + +describe("column-Level lineage and impact analysis path test", () => { + beforeEach(() => { + cy.on('uncaught:exception', (err, runnable) => { return false; }); + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + it("verify column-level lineage path at lineage praph and impact analysis ", () => { + // Open dataset with column-level lineage configured an navigate to lineage tab -> visualize lineage + cy.loginWithCredentials(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + + // Enable “show columns” toggle + cy.waitTextVisible("SampleCypressHdfs"); + cy.clickOptionWithTestId("column-toggle"); + cy.waitTextVisible("shipment_info"); + + // Verify functionality of column lineage + cy.get(upstreamColumn).eq(3).click(); + cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'fill', 'white'); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'stroke', 'transparent'); + cy.get(downstreamColumn).eq(2).click(); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'fill', 'white'); + 
cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'stroke', 'transparent'); + + // Open dataset impact analysis view, enable column lineage + cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.clickOptionWithText("Downstream"); + + // Verify upstream column lineage, test column path modal + cy.clickOptionWithText("Upstream"); + cy.waitTextVisible("SampleCypressKafkaDataset"); + cy.ensureTextNotPresent("field_bar"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("shipment_info").click(); + cy.waitTextVisible("field_bar"); + cy.clickOptionWithText("field_bar"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + + // Verify downstream column lineage, test column path modal + cy.goToDataset(DOWNSTREAM_DATASET_URN, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.ensureTextNotPresent("shipment_info"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("field_bar").click(); + cy.waitTextVisible("shipment_info"); + cy.clickOptionWithText("shipment_info"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js index 9e035f7f89772..85db210649c27 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js @@ -5,8 +5,6 @@ const TASKS_ENTITY_TYPE = 'tasks'; const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)'; const JAN_1_2021_TIMESTAMP = 1609553357755; const 
JAN_1_2022_TIMESTAMP = 1641089357755; -const TIMESTAMP_MILLIS_EIGHT_DAYS_AGO = getTimestampMillisNumDaysAgo(8); -const TIMESTAMP_MILLIS_ONE_DAY_AGO = getTimestampMillisNumDaysAgo(1); const TIMESTAMP_MILLIS_14_DAYS_AGO = getTimestampMillisNumDaysAgo(14); const TIMESTAMP_MILLIS_7_DAYS_AGO = getTimestampMillisNumDaysAgo(7); const TIMESTAMP_MILLIS_NOW = getTimestampMillisNumDaysAgo(0); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js index fcc0566f3f6ce..99ad9a68d35e1 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js @@ -5,7 +5,7 @@ const password = "Example password"; const group_name = `Test group ${test_id}`; const addOwner = (owner, type, elementId) => { - cy.clickOptionWithText("Add Owners"); + cy.clickOptionWithTestId("add-owners-button"); cy.contains("Search for users or groups...").click({ force: true }); cy.focused().type(owner); cy.clickOptionWithText(owner); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js index 83b66e2cb2549..5f9758a35ca0e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -10,20 +10,20 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.waitTextVisible("my hive dataset"); cy.waitTextVisible("Sample doc"); - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear(); cy.focused().type(documentation_edited); - cy.get("button").contains("Save").click(); + 
cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible(documentation_edited); //return documentation to original state - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear().wait(1000); cy.focused().type("my hive dataset"); - cy.get("button").contains("Save").click(); + cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible("my hive dataset"); }); @@ -33,21 +33,21 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); cy.waitTextVisible("Link Removed"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.get(`[href='${correct_url}']`).should("be.visible"); }); @@ -55,18 +55,18 @@ describe("edit documentation and link to dataset", () => { cy.loginWithCredentials(); 
cy.visit("/domain/urn:li:domain:marketing/Entities"); cy.waitTextVisible("SampleCypressKafkaDataset"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); - cy.waitTextVisible("Edit"); + cy.openEntityTab("Documentation"); + cy.get("[data-testid='edit-documentation-button']").should("be.visible"); cy.get(`[href='${correct_url}']`).should("be.visible"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); @@ -83,14 +83,14 @@ describe("edit documentation and link to dataset", () => { cy.waitTextVisible("Foo field description has changed"); cy.focused().clear().wait(1000); cy.focused().type(documentation_edited); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible(documentation_edited); cy.waitTextVisible("(edited)"); cy.get("tbody [data-icon='edit']").first().click({ force: true }); cy.focused().clear().wait(1000); cy.focused().type("Foo field description has changed"); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible("Foo field 
description has changed"); cy.waitTextVisible("(edited)"); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js new file mode 100644 index 0000000000000..6c5dd77810644 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -0,0 +1,68 @@ + +const number = Math.floor(Math.random() * 100000); +const accound_id = `account${number}`; +const warehouse_id = `warehouse${number}`; +const username = `user${number}`; +const password = `password${number}`; +const role = `role${number}`; +const ingestion_source_name = `ingestion source ${number}`; + +describe("ingestion source creation flow", () => { + it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => { + // Go to ingestion page, create a snowflake source + cy.loginWithCredentials(); + cy.goToIngestionPage(); + cy.clickOptionWithTestId("create-ingestion-source-button"); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); + cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").type(password); + cy.focused().blur(); + cy.get("#role").type(role); + + // Verify yaml recipe is generated correctly + cy.clickOptionWithTestId("recipe-builder-yaml-button"); + cy.waitTextVisible("account_id"); + cy.waitTextVisible(accound_id); + cy.waitTextVisible(warehouse_id); + cy.waitTextVisible(username); + cy.waitTextVisible(password); + cy.waitTextVisible(role); + + // Finish creating source + cy.clickOptionWithTestId("recipe-builder-next-button"); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.waitTextVisible("Give this ingestion source a name."); + cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + 
cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) + cy.waitTextVisible(ingestion_source_name); + cy.get('[data-testid="ingestion-source-table-status"]').contains("Pending...").should("be.visible"); + + // Verify ingestion source details are saved correctly + cy.get('[data-testid="ingestion-source-table-edit-button"]').first().click(); + cy.waitTextVisible("Edit Ingestion Source"); + cy.get("#account_id").should("have.value", accound_id); + cy.get("#warehouse").should("have.value", warehouse_id); + cy.get("#username").should("have.value", username); + cy.get("#password").should("have.value", password); + cy.get("#role").should("have.value", role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.get('[data-testid="source-name-input"]').clear().type(ingestion_source_name + " EDITED"); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully updated ingestion source!"); + cy.waitTextVisible(ingestion_source_name + " EDITED"); + + // Remove ingestion source + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(ingestion_source_name + " EDITED") + }) +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index 24a24cc21138d..3d052695e818f 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -7,7 +7,7 @@ describe("run managed ingestion", () => { it("create run managed ingestion source", () => { let number = 
Math.floor(Math.random() * 100000); let testName = `cypress test source ${number}` - let cli_version = "0.10.5.4"; + let cli_version = "0.12.0"; cy.login(); cy.goToIngestionPage(); cy.clickOptionWithText("Create new source"); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index 466bb2ef0757e..77fd63b9cae02 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -8,23 +8,24 @@ const ingestion_source_name = `ingestion source ${number}`; describe("managing secrets for ingestion creation", () => { it("create a secret, create ingestion source using a secret, remove a secret", () => { + // Navigate to the manage ingestion page → secrets cy.loginWithCredentials(); - //navigate to the manage ingestion page → secrets cy.goToIngestionPage(); - cy.clickOptionWithText("Secrets"); - //create a new secret - cy.clickOptionWithText("Create new secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.openEntityTab("Secrets"); + + // Create a new secret + cy.clickOptionWithTestId("create-secret-button"); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`); + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`); + cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`); + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Successfully created Secret!"); cy.waitTextVisible(`secretname${number}`); - cy.waitTextVisible(`secretdescription${number}`).wait(5000)//prevent issue with missing secret - //create an 
ingestion source using a secret + cy.waitTextVisible(`secretdescription${number}`).wait(5000) + + // Create an ingestion source using a secret cy.goToIngestionPage(); - cy.clickOptionWithText("Create new source"); + cy.get("#ingestion-create-source").click(); cy.clickOptionWithText("Snowflake"); cy.waitTextVisible("Snowflake Recipe"); cy.get("#account_id").type(accound_id); @@ -40,11 +41,12 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Give this ingestion source a name."); cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); cy.get("button").contains("Save").click(); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); - //remove a secret - cy.clickOptionWithText("Secrets"); + + // Remove a secret + cy.openEntityTab("Secrets"); cy.waitTextVisible(`secretname${number}`); cy.get('[data-icon="delete"]').first().click(); cy.waitTextVisible("Confirm Secret Removal"); @@ -52,14 +54,16 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Removed secret."); cy.ensureTextNotPresent(`secretname${number}`); cy.ensureTextNotPresent(`secretdescription${number}`); - //remove ingestion source + + // Remove ingestion source cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); cy.waitTextVisible("Confirm Ingestion Source Removal"); cy.get("button").contains("Yes").click(); cy.waitTextVisible("Removed ingestion source."); cy.ensureTextNotPresent(ingestion_source_name) - //verify secret is not present during ingestion source creation for password dropdown + + // Verify secret is not present during ingestion source creation for password dropdown cy.clickOptionWithText("Create new source"); cy.clickOptionWithText("Snowflake"); 
cy.waitTextVisible("Snowflake Recipe"); @@ -68,13 +72,13 @@ describe("managing secrets for ingestion creation", () => { cy.get("#username").type(username); cy.get("#password").click().wait(1000); cy.ensureTextNotPresent(`secretname${number}`); - //verify secret can be added during ingestion source creation and used successfully + + // Verify secret can be added during ingestion source creation and used successfully cy.clickOptionWithText("Create Secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`) + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`) + cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`) + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Created secret!"); cy.get("#role").type(role); cy.get("button").contains("Next").click(); @@ -86,6 +90,7 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); + //Remove ingestion source and secret cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); diff --git a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js new file mode 100644 index 0000000000000..4637310b86496 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js @@ -0,0 +1,57 @@ +describe("auto-complete dropdown, filter plus query search test", () => { 
+ + const platformQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-urn:li:dataPlatform:${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter_platform___false___EQUAL___0=urn%3Ali%3AdataPlatform%3A${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + const entityQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter__entityType___false___EQUAL___0=${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + it("verify the 'filter by' section + query (result in search page with query applied + filter applied)", () => { + // Platform query plus filter test + cy.loginWithCredentials(); + // Airflow + platformQuerySearch ("cypress","airflow","Airflow"); + // BigQuery + platformQuerySearch ("cypress","bigquery","BigQuery"); + // dbt + platformQuerySearch ("cypress","dbt","dbt"); + // Hive + platformQuerySearch ("cypress","hive","Hive"); + + // Entity type query plus filter test + // Datasets + entityQuerySearch ("cypress","DATASET","Datasets"); + // Dashboards + entityQuerySearch ("cypress","DASHBOARD","Dashboards"); + // Pipelines + entityQuerySearch ("cypress","DATA_FLOW","Pipelines"); + // Domains + entityQuerySearch ("Marketing","DOMAIN","Domains"); + // Glossary Terms + entityQuerySearch 
("cypress","GLOSSARY_TERM","Glossary Terms"); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index 64bc1253fc383..5e3664f944edf 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -66,6 +66,7 @@ Cypress.Commands.add("logout", () => { Cypress.Commands.add("goToGlossaryList", () => { cy.visit("/glossary"); cy.waitTextVisible("Glossary"); + cy.wait(3000); }); Cypress.Commands.add("goToDomainList", () => { diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index b3bacf39ac7ae..4ad2bc53fa87d 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -1,18 +1,16 @@ -from typing import Set, List - import datetime -import pytest -import subprocess import os +import subprocess +from typing import List, Set + +import pytest + +from tests.setup.lineage.ingest_time_lineage import (get_time_lineage_urns, + ingest_time_lineage) +from tests.utils import (create_datahub_step_state_aspects, delete_urns, + delete_urns_from_file, get_admin_username, + ingest_file_via_rest) -from tests.utils import ( - create_datahub_step_state_aspects, - get_admin_username, - ingest_file_via_rest, - delete_urns_from_file, - delete_urns, -) -from tests.setup.lineage.ingest_time_lineage import ingest_time_lineage, get_time_lineage_urns CYPRESS_TEST_DATA_DIR = "tests/cypress" TEST_DATA_FILENAME = "data.json" @@ -145,7 +143,6 @@ def ingest_cleanup_data(): delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}") delete_urns(get_time_lineage_urns()) - print_now() print("deleting onboarding data file") if os.path.exists(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}"): diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py 
index db198098f21fa..baef1cb1cb3ba 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -1,4 +1,6 @@ +import logging import os +import subprocess import tempfile import time from random import randint @@ -17,8 +19,6 @@ DomainPropertiesClass, DomainsClass) from datahub.utilities.urns.urn import Urn -import subprocess -import logging logger = logging.getLogger(__name__) diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index 68e001f983fbf..d920faaf3a89a 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -1,16 +1,14 @@ -import os import json -import pytest +import os from time import sleep + +import pytest from datahub.cli.cli_utils import get_aspects_for_entity from datahub.cli.ingest_cli import get_session_and_host -from tests.utils import ( - ingest_file_via_rest, - wait_for_healthcheck_util, - delete_urns_from_file, - wait_for_writes_to_sync, - get_datahub_graph, -) + +from tests.utils import (delete_urns_from_file, get_datahub_graph, + ingest_file_via_rest, wait_for_healthcheck_util, + wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -102,7 +100,7 @@ def test_delete_reference(test_setup, depends=["test_healthchecks"]): graph.delete_references_to_urn(tag_urn, dry_run=False) wait_for_writes_to_sync() - + # Validate that references no longer exist references_count, related_aspects = graph.delete_references_to_urn( tag_urn, dry_run=True diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index 1149a970aa8e5..a8969804d03d7 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,10 +1,7 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - ingest_file_via_rest, - get_root_urn, -) + +from tests.utils 
import (delete_urns_from_file, get_frontend_url, get_root_urn, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 7ffe1682cafd8..fa8c918e3cbe1 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() @@ -240,4 +236,7 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): assert res_data assert res_data["data"]["dataset"]["domain"]["domain"]["urn"] == domain_urn - assert res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] == "Engineering" + assert ( + res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] + == "Engineering" + ) diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py index 1238a1dd5730a..b5e408731334e 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py @@ -3,7 +3,8 @@ import pytest import tenacity -from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util +from tests.utils import (get_frontend_url, get_sleep_info, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index 574e4fd4e4c88..f1d6abf5da794 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -2,25 +2,17 @@ import uuid from typing import Dict, Optional, Type -from 
datahub.emitter.mce_builder import ( - make_tag_urn, - make_term_urn, - make_user_urn, -) +from datahub.emitter.mce_builder import (make_tag_urn, make_term_urn, + make_user_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - AuditStampClass, - GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, - TagAssociationClass, - _Aspect, -) +from datahub.metadata.schema_classes import (AuditStampClass, GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, OwnerClass, + OwnershipClass, + OwnershipTypeClass, + TagAssociationClass, _Aspect) def helper_test_entity_terms_patch( @@ -34,18 +26,14 @@ def get_terms(graph, entity_urn): term_urn = make_term_urn(term=f"testTerm-{uuid.uuid4()}") - term_association = GlossaryTermAssociationClass( - urn=term_urn, context="test" - ) + term_association = GlossaryTermAssociationClass(urn=term_urn, context="test") global_terms = GlossaryTermsClass( terms=[term_association], auditStamp=AuditStampClass( time=int(time.time() * 1000.0), actor=make_user_urn("tester") ), ) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_terms - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_terms) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -88,9 +76,7 @@ def helper_test_dataset_tags_patch( tag_association = TagAssociationClass(tag=tag_urn, context="test") global_tags = GlobalTagsClass(tags=[tag_association]) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_tags - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_tags) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -153,15 +139,11 @@ 
def helper_test_ownership_patch( assert owner.owners[0].owner == make_user_urn("jdoe") for patch_mcp in ( - patch_builder_class(test_entity_urn) - .add_owner(owner_to_add) - .build() + patch_builder_class(test_entity_urn).add_owner(owner_to_add).build() ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 2 for patch_mcp in ( @@ -171,9 +153,7 @@ def helper_test_ownership_patch( ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 1 assert owner.owners[0].owner == make_user_urn("jdoe") @@ -199,9 +179,7 @@ def get_custom_properties( orig_aspect = base_aspect assert hasattr(orig_aspect, "customProperties") orig_aspect.customProperties = base_property_map - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=orig_aspect - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=orig_aspect) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit(mcpw) diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index 407410ee89914..342d5d683228a 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -3,19 +3,14 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - DataJobInfoClass, - DataJobInputOutputClass, - EdgeClass, -) +from datahub.metadata.schema_classes import (DataJobInfoClass, + DataJobInputOutputClass, + EdgeClass) from datahub.specific.datajob import 
DataJobPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, - helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, - helper_test_ownership_patch, -) + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) def _make_test_datajob_urn( @@ -37,16 +32,12 @@ def test_datajob_ownership_patch(wait_for_healthchecks): # Tags def test_datajob_tags_patch(wait_for_healthchecks): - helper_test_dataset_tags_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_dataset_tags_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Terms def test_dataset_terms_patch(wait_for_healthchecks): - helper_test_entity_terms_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_entity_terms_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Custom Properties diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 239aab64675d8..6704d19760fb9 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -20,7 +20,10 @@ UpstreamClass, UpstreamLineageClass) from datahub.specific.dataset import DatasetPatchBuilder -from tests.patch.common_patch_tests import helper_test_entity_terms_patch, helper_test_dataset_tags_patch, helper_test_ownership_patch, helper_test_custom_properties_patch + +from tests.patch.common_patch_tests import ( + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) # Common Aspect Patch Tests @@ -31,6 +34,7 @@ def test_dataset_ownership_patch(wait_for_healthchecks): ) helper_test_ownership_patch(dataset_urn, DatasetPatchBuilder) + # Tags def test_dataset_tags_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -38,6 +42,7 @@ def test_dataset_tags_patch(wait_for_healthchecks): ) 
helper_test_dataset_tags_patch(dataset_urn, DatasetPatchBuilder) + # Terms def test_dataset_terms_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -284,8 +289,15 @@ def test_custom_properties_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) - orig_dataset_properties = DatasetPropertiesClass(name="test_name", description="test_description") - helper_test_custom_properties_patch(test_entity_urn=dataset_urn, patch_builder_class=DatasetPatchBuilder, custom_properties_aspect_class=DatasetPropertiesClass, base_aspect=orig_dataset_properties) + orig_dataset_properties = DatasetPropertiesClass( + name="test_name", description="test_description" + ) + helper_test_custom_properties_patch( + test_entity_urn=dataset_urn, + patch_builder_class=DatasetPatchBuilder, + custom_properties_aspect_class=DatasetPropertiesClass, + base_aspect=orig_dataset_properties, + ) with DataHubGraph(DataHubGraphConfig()) as graph: # Patch custom properties along with name diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index b7091541894dd..67142181d2b96 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_frontend_session, - get_sleep_info, - get_root_urn, -) + +from tests.utils import (get_frontend_session, get_frontend_url, get_root_urn, + get_sleep_info, wait_for_healthcheck_util) TEST_POLICY_NAME = "Updated Platform Policy" diff --git a/smoke-test/tests/setup/lineage/helper_classes.py b/smoke-test/tests/setup/lineage/helper_classes.py index 53f77b08d15ed..d550f3093be85 100644 --- a/smoke-test/tests/setup/lineage/helper_classes.py +++ b/smoke-test/tests/setup/lineage/helper_classes.py @@ -1,10 +1,7 @@ from dataclasses import dataclass from typing import Any, Dict, 
List, Optional -from datahub.metadata.schema_classes import ( - EdgeClass, - SchemaFieldDataTypeClass, -) +from datahub.metadata.schema_classes import EdgeClass, SchemaFieldDataTypeClass @dataclass diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 8e3e9c5352922..588a1625419bc 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - DateTypeClass, - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) +from datahub.metadata.schema_classes import (DateTypeClass, NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 2 DAILY_TEMPERATURE_DATASET_ID = "climate.daily_temperature" diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py 
b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index 35a8e6d5cf02e..bb9f51b6b5e9b 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -1,32 +1,18 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, -) +from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, - UpstreamClass, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, UpstreamClass) -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, -) -from tests.setup.lineage.utils import ( - create_node, - create_upstream_edge, - create_upstream_mcp, - emit_mcps, -) +from tests.setup.lineage.constants import (DATASET_ENTITY_TYPE, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field +from tests.setup.lineage.utils import (create_node, create_upstream_edge, + create_upstream_mcp, emit_mcps) # Constants for Case 3 GDP_DATASET_ID = "economic_data.gdp" diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py index f4fb795147478..6079d7a3d2b63 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import 
(make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) - -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - BQ_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + BQ_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 1 TRANSACTIONS_DATASET_ID = "transactions.transactions" diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index cae8e0124d501..3aec979707290 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -1,12 +1,14 @@ +import os from typing import List from datahub.emitter.rest_emitter import DatahubRestEmitter -from tests.setup.lineage.ingest_input_datasets_change import ingest_input_datasets_change, get_input_datasets_change_urns -from tests.setup.lineage.ingest_data_job_change import ingest_data_job_change, get_data_job_change_urns -from tests.setup.lineage.ingest_dataset_join_change import ingest_dataset_join_change, get_dataset_join_change_urns - -import os +from tests.setup.lineage.ingest_data_job_change import ( + get_data_job_change_urns, 
ingest_data_job_change) +from tests.setup.lineage.ingest_dataset_join_change import ( + get_dataset_join_change_urns, ingest_dataset_join_change) +from tests.setup.lineage.ingest_input_datasets_change import ( + get_input_datasets_change_urns, ingest_input_datasets_change) SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" TOKEN = os.getenv("DATAHUB_TOKEN") or "" @@ -20,4 +22,8 @@ def ingest_time_lineage() -> None: def get_time_lineage_urns() -> List[str]: - return get_input_datasets_change_urns() + get_data_job_change_urns() + get_dataset_join_change_urns() + return ( + get_input_datasets_change_urns() + + get_data_job_change_urns() + + get_dataset_join_change_urns() + ) diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index 672f7a945a6af..c72f6ccb89b7a 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -1,41 +1,30 @@ import datetime -from datahub.emitter.mce_builder import ( - make_data_platform_urn, - make_dataset_urn, - make_data_job_urn_with_flow, - make_data_flow_urn, -) +from typing import List + +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_data_platform_urn, + make_dataset_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage -from datahub.metadata.schema_classes import ( - AuditStampClass, - ChangeTypeClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, - DataFlowInfoClass, - DataJobInputOutputClass, - DataJobInfoClass, - EdgeClass, - MySqlDDLClass, - SchemaFieldClass, - SchemaMetadataClass, - UpstreamClass, -) -from typing import List - -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - DATA_JOB_ENTITY_TYPE, - DATA_FLOW_ENTITY_TYPE, - DATA_FLOW_INFO_ASPECT_NAME, - DATA_JOB_INFO_ASPECT_NAME, - 
DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, -) -from tests.setup.lineage.helper_classes import ( - Dataset, - Pipeline, -) +from datahub.metadata.schema_classes import (AuditStampClass, ChangeTypeClass, + DataFlowInfoClass, + DataJobInfoClass, + DataJobInputOutputClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, EdgeClass, + MySqlDDLClass, SchemaFieldClass, + SchemaMetadataClass, + UpstreamClass) + +from tests.setup.lineage.constants import (DATA_FLOW_ENTITY_TYPE, + DATA_FLOW_INFO_ASPECT_NAME, + DATA_JOB_ENTITY_TYPE, + DATA_JOB_INFO_ASPECT_NAME, + DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, + DATASET_ENTITY_TYPE) +from tests.setup.lineage.helper_classes import Dataset, Pipeline def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: @@ -85,10 +74,10 @@ def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: def create_edge( - source_urn: str, - destination_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + source_urn: str, + destination_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ) -> EdgeClass: created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -105,7 +94,7 @@ def create_edge( def create_nodes_and_edges( - airflow_dag: Pipeline, + airflow_dag: Pipeline, ) -> List[MetadataChangeProposalWrapper]: mcps = [] data_flow_urn = make_data_flow_urn( @@ -160,9 +149,9 @@ def create_nodes_and_edges( def create_upstream_edge( - upstream_entity_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + upstream_entity_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ): created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -180,11 +169,11 @@ def create_upstream_edge( def create_upstream_mcp( - entity_type: str, - entity_urn: str, - upstreams: List[UpstreamClass], - timestamp_millis: int, - run_id: str = "", + 
entity_type: str, + entity_urn: str, + upstreams: List[UpstreamClass], + timestamp_millis: int, + run_id: str = "", ) -> MetadataChangeProposalWrapper: print(f"Creating upstreamLineage aspect for {entity_urn}") timestamp_millis: int = int(datetime.datetime.now().timestamp() * 1000) @@ -203,7 +192,7 @@ def create_upstream_mcp( def emit_mcps( - emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] + emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] ) -> None: for mcp in mcps: emitter.emit_mcp(mcp) diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py index b0ca29b544cfe..6ac75765286f0 100644 --- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py +++ b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest, wait_for_healthcheck_util) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py index 3672abcda948d..3127061c9f506 100644 --- a/smoke-test/tests/telemetry/telemetry_test.py +++ b/smoke-test/tests/telemetry/telemetry_test.py @@ -7,5 +7,7 @@ def test_no_clientID(): client_id_urn = "urn:li:telemetry:clientId" aspect = ["telemetryClientId"] - res_data = json.dumps(get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False)) + res_data = json.dumps( + get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False) + ) assert res_data == "{}" diff --git a/smoke-test/tests/test_result_msg.py b/smoke-test/tests/test_result_msg.py index e3b336db9d66c..b9775e8ee4acd 100644 --- a/smoke-test/tests/test_result_msg.py +++ b/smoke-test/tests/test_result_msg.py @@ -1,6 +1,6 @@ -from slack_sdk import WebClient 
import os +from slack_sdk import WebClient datahub_stats = {} @@ -10,10 +10,10 @@ def add_datahub_stats(stat_name, stat_val): def send_to_slack(passed: str): - slack_api_token = os.getenv('SLACK_API_TOKEN') - slack_channel = os.getenv('SLACK_CHANNEL') - slack_thread_ts = os.getenv('SLACK_THREAD_TS') - test_identifier = os.getenv('TEST_IDENTIFIER', 'LOCAL_TEST') + slack_api_token = os.getenv("SLACK_API_TOKEN") + slack_channel = os.getenv("SLACK_CHANNEL") + slack_thread_ts = os.getenv("SLACK_THREAD_TS") + test_identifier = os.getenv("TEST_IDENTIFIER", "LOCAL_TEST") if slack_api_token is None or slack_channel is None: return client = WebClient(token=slack_api_token) @@ -26,14 +26,21 @@ def send_to_slack(passed: str): message += f"Num {entity_type} is {val}\n" if slack_thread_ts is None: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}') + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + ) else: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}', thread_ts=slack_thread_ts) + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + thread_ts=slack_thread_ts, + ) def send_message(exitstatus): try: - send_to_slack('PASSED' if exitstatus == 0 else 'FAILED') + send_to_slack("PASSED" if exitstatus == 0 else "FAILED") except Exception as e: # We don't want to fail pytest at all print(f"Exception happened for sending msg to slack {e}") diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index a10cf13a08029..c6adb402e5d51 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -4,17 +4,15 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource from 
datahub.ingestion.source.state.checkpoint import Checkpoint -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState -from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalHandler +from datahub.ingestion.source.state.entity_removal_state import \ + GenericCheckpointState +from datahub.ingestion.source.state.stale_entity_removal_handler import \ + StaleEntityRemovalHandler from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import ( - get_gms_url, - get_mysql_password, - get_mysql_url, - get_mysql_username, -) +from tests.utils import (get_gms_url, get_mysql_password, get_mysql_url, + get_mysql_username) def test_stateful_ingestion(wait_for_healthchecks): diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 0b87f90a92c58..213a2ea087b7a 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,9 +1,13 @@ import pytest import tenacity -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info + +from tests.utils import (delete_urns_from_file, get_frontend_url, + get_sleep_info, ingest_file_via_rest, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(request): print("ingesting test data") @@ -18,6 +22,7 @@ def wait_for_healthchecks(): wait_for_healthcheck_util() yield + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. 
diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py index a73d585c6c72d..4705343c1a2ba 100644 --- a/smoke-test/tests/timeline/timeline_test.py +++ b/smoke-test/tests/timeline/timeline_test.py @@ -3,14 +3,14 @@ from datahub.cli import timeline_cli from datahub.cli.cli_utils import guess_entity_type, post_entity -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync, get_datahub_graph + +from tests.utils import (get_datahub_graph, ingest_file_via_rest, + wait_for_writes_to_sync) def test_all(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -18,8 +18,13 @@ def test_all(): ingest_file_via_rest("tests/timeline/timeline_test_datav2.json") ingest_file_via_rest("tests/timeline/timeline_test_datav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", - "OWNER"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, + ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", "OWNER"], + None, + None, + False, + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -35,9 +40,7 @@ def test_all(): def test_schema(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -45,7 +48,9 @@ def test_schema(): put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav2.json") put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False + ) 
get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -61,9 +66,7 @@ def test_schema(): def test_glossary(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -71,7 +74,9 @@ def test_glossary(): put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv2.json") put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["GLOSSARY_TERM"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["GLOSSARY_TERM"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -87,17 +92,29 @@ def test_glossary(): def test_documentation(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentation.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv2.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv3.json") + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentation.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv2.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv3.json", + ) - res_data = timeline_cli.get_timeline(dataset_urn, ["DOCUMENTATION"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["DOCUMENTATION"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -113,9 +130,7 @@ def test_documentation(): def test_tags(): 
platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -139,9 +154,7 @@ def test_tags(): def test_ownership(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index b10ad3aa3fc2a..55f3de594af4e 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -1,15 +1,11 @@ import os -import pytest -import requests from time import sleep -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_admin_credentials, - wait_for_writes_to_sync, -) +import pytest +import requests +from tests.utils import (get_admin_credentials, get_frontend_url, + wait_for_healthcheck_util, wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index af03efd4f71f8..bd75b13d1910f 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -1,19 +1,20 @@ import functools import json +import logging import os -from datetime import datetime, timedelta, timezone import subprocess import time -from typing import Any, Dict, List, Tuple +from datetime import datetime, timedelta, timezone from time import sleep -from joblib import Parallel, delayed +from typing import Any, Dict, List, Tuple -import requests_wrapper as requests -import logging from datahub.cli import cli_utils from datahub.cli.cli_utils import get_system_auth -from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig +from datahub.ingestion.graph.client 
import DatahubClientConfig, DataHubGraph from datahub.ingestion.run.pipeline import Pipeline +from joblib import Parallel, delayed + +import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync TIME: int = 1581407189000 @@ -174,6 +175,7 @@ def delete(entry): wait_for_writes_to_sync() + # Fixed now value NOW: datetime = datetime.now() @@ -232,6 +234,3 @@ def create_datahub_step_state_aspects( ] with open(onboarding_filename, "w") as f: json.dump(aspects_dict, f, indent=2) - - - diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index 4da69750a167b..685c3bd80b04d 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,16 +1,14 @@ -import pytest import time + +import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. 
@@ -40,6 +38,7 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou assert after_count == before_count + 1 return after_count + @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) @@ -111,18 +110,18 @@ def test_create_list_delete_global_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -137,7 +136,7 @@ def test_create_list_delete_global_view(frontend_session): "viewType": "GLOBAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -169,9 +168,7 @@ def test_create_list_delete_global_view(frontend_session): "query": """mutation deleteView($urn: String!) 
{\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -189,7 +186,9 @@ def test_create_list_delete_global_view(frontend_session): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_global_view"]) +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_global_view"] +) def test_create_list_delete_personal_view(frontend_session): # Get count of existing views @@ -237,18 +236,18 @@ def test_create_list_delete_personal_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -263,7 +262,7 @@ def test_create_list_delete_personal_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -293,9 +292,7 @@ def test_create_list_delete_personal_view(frontend_session): "query": """mutation deleteView($urn: String!) 
{\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -312,25 +309,28 @@ def test_create_list_delete_personal_view(frontend_session): before_count=new_count, ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_personal_view"]) + +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_personal_view"] +) def test_update_global_view(frontend_session): # First create a view new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -345,7 +345,7 @@ def test_update_global_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -366,18 +366,18 @@ def test_update_global_view(frontend_session): new_view_name = "New Test View" new_view_description = "New Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], - "filter": { - "operator": "OR", - "filters": [ - { - "field": "glossaryTerms", - "values": ["urn:li:glossaryTerm:test"], - "negated": True, - "condition": "CONTAIN" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], + "filter": { + "operator": "OR", + "filters": [ + { + "field": "glossaryTerms", + "values": ["urn:li:glossaryTerm:test"], + "negated": True, + "condition": "CONTAIN", + } + ], + }, } update_view_json = { @@ -391,8 +391,8 @@ def 
test_update_global_view(frontend_session): "input": { "name": new_view_name, "description": new_view_description, - "definition": new_view_definition - } + "definition": new_view_definition, + }, }, } @@ -411,9 +411,7 @@ def test_update_global_view(frontend_session): "query": """mutation deleteView($urn: String!) {\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post(